In [102]:
import pandas as pd

# Importing data and extracting dogs only

In [103]:
# Dataframe for animal intakes and outcomes
dataframe_intakes_outcomes = pd.read_csv("aac_intakes_outcomes.csv")

df_dogs = dataframe_intakes_outcomes[dataframe_intakes_outcomes["animal_type"].str.contains("Dog")==True]

df_dogs = df_dogs[['animal_id_intake', 'breed']]

df_dogs

Unnamed: 0,animal_id_intake,breed
0,A006100,Spinone Italiano Mix
1,A006100,Spinone Italiano Mix
2,A006100,Spinone Italiano Mix
3,A047759,Dachshund
4,A134067,Shetland Sheepdog
...,...,...
79656,A769042,Miniature Poodle/Maltese
79657,A769043,Beagle/Australian Cattle Dog
79660,A769047,Border Collie Mix
79670,A769066,Labrador Retriever Mix


### Add a column for mixed or pure breed

In [104]:
df_dogs['mixed_breed'] = (df_dogs['breed'].str.contains('Mix') | df_dogs['breed'].str.contains('/'))

# Rename Id column
df_dogs = df_dogs.rename(columns={
    'animal_id_intake': 'id'})

### Merge with dogs_cleaned dataset

In [105]:
dogs_cleaned = pd.read_csv("dogs_cleaned.csv")
dogs_cleaned = dogs_cleaned.rename(columns={
    'Breed Name': 'breed'})

# Merge original dataset with dog breed size

# Transform breed to ignore mixed races
df_dogs['breed'] = df_dogs['breed'].map(lambda i: i.split('/')[0])
df_dogs['breed'] = df_dogs['breed'].map(lambda i: i.split(' Mix')[0])

# Correcting breed names accross data
df_dogs.loc[df_dogs["breed"] == 'German Shepherd', 'breed'] = 'German Shepherd Dog'
df_dogs.loc[df_dogs["breed"] == 'Pit Bull', 'breed'] = 'American Pit Bull Terrier'
df_dogs.loc[df_dogs["breed"] == 'Miniature Poodle', 'breed'] = 'Pomeranian'
df_dogs.loc[df_dogs["breed"] == 'Chihuahua Shorthair', 'breed'] = 'Chihuahua'
df_dogs.loc[df_dogs["breed"] == 'Alaskan Husky', 'breed'] = 'Siberian Husky'
df_dogs.loc[df_dogs["breed"] == 'Chesa Bay Retr', 'breed'] = 'Chesapeake Bay Retriever'
df_dogs.loc[df_dogs["breed"] == 'Catahoula', 'breed'] = 'Catahoula Bulldog'
df_dogs.loc[df_dogs["breed"] == 'Wire Hair Fox Terrier', 'breed'] = 'Fox Terrier'
df_dogs.loc[df_dogs["breed"] == 'West Highland', 'breed'] = 'West Highland White Terrier'
df_dogs.loc[df_dogs["breed"] == 'Treeing Tennesse Brindle', 'breed'] = 'Treeing Tennessee Brindle'
df_dogs.loc[df_dogs["breed"] == 'Staffordshire', 'breed'] = 'Staffordshire Bull Terrier'
df_dogs.loc[df_dogs["breed"] == 'Chihuahua Longhair', 'breed'] = 'Chihuahua'
df_dogs.loc[df_dogs["breed"] == 'Dachshund Wirehair', 'breed'] = 'Dachshund'
df_dogs.loc[df_dogs["breed"] == 'Dachshund Longhair', 'breed'] = 'Dachshund'
df_dogs.loc[df_dogs["breed"] == 'Chinese Sharpei', 'breed'] = 'Chinese Shar-Pei'
df_dogs.loc[df_dogs["breed"] == 'Anatol Shepherd', 'breed'] = 'Anatolian Shepherd Dog'
df_dogs.loc[df_dogs["breed"] == 'Plott Hound', 'breed'] = 'Plott'
df_dogs.loc[df_dogs["breed"] == 'Doberman Pinsch', 'breed'] = 'Doberman Pinscher'
df_dogs.loc[df_dogs["breed"] == 'Chinese Sharpei', 'breed'] = 'Chinese Shar-Pei'

# Merge
df_dogs = pd.merge(df_dogs, dogs_cleaned, on='breed', how='left')


In [106]:
# helper dataframe for refinement
df_no_breed_info = df_dogs[df_dogs['Detailed Description Link'].isnull()] 
df_no_breed_info['Frequency'] = df_no_breed_info.groupby('breed')['breed'].transform('count')
df_no_breed_info.sort_values('Frequency', ascending=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_no_breed_info['Frequency'] = df_no_breed_info.groupby('breed')['breed'].transform('count')


Unnamed: 0,id,breed,mixed_breed,Detailed Description Link,Dog Size,Dog Breed Group,Height,"Avg. Height, cm",Weight,"Avg. Weight, kg",...,Potential For Mouthiness,Prey Drive,Tendency To Bark Or Howl,Wanderlust Potential,Physical Needs,Energy Level,Intensity,Exercise Needs,Potential For Playfulness,Frequency
21,A230482,Queensland Heeler,False,,,,,,,,...,,,,,,,,,,183
19377,A700890,Queensland Heeler,True,,,,,,,,...,,,,,,,,,,183
22033,A707873,Queensland Heeler,False,,,,,,,,...,,,,,,,,,,183
20701,A704303,Queensland Heeler,True,,,,,,,,...,,,,,,,,,,183
20084,A702593,Queensland Heeler,True,,,,,,,,...,,,,,,,,,,183
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4986,A667463,Eng Toy Spaniel,True,,,,,,,,...,,,,,,,,,,1
16117,A694112,Sealyham Terr,True,,,,,,,,...,,,,,,,,,,1
6832,A671428,Swiss Hound,True,,,,,,,,...,,,,,,,,,,1
34828,A740412,Grand Basset Griffon Vendeen,True,,,,,,,,...,,,,,,,,,,1


### Drop dogs with unknown breed characteristics

In [107]:
df_dogs = df_dogs[df_dogs["Detailed Description Link"].isna()==False]
df_dogs

Unnamed: 0,id,breed,mixed_breed,Detailed Description Link,Dog Size,Dog Breed Group,Height,"Avg. Height, cm",Weight,"Avg. Weight, kg",...,Intelligence,Potential For Mouthiness,Prey Drive,Tendency To Bark Or Howl,Wanderlust Potential,Physical Needs,Energy Level,Intensity,Exercise Needs,Potential For Playfulness
0,A006100,Spinone Italiano,True,https://dogtime.com/dog-breeds/spinone-italiano,Very Large,Sporting Dogs,22 to 28 inches,63.50,61 to 86 pounds,33.08,...,4.0,2.0,4.0,2.0,2.0,3.33,4.0,2.0,4.0,4.0
1,A006100,Spinone Italiano,True,https://dogtime.com/dog-breeds/spinone-italiano,Very Large,Sporting Dogs,22 to 28 inches,63.50,61 to 86 pounds,33.08,...,4.0,2.0,4.0,2.0,2.0,3.33,4.0,2.0,4.0,4.0
2,A006100,Spinone Italiano,True,https://dogtime.com/dog-breeds/spinone-italiano,Very Large,Sporting Dogs,22 to 28 inches,63.50,61 to 86 pounds,33.08,...,4.0,2.0,4.0,2.0,2.0,3.33,4.0,2.0,4.0,4.0
3,A047759,Dachshund,False,https://dogtime.com/dog-breeds/dachshund,Medium,Hound Dogs,8 inches to 9 inches tall at the shoulder,21.59,16 to 32 pounds,10.80,...,4.0,4.0,5.0,5.0,5.0,3.00,3.0,3.0,3.0,4.0
4,A134067,Shetland Sheepdog,False,https://dogtime.com/dog-breeds/shetland-sheepdog,Medium,Herding Dogs,13 to 16 inches tall at the shoulder,36.83,Starts at 20 pounds,9.00,...,5.0,1.0,3.0,4.0,2.0,3.33,4.0,2.0,4.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45361,A769042,Pomeranian,True,https://dogtime.com/dog-breeds/pomeranian,Very Small,Companion Dogs,7 to 12 inches tall at the shoulder,24.13,3 to 7 pounds,2.25,...,4.0,2.0,2.0,5.0,1.0,2.33,3.0,2.0,2.0,3.0
45362,A769043,Beagle,True,https://dogtime.com/dog-breeds/beagle,Medium,Hound Dogs,13 to 15 inches tall at the shoulder,35.56,18 to 30 pounds,10.80,...,4.0,3.0,5.0,5.0,5.0,4.33,4.0,5.0,4.0,5.0
45363,A769047,Border Collie,True,https://dogtime.com/dog-breeds/border-collie,Large,Herding Dogs,18 to 22 inches tall at the shoulder,50.80,30 to 45 pounds,16.88,...,5.0,3.0,3.0,2.0,3.0,4.33,5.0,3.0,5.0,5.0
45364,A769066,Labrador Retriever,True,https://dogtime.com/dog-breeds/labrador-retriever,Very Large,Sporting Dogs,21 to 24 inches at the shoulder,57.15,55 to 80 pounds,30.38,...,5.0,5.0,2.0,4.0,3.0,5.00,5.0,5.0,5.0,5.0
