In [70]:
import pandas as pd

# Read dog intakes and outcomes data  

In [171]:
# Load the dog intake records.
# Text columns use pandas' 'string' dtype. 'Intake-DateTime' is deliberately NOT
# listed in the dtype map: parse_dates converts it to datetime64, so also declaring
# it as 'string' would contradict the dtype the column actually ends up with.
intakes_data = pd.read_csv('dog_intakes_breed.csv', dtype={
    'AnimalID': 'string',
    'ID': 'string',
    'Breed': 'string',
    'Color': 'string',
    'Sex': 'string',
    'Name': 'string',
    'Intake-Type': 'string',
    'Intake-Condition': 'string',
    'Intake-Age(days)': 'int',
    'Found-Location': 'string'
}, parse_dates=['Intake-DateTime'])

In [172]:
# Check the column dtypes produced by read_csv; 'Intake-DateTime' is expected to be
# datetime64 because it is listed in parse_dates.
intakes_data.dtypes

AnimalID            string[python]
ID                  string[python]
Breed               string[python]
Color               string[python]
Sex                 string[python]
Name                string[python]
Intake-DateTime     datetime64[ns]
Intake-Type         string[python]
Intake-Condition    string[python]
Intake-Age(days)             int32
Found-Location      string[python]
dtype: object

In [173]:
# Preview the intake records (the notebook renders a truncated head/tail view).
intakes_data

Unnamed: 0,AnimalID,ID,Breed,Color,Sex,Name,Intake-DateTime,Intake-Type,Intake-Condition,Intake-Age(days),Found-Location
0,A006100,A006100,Spinone Italiano,Yellow/White,Neutered Male,Scamp,2014-03-07 14:26:00,Public Assist,Normal,2190,8700 Research in Austin (TX)
1,A006100,A006100+,Spinone Italiano,Yellow/White,Neutered Male,Scamp,2014-12-19 10:21:00,Public Assist,Normal,2555,8700 Research Blvd in Austin (TX)
2,A006100,A006100++,Spinone Italiano,Yellow/White,Neutered Male,Scamp,2017-12-07 14:07:00,Stray,Normal,3650,Colony Creek And Hunters Trace in Austin (TX)
3,A047759,A006101,Dachshund,Tricolor,Neutered Male,Oreo,2014-04-02 15:55:00,Owner Surrender,Normal,3650,Austin (TX)
4,A134067,A134067,Shetland Sheepdog,Brown/White,Neutered Male,Bandit,2013-11-16 09:02:00,Public Assist,Injured,12190,12034 Research Blvd in Austin (TX)
...,...,...,...,...,...,...,...,...,...,...,...
87059,A893570,A893570,Rottweiler,Black/Brown,Intact Female,,2023-11-23 12:17:00,Stray,Normal,730,5500 Burleson Rd in Austin (TX)
87060,A893573,A893573,Border Terrier,Brown,Unknown,,2023-11-23 13:45:00,Stray,Normal,730,10909 Maha Loop Rd in Travis (TX)
87061,A893578,A893578,American Staffordshire Terrier,Yellow Brindle,Intact Male,Tiger,2023-11-23 20:19:00,Stray,Injured,240,South Mopac Highway in Austin (TX)
87062,A893579,A893579,American Staffordshire Terrier,Black/White,Intact Male,,2023-11-23 21:00:00,Stray,Injured,730,South Mopac Highway in Austin (TX)


In [174]:
# Load the dog outcome records.
# Text columns use pandas' 'string' dtype. 'Date-Of-Birth' and 'Outcome-DateTime'
# are deliberately NOT listed in the dtype map: parse_dates converts both to
# datetime64, so also declaring them as 'string' would contradict the dtype the
# columns actually end up with.
outcomes_data = pd.read_csv('dog_outcomes_breed.csv', dtype={
    'AnimalID': 'string',
    'ID': 'string',
    'Breed': 'string',
    'Color': 'string',
    'Sex': 'string',
    'Name': 'string',
    'Outcome-Type': 'string',
    'Outcome-Subtype': 'string',
    'Outcome-Age(days)': 'int'
}, parse_dates=['Date-Of-Birth', 'Outcome-DateTime'])

In [175]:
# Check the column dtypes produced by read_csv; 'Date-Of-Birth' and 'Outcome-DateTime'
# are expected to be datetime64 because they are listed in parse_dates.
outcomes_data.dtypes

AnimalID             string[python]
ID                   string[python]
Breed                string[python]
Color                string[python]
Sex                  string[python]
Name                 string[python]
Date-Of-Birth        datetime64[ns]
Outcome-DateTime     datetime64[ns]
Outcome-Type         string[python]
Outcome-Subtype      string[python]
Outcome-Age(days)             int32
dtype: object

In [176]:
# Preview the outcome records (the notebook renders a truncated head/tail view).
outcomes_data

Unnamed: 0,AnimalID,ID,Breed,Color,Sex,Name,Date-Of-Birth,Outcome-DateTime,Outcome-Type,Outcome-Subtype,Outcome-Age(days)
0,A006100,A006100,Spinone Italiano,Yellow/White,Neutered Male,Scamp,2007-07-09,2014-03-08 17:10:00,Return to Owner,,2435
1,A006100,A006100+,Spinone Italiano,Yellow/White,Neutered Male,Scamp,2007-07-09,2014-12-20 16:35:00,Return to Owner,,2722
2,A006100,A006100++,Spinone Italiano,Yellow/White,Neutered Male,Scamp,2007-07-09,2017-12-07 00:00:00,Return to Owner,,3804
3,A047759,A047759,Dachshund,Tricolor,Neutered Male,Oreo,2004-04-02,2014-04-07 15:12:00,Transfer,Partner,3658
4,A134067,A134067,Shetland Sheepdog,Brown/White,Neutered Male,Bandit,1997-10-16,2013-11-16 11:54:00,Return to Owner,,5875
...,...,...,...,...,...,...,...,...,...,...,...
86974,A893431,A893431,Chihuahua,Tricolor,Intact Female,Chili,2015-11-21,2023-11-21 15:41:00,Return to Owner,,2923
86975,A893432,A893432,Chihuahua,Tan/White,Intact Female,Coco,2015-11-21,2023-11-21 15:41:00,Return to Owner,,2923
86976,A893452,A893452,Maltese,White,Intact Female,Sophie,2016-11-21,2023-11-22 11:26:00,Return to Owner,,2557
86977,A893529,A893529,Labrador Retriever,White,Intact Female,,2023-09-22,2023-11-22 16:51:00,Transfer,Partner,62


# Merge intakes and outcomes dataframes

In [177]:
# Attach each intake's outcome with a left join on the shared identity columns, so
# intakes without a matching outcome row are kept. validate="1:1" makes pandas raise
# if the key combination is duplicated on either side.
left_merged = (
    intakes_data
    .merge(
        outcomes_data,
        how="left",
        on=["AnimalID", "ID", "Breed", "Color", "Sex", "Name"],
        validate="1:1",
    )
    # Nullable Int64 keeps the ages as integers while allowing missing values
    # for intakes that have no matching outcome.
    .astype({'Outcome-Age(days)': 'Int64'})
)
left_merged.dtypes

AnimalID             string[python]
ID                   string[python]
Breed                string[python]
Color                string[python]
Sex                  string[python]
Name                 string[python]
Intake-DateTime      datetime64[ns]
Intake-Type          string[python]
Intake-Condition     string[python]
Intake-Age(days)              int32
Found-Location       string[python]
Date-Of-Birth        datetime64[ns]
Outcome-DateTime     datetime64[ns]
Outcome-Type         string[python]
Outcome-Subtype      string[python]
Outcome-Age(days)             Int64
dtype: object

In [178]:
# Preview the merged frame; because the merge is a left join, intakes with no
# matching outcome row keep their intake columns and have missing outcome columns.
left_merged

Unnamed: 0,AnimalID,ID,Breed,Color,Sex,Name,Intake-DateTime,Intake-Type,Intake-Condition,Intake-Age(days),Found-Location,Date-Of-Birth,Outcome-DateTime,Outcome-Type,Outcome-Subtype,Outcome-Age(days)
0,A006100,A006100,Spinone Italiano,Yellow/White,Neutered Male,Scamp,2014-03-07 14:26:00,Public Assist,Normal,2190,8700 Research in Austin (TX),2007-07-09,2014-03-08 17:10:00,Return to Owner,,2435
1,A006100,A006100+,Spinone Italiano,Yellow/White,Neutered Male,Scamp,2014-12-19 10:21:00,Public Assist,Normal,2555,8700 Research Blvd in Austin (TX),2007-07-09,2014-12-20 16:35:00,Return to Owner,,2722
2,A006100,A006100++,Spinone Italiano,Yellow/White,Neutered Male,Scamp,2017-12-07 14:07:00,Stray,Normal,3650,Colony Creek And Hunters Trace in Austin (TX),2007-07-09,2017-12-07 00:00:00,Return to Owner,,3804
3,A047759,A006101,Dachshund,Tricolor,Neutered Male,Oreo,2014-04-02 15:55:00,Owner Surrender,Normal,3650,Austin (TX),NaT,NaT,,,
4,A134067,A134067,Shetland Sheepdog,Brown/White,Neutered Male,Bandit,2013-11-16 09:02:00,Public Assist,Injured,12190,12034 Research Blvd in Austin (TX),1997-10-16,2013-11-16 11:54:00,Return to Owner,,5875
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87059,A893570,A893570,Rottweiler,Black/Brown,Intact Female,,2023-11-23 12:17:00,Stray,Normal,730,5500 Burleson Rd in Austin (TX),NaT,NaT,,,
87060,A893573,A893573,Border Terrier,Brown,Unknown,,2023-11-23 13:45:00,Stray,Normal,730,10909 Maha Loop Rd in Travis (TX),NaT,NaT,,,
87061,A893578,A893578,American Staffordshire Terrier,Yellow Brindle,Intact Male,Tiger,2023-11-23 20:19:00,Stray,Injured,240,South Mopac Highway in Austin (TX),NaT,NaT,,,
87062,A893579,A893579,American Staffordshire Terrier,Black/White,Intact Male,,2023-11-23 21:00:00,Stray,Injured,730,South Mopac Highway in Austin (TX),NaT,NaT,,,


# Read dog breeds data

In [179]:
# Load the per-breed reference table with explicit dtypes for every declared column.
breed_data = pd.read_csv(
    'breed_data.csv',
    dtype={
        'Breed': 'string',
        # Label-like columns are stored as categoricals.
        **dict.fromkeys(
            ['Category', 'Intelligence-Category', 'Size-Category'],
            'category',
        ),
        # Nullable integers: these columns contain missing values in the data.
        **dict.fromkeys(
            ['Intelligence-Ranking', 'Lifetime-Cost($)', 'Purchase-Cost($)'],
            'Int64',
        ),
    },
)

In [180]:
# Preview the breed reference table (the notebook renders a truncated head/tail view).
breed_data

Unnamed: 0,Breed,Category,Intelligence-Ranking,Intelligence-Category,Size-Category,Lifetime-Cost($),Purchase-Cost($)
0,Affenpinscher,toy,37,above-average,small,15835,510
1,Afghan Hound,hound,80,Lowest,large,20818,890
2,Airedale Terrier,terrier,29,above-average,medium,,733
3,Akita,working,54,average,large,18217,1202
4,Alaskan Malamute,working,50,average,large,19069,1210
...,...,...,...,...,...,...,...
167,Whippet,hound,51,average,medium,18160,915
168,Wire Fox Terrier,terrier,51,average,small,,668
169,Wirehaired Pointing Griffon,sporting,46,average,medium,,755
170,Xoloitzcuintli,non-sporting,,,medium,,717


In [181]:
# Enrich every intake/outcome row with the attributes of its breed.
# - how="left" keeps rows whose Breed has no entry in breed_data.
# - indicator=True adds the '_merge' column ('both' / 'left_only'), which is used
#   afterwards to find the rows that had no breed match.
# - validate="m:1" makes pandas raise a MergeError if breed_data ever lists a Breed
#   more than once, instead of silently duplicating intake rows.
data = pd.merge(
    left_merged,
    breed_data,
    how="left",
    on=["Breed"],
    indicator=True,
    validate="m:1",
)
data

Unnamed: 0,AnimalID,ID,Breed,Color,Sex,Name,Intake-DateTime,Intake-Type,Intake-Condition,Intake-Age(days),...,Outcome-Type,Outcome-Subtype,Outcome-Age(days),Category,Intelligence-Ranking,Intelligence-Category,Size-Category,Lifetime-Cost($),Purchase-Cost($),_merge
0,A006100,A006100,Spinone Italiano,Yellow/White,Neutered Male,Scamp,2014-03-07 14:26:00,Public Assist,Normal,2190,...,Return to Owner,,2435,sporting,,,large,,1725,both
1,A006100,A006100+,Spinone Italiano,Yellow/White,Neutered Male,Scamp,2014-12-19 10:21:00,Public Assist,Normal,2555,...,Return to Owner,,2722,sporting,,,large,,1725,both
2,A006100,A006100++,Spinone Italiano,Yellow/White,Neutered Male,Scamp,2017-12-07 14:07:00,Stray,Normal,3650,...,Return to Owner,,3804,sporting,,,large,,1725,both
3,A047759,A006101,Dachshund,Tricolor,Neutered Male,Oreo,2014-04-02 15:55:00,Owner Surrender,Normal,3650,...,,,,hound,49,average,small,17350,423,both
4,A134067,A134067,Shetland Sheepdog,Brown/White,Neutered Male,Bandit,2013-11-16 09:02:00,Public Assist,Injured,12190,...,Return to Owner,,5875,herding,6,brightest,small,17469,465,both
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87059,A893570,A893570,Rottweiler,Black/Brown,Intact Female,,2023-11-23 12:17:00,Stray,Normal,730,...,,,,working,9,brightest,large,16395,1118,both
87060,A893573,A893573,Border Terrier,Brown,Unknown,,2023-11-23 13:45:00,Stray,Normal,730,...,,,,terrier,30,above-average,small,19575,833,both
87061,A893578,A893578,American Staffordshire Terrier,Yellow Brindle,Intact Male,Tiger,2023-11-23 20:19:00,Stray,Injured,240,...,,,,terrier,34,above-average,medium,,1043,both
87062,A893579,A893579,American Staffordshire Terrier,Black/White,Intact Male,,2023-11-23 21:00:00,Stray,Injured,730,...,,,,terrier,34,above-average,medium,,1043,both


In [182]:
# Keep the rows the merge indicator marked 'left_only', i.e. rows for which the
# breed merge found no matching Breed on the right-hand side.
no_breed_match = data.loc[data['_merge'].eq('left_only')]

In [183]:
# Display the rows that found no match in the breed merge.
no_breed_match

Unnamed: 0,AnimalID,ID,Breed,Color,Sex,Name,Intake-DateTime,Intake-Type,Intake-Condition,Intake-Age(days),...,Outcome-Type,Outcome-Subtype,Outcome-Age(days),Category,Intelligence-Ranking,Intelligence-Category,Size-Category,Lifetime-Cost($),Purchase-Cost($),_merge
538,A498745,A498745,,White/Gray,Spayed Female,Peeps,2020-08-31 07:46:00,Stray,Injured,21095,...,Euthanasia,Suffering,8402,,,,,,,left_only
3442,A649522,A649522,,Brown,Intact Male,Capulin,2015-02-07 00:54:00,Stray,Normal,730,...,Return to Owner,,791,,,,,,,left_only
50668,A774795,A774795,,White,Intact Female,Pickles,2018-06-21 14:05:00,Public Assist,Normal,730,...,Return to Owner,,738,,,,,,,left_only
50669,A774798,A774798,,Cream,Spayed Female,Ketchup,2018-06-21 14:07:00,Public Assist,Normal,730,...,Return to Owner,,738,,,,,,,left_only
68386,A818737,A818737,,Blue/White,Unknown,,2020-06-14 14:27:00,Stray,Nursing,0,...,Euthanasia,Suffering,4,,,,,,,left_only
70302,A826087,A826087,,Black,Intact Female,Bertha,2020-11-19 09:46:00,Owner Surrender,Sick,28,...,Disposal,,35,,,,,,,left_only
72512,A834634,A834634,,Tricolor,Intact Male,,2021-05-15 00:00:00,Stray,Injured,1095,...,Disposal,,1099,,,,,,,left_only
