# Data Science Final Project - Shelter Animal Adoptions

## Questions:
## 1.  Are animal type, sex, and age significant predictors of shelter dog and cat adoption?
## 2.  What time of year are shelter dog and cat adoptions most popular?

## Import Packages

In [2]:
import pandas as pd   # for reading data, data wrangling, visualization, and analysis
import numpy as np   # for mathematical operations on arrays (in modeling)
from scipy import stats   # for independent chi-square (statistical tests)
import statsmodels as sm   # for mcnemar chi-square (statistical tests and models)
from statsmodels.stats.contingency_tables import mcnemar   # for mcnemar chi-square
import os   # to create "data" folder and convert df to csv

## Import Data

In [3]:
adoptions = pd.read_csv("Austin_Animal_Center_Outcomes.csv")

In [4]:
adoptions.head()

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color
0,A794011,Chunk,5/8/2019 18:20,19-May,5/2/2017,Rto-Adopt,,Cat,Neutered Male,2 years,Domestic Shorthair Mix,Brown Tabby/White
1,A776359,Gizmo,7/18/2018 16:02,18-Jul,7/12/2017,Adoption,,Dog,Neutered Male,1 year,Chihuahua Shorthair Mix,White/Brown
2,A821648,,8/16/2020 11:38,20-Aug,8/16/2019,Euthanasia,,Other,Unknown,1 year,Raccoon,Gray
3,A720371,Moose,2/13/2016 17:59,16-Feb,10/8/2015,Adoption,,Dog,Neutered Male,4 months,Anatol Shepherd/Labrador Retriever,Buff
4,A674754,,3/18/2014 11:47,14-Mar,3/12/2014,Transfer,Partner,Cat,Intact Male,6 days,Domestic Shorthair Mix,Orange Tabby


## Data Wrangling

### Keep only relevant columns

In [5]:
adoptions1 = adoptions[["MonthYear", "Outcome Type", "Animal Type", "Sex upon Outcome", "Age upon Outcome", "Breed", "Color"]]

In [6]:
adoptions1.head()

Unnamed: 0,MonthYear,Outcome Type,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color
0,19-May,Rto-Adopt,Cat,Neutered Male,2 years,Domestic Shorthair Mix,Brown Tabby/White
1,18-Jul,Adoption,Dog,Neutered Male,1 year,Chihuahua Shorthair Mix,White/Brown
2,20-Aug,Euthanasia,Other,Unknown,1 year,Raccoon,Gray
3,16-Feb,Adoption,Dog,Neutered Male,4 months,Anatol Shepherd/Labrador Retriever,Buff
4,14-Mar,Transfer,Cat,Intact Male,6 days,Domestic Shorthair Mix,Orange Tabby


### Split MonthYear into 2 columns

In [7]:
adoptions2 = adoptions1["MonthYear"].str.split("-", expand=True)

In [8]:
adoptions2.head

<bound method NDFrame.head of          0    1
0       19  May
1       18  Jul
2       20  Aug
3       16  Feb
4       14  Mar
...     ..  ...
136751  22  Feb
136752  22  Feb
136753  22  Feb
136754  22  Feb
136755  22  Feb

[136756 rows x 2 columns]>

### Add new month and year columns back to dataframe

In [9]:
adoptions3 = pd.concat([adoptions1, adoptions2], axis=1)

In [10]:
adoptions3.head()

Unnamed: 0,MonthYear,Outcome Type,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color,0,1
0,19-May,Rto-Adopt,Cat,Neutered Male,2 years,Domestic Shorthair Mix,Brown Tabby/White,19,May
1,18-Jul,Adoption,Dog,Neutered Male,1 year,Chihuahua Shorthair Mix,White/Brown,18,Jul
2,20-Aug,Euthanasia,Other,Unknown,1 year,Raccoon,Gray,20,Aug
3,16-Feb,Adoption,Dog,Neutered Male,4 months,Anatol Shepherd/Labrador Retriever,Buff,16,Feb
4,14-Mar,Transfer,Cat,Intact Male,6 days,Domestic Shorthair Mix,Orange Tabby,14,Mar


### Rename new columns and drop original date column

In [11]:
adoptions3.rename(columns={0: "Year", 1: "Month"}, inplace=True)
adoptions3.drop(["MonthYear"], axis=1, inplace=True)

In [12]:
adoptions3.head()

Unnamed: 0,Outcome Type,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color,Year,Month
0,Rto-Adopt,Cat,Neutered Male,2 years,Domestic Shorthair Mix,Brown Tabby/White,19,May
1,Adoption,Dog,Neutered Male,1 year,Chihuahua Shorthair Mix,White/Brown,18,Jul
2,Euthanasia,Other,Unknown,1 year,Raccoon,Gray,20,Aug
3,Adoption,Dog,Neutered Male,4 months,Anatol Shepherd/Labrador Retriever,Buff,16,Feb
4,Transfer,Cat,Intact Male,6 days,Domestic Shorthair Mix,Orange Tabby,14,Mar


### Group and Recode Sex upon Outcome column

In [13]:
adoptions3["Sex upon Outcome"].value_counts()

Neutered Male    48172
Spayed Female    43461
Intact Male      17371
Intact Female    16872
Unknown          10879
Name: Sex upon Outcome, dtype: int64

In [14]:
def sex (series): 
    if series == "Neutered Male" : 
        return "Male"
    if series == "Spayed Female": 
        return "Female"
    if series == "Intact Male" : 
        return "Male"
    if series == "Intact Female" : 
        return "Female"
    if series =="Unknown":
        return "Unknown"
    
adoptions3["Sex"] = adoptions3["Sex upon Outcome"].apply(sex)

In [15]:
adoptions3.head(10)

Unnamed: 0,Outcome Type,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color,Year,Month,Sex
0,Rto-Adopt,Cat,Neutered Male,2 years,Domestic Shorthair Mix,Brown Tabby/White,19,May,Male
1,Adoption,Dog,Neutered Male,1 year,Chihuahua Shorthair Mix,White/Brown,18,Jul,Male
2,Euthanasia,Other,Unknown,1 year,Raccoon,Gray,20,Aug,Unknown
3,Adoption,Dog,Neutered Male,4 months,Anatol Shepherd/Labrador Retriever,Buff,16,Feb,Male
4,Transfer,Cat,Intact Male,6 days,Domestic Shorthair Mix,Orange Tabby,14,Mar,Male
5,Adoption,Dog,Spayed Female,7 years,Chihuahua Shorthair Mix,Brown,20,Oct,Female
6,Adoption,Dog,Neutered Male,2 years,American Foxhound/Labrador Retriever,White/Brown,20,May,Male
7,Adoption,Cat,Neutered Male,2 months,Domestic Shorthair Mix,Black,14,Oct,Male
8,Adoption,Cat,Neutered Male,2 months,Domestic Shorthair Mix,White/Orange Tabby,14,Aug,Male
9,Adoption,Dog,Spayed Female,2 years,Border Collie/Cardigan Welsh Corgi,Black/White,21,Aug,Female


### Drop Sex upon Outcome column

In [16]:
adoptions3.drop(["Sex upon Outcome"], axis=1, inplace=True)

In [17]:
adoptions3.head()

Unnamed: 0,Outcome Type,Animal Type,Age upon Outcome,Breed,Color,Year,Month,Sex
0,Rto-Adopt,Cat,2 years,Domestic Shorthair Mix,Brown Tabby/White,19,May,Male
1,Adoption,Dog,1 year,Chihuahua Shorthair Mix,White/Brown,18,Jul,Male
2,Euthanasia,Other,1 year,Raccoon,Gray,20,Aug,Unknown
3,Adoption,Dog,4 months,Anatol Shepherd/Labrador Retriever,Buff,16,Feb,Male
4,Transfer,Cat,6 days,Domestic Shorthair Mix,Orange Tabby,14,Mar,Male


### Filter Animal Type to only include Dog and Cat

In [18]:
adoptions4 = adoptions3[(adoptions3["Animal Type"].isin(["Dog", "Cat"]))]

In [19]:
adoptions4.head(10)

Unnamed: 0,Outcome Type,Animal Type,Age upon Outcome,Breed,Color,Year,Month,Sex
0,Rto-Adopt,Cat,2 years,Domestic Shorthair Mix,Brown Tabby/White,19,May,Male
1,Adoption,Dog,1 year,Chihuahua Shorthair Mix,White/Brown,18,Jul,Male
3,Adoption,Dog,4 months,Anatol Shepherd/Labrador Retriever,Buff,16,Feb,Male
4,Transfer,Cat,6 days,Domestic Shorthair Mix,Orange Tabby,14,Mar,Male
5,Adoption,Dog,7 years,Chihuahua Shorthair Mix,Brown,20,Oct,Female
6,Adoption,Dog,2 years,American Foxhound/Labrador Retriever,White/Brown,20,May,Male
7,Adoption,Cat,2 months,Domestic Shorthair Mix,Black,14,Oct,Male
8,Adoption,Cat,2 months,Domestic Shorthair Mix,White/Orange Tabby,14,Aug,Male
9,Adoption,Dog,2 years,Border Collie/Cardigan Welsh Corgi,Black/White,21,Aug,Female
10,Transfer,Cat,2 years,Domestic Shorthair Mix,Black,14,Jul,Female


### Filter out partial years from Year column

In [20]:
adoptions4["Year"].value_counts().sort_index(ascending=True)

13     4265
14    17556
15    17606
16    16572
17    16518
18    15791
19    18861
20     9046
21    11267
22     1377
Name: Year, dtype: int64

In [21]:
adoptions5 = adoptions4[(adoptions4["Year"].isin(["14", "15", "16", "17", "18", "19", "20", "21"]))]

In [22]:
adoptions5["Year"].unique()

array(['19', '18', '16', '14', '20', '21', '17', '15'], dtype=object)

In [23]:
adoptions5["Year"].value_counts().sort_index(ascending=True)

14    17556
15    17606
16    16572
17    16518
18    15791
19    18861
20     9046
21    11267
Name: Year, dtype: int64

In [24]:
adoptions5.head()

Unnamed: 0,Outcome Type,Animal Type,Age upon Outcome,Breed,Color,Year,Month,Sex
0,Rto-Adopt,Cat,2 years,Domestic Shorthair Mix,Brown Tabby/White,19,May,Male
1,Adoption,Dog,1 year,Chihuahua Shorthair Mix,White/Brown,18,Jul,Male
3,Adoption,Dog,4 months,Anatol Shepherd/Labrador Retriever,Buff,16,Feb,Male
4,Transfer,Cat,6 days,Domestic Shorthair Mix,Orange Tabby,14,Mar,Male
5,Adoption,Dog,7 years,Chihuahua Shorthair Mix,Brown,20,Oct,Female


### Drop outlier rows from Age upon Outcome column

In [25]:
adoptions5["Age upon Outcome"].value_counts()

1 year       19328
2 years      18392
2 months     16181
3 years       7430
3 months      6251
1 month       5639
4 years       4436
4 months      4170
5 years       4166
5 months      2994
6 years       2770
6 months      2674
8 years       2430
7 years       2396
3 weeks       2269
2 weeks       2185
8 months      1992
10 years      1980
10 months     1843
7 months      1634
4 weeks       1535
9 years       1352
9 months      1303
12 years       947
1 weeks        935
11 months      809
11 years       774
1 week         701
13 years       624
14 years       419
3 days         384
2 days         354
15 years       345
1 day          285
6 days         249
4 days         228
0 years        167
16 years       156
5 weeks        147
5 days         143
17 years        88
18 years        51
19 years        26
20 years        19
22 years         6
-1 years         6
-3 years         1
24 years         1
23 years         1
-2 years         1
Name: Age upon Outcome, dtype: int64

#### Begin by dropping the "22 years" row.

In [26]:
indexNames = adoptions5[adoptions5["Age upon Outcome"] == "22 years"].index

In [27]:
adoptions5.drop(indexNames, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [28]:
adoptions5["Age upon Outcome"].value_counts()

1 year       19328
2 years      18392
2 months     16181
3 years       7430
3 months      6251
1 month       5639
4 years       4436
4 months      4170
5 years       4166
5 months      2994
6 years       2770
6 months      2674
8 years       2430
7 years       2396
3 weeks       2269
2 weeks       2185
8 months      1992
10 years      1980
10 months     1843
7 months      1634
4 weeks       1535
9 years       1352
9 months      1303
12 years       947
1 weeks        935
11 months      809
11 years       774
1 week         701
13 years       624
14 years       419
3 days         384
2 days         354
15 years       345
1 day          285
6 days         249
4 days         228
0 years        167
16 years       156
5 weeks        147
5 days         143
17 years        88
18 years        51
19 years        26
20 years        19
-1 years         6
-3 years         1
24 years         1
23 years         1
-2 years         1
Name: Age upon Outcome, dtype: int64

#### It worked!  Now drop the remaining 5 outlier rows.

In [29]:
indexNames1 = adoptions5[adoptions5["Age upon Outcome"] == "-1 years"].index
indexNames2 = adoptions5[adoptions5["Age upon Outcome"] == "23 years"].index
indexNames3 = adoptions5[adoptions5["Age upon Outcome"] == "-3 years"].index
indexNames4 = adoptions5[adoptions5["Age upon Outcome"] == "24 years"].index
indexNames5 = adoptions5[adoptions5["Age upon Outcome"] == "-2 years"].index

In [30]:
adoptions5.drop(indexNames1, inplace=True)
adoptions5.drop(indexNames2, inplace=True)
adoptions5.drop(indexNames3, inplace=True)
adoptions5.drop(indexNames4, inplace=True)
adoptions5.drop(indexNames5, inplace=True)

In [31]:
adoptions5["Age upon Outcome"].value_counts()

1 year       19328
2 years      18392
2 months     16181
3 years       7430
3 months      6251
1 month       5639
4 years       4436
4 months      4170
5 years       4166
5 months      2994
6 years       2770
6 months      2674
8 years       2430
7 years       2396
3 weeks       2269
2 weeks       2185
8 months      1992
10 years      1980
10 months     1843
7 months      1634
4 weeks       1535
9 years       1352
9 months      1303
12 years       947
1 weeks        935
11 months      809
11 years       774
1 week         701
13 years       624
14 years       419
3 days         384
2 days         354
15 years       345
1 day          285
6 days         249
4 days         228
0 years        167
16 years       156
5 weeks        147
5 days         143
17 years        88
18 years        51
19 years        26
20 years        19
Name: Age upon Outcome, dtype: int64

### Group and Recode Age upon Outcome column

In [32]:
def age (series): 
    if series == "1 year" : 
        return "0-1 years"
    if series == "2 years": 
        return "2-5 years"
    if series == "2 months" : 
        return "0-1 years"
    if series == "3 years" : 
        return "2-5 years"
    if series =="3 months":
        return "0-1 years"
    if series =="1 month":
        return "0-1 years"
    if series =="4 years":
        return "2-5 years"
    if series =="4 months":
        return "0-1 years"
    if series =="5 years":
        return "2-5 years"
    if series =="5 months":
        return "0-1 years"
    if series =="6 years":
        return "6-9 years"
    if series =="6 months":
        return "0-1 years"
    if series =="8 years":
        return "6-9 years"
    if series =="7 years":
        return "6-9 years"
    if series =="3 weeks":
        return "0-1 years"
    if series =="2 weeks":
        return "0-1 years"
    if series =="8 months":
        return "0-1 years"
    if series =="10 years":
        return "10-15 years"
    if series =="10 months":
        return "0-1 years"
    if series =="7 months":
        return "0-1 years"
    if series =="4 weeks":
        return "0-1 years"
    if series =="9 years":
        return "6-9 years"
    if series =="9 months":
        return "0-1 years"
    if series =="12 years":
        return "10-15 years"
    if series =="1 weeks":
        return "0-1 years"
    if series =="11 months":
        return "0-1 years"
    if series =="11 years":
        return "10-15 years"
    if series =="13 years":
        return "10-15 years"
    if series =="14 years":
        return "10-15 years"
    if series =="3 days":
        return "0-1 years"
    if series =="2 days":
        return "0-1 years"
    if series =="15 years":
        return "10-15 years"
    if series =="1 day":
        return "0-1 years"
    if series =="6 days":
        return "0-1 years"
    if series =="4 days":
        return "0-1 years"
    if series =="16 years":
        return "16+ years"
    if series =="5 weeks":
        return "0-1 years"
    if series =="5 days":
        return "0-1 years"
    if series =="17 years":
        return "16+ years"
    if series =="18 years":
        return "16+ years"
    if series =="19 years":
        return "16+ years"
    if series =="20 years":
        return "16+ years"
     
adoptions5["Age"] = adoptions5["Age upon Outcome"].apply(age)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  adoptions5["Age"] = adoptions5["Age upon Outcome"].apply(age)


In [33]:
adoptions5["Age"].value_counts()

0-1 years      73532
2-5 years      34424
6-9 years       8948
10-15 years     5089
16+ years        340
Name: Age, dtype: int64

### Drop Age upon Outcome column

In [34]:
adoptions5.drop(["Age upon Outcome"], axis=1, inplace=True)

In [35]:
adoptions5.head()

Unnamed: 0,Outcome Type,Animal Type,Breed,Color,Year,Month,Sex,Age
0,Rto-Adopt,Cat,Domestic Shorthair Mix,Brown Tabby/White,19,May,Male,2-5 years
1,Adoption,Dog,Chihuahua Shorthair Mix,White/Brown,18,Jul,Male,0-1 years
3,Adoption,Dog,Anatol Shepherd/Labrador Retriever,Buff,16,Feb,Male,0-1 years
4,Transfer,Cat,Domestic Shorthair Mix,Orange Tabby,14,Mar,Male,0-1 years
5,Adoption,Dog,Chihuahua Shorthair Mix,Brown,20,Oct,Female,6-9 years


### Recode "Outcome Type" column to Adoption or Not Adoption

In [36]:
adoptions5["Outcome Type"].value_counts()

Adoption           58910
Transfer           36797
Return to Owner    21731
Euthanasia          3687
Died                1033
Rto-Adopt            819
Disposal             137
Missing               65
Relocate               4
Name: Outcome Type, dtype: int64

In [37]:
def adopt (series): 
    if series == "Adoption" : 
        return 1
    else:
        return 0
    
adoptions5["AdoptionYN"] = adoptions5["Outcome Type"].apply(adopt)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  adoptions5["AdoptionYN"] = adoptions5["Outcome Type"].apply(adopt)


In [38]:
adoptions5.head()

Unnamed: 0,Outcome Type,Animal Type,Breed,Color,Year,Month,Sex,Age,AdoptionYN
0,Rto-Adopt,Cat,Domestic Shorthair Mix,Brown Tabby/White,19,May,Male,2-5 years,0
1,Adoption,Dog,Chihuahua Shorthair Mix,White/Brown,18,Jul,Male,0-1 years,1
3,Adoption,Dog,Anatol Shepherd/Labrador Retriever,Buff,16,Feb,Male,0-1 years,1
4,Transfer,Cat,Domestic Shorthair Mix,Orange Tabby,14,Mar,Male,0-1 years,0
5,Adoption,Dog,Chihuahua Shorthair Mix,Brown,20,Oct,Female,6-9 years,1


In [39]:
adoptions5["AdoptionYN"].value_counts()

0    64291
1    58910
Name: AdoptionYN, dtype: int64

### Drop Outcome Type column

In [40]:
adoptions5.drop(["Outcome Type"], axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [41]:
adoptions5.head()

Unnamed: 0,Animal Type,Breed,Color,Year,Month,Sex,Age,AdoptionYN
0,Cat,Domestic Shorthair Mix,Brown Tabby/White,19,May,Male,2-5 years,0
1,Dog,Chihuahua Shorthair Mix,White/Brown,18,Jul,Male,0-1 years,1
3,Dog,Anatol Shepherd/Labrador Retriever,Buff,16,Feb,Male,0-1 years,1
4,Cat,Domestic Shorthair Mix,Orange Tabby,14,Mar,Male,0-1 years,0
5,Dog,Chihuahua Shorthair Mix,Brown,20,Oct,Female,6-9 years,1


### Recode Animal Type to a numeric variable

In [42]:
adoptions5.dtypes

Animal Type    object
Breed          object
Color          object
Year           object
Month          object
Sex            object
Age            object
AdoptionYN      int64
dtype: object

In [43]:
def type_recode (series):
    if series == "Cat":
        return 0
    if series == "Dog":
        return 1
    
adoptions5['typeR'] = adoptions5['Animal Type'].apply(type_recode)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  adoptions5['typeR'] = adoptions5['Animal Type'].apply(type_recode)


In [44]:
adoptions5.head()

Unnamed: 0,Animal Type,Breed,Color,Year,Month,Sex,Age,AdoptionYN,typeR
0,Cat,Domestic Shorthair Mix,Brown Tabby/White,19,May,Male,2-5 years,0,0
1,Dog,Chihuahua Shorthair Mix,White/Brown,18,Jul,Male,0-1 years,1,1
3,Dog,Anatol Shepherd/Labrador Retriever,Buff,16,Feb,Male,0-1 years,1,1
4,Cat,Domestic Shorthair Mix,Orange Tabby,14,Mar,Male,0-1 years,0,0
5,Dog,Chihuahua Shorthair Mix,Brown,20,Oct,Female,6-9 years,1,1


### Convert data type of Year to integer

In [45]:
adoptions5['Year'] = adoptions5['Year'].astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  adoptions5['Year'] = adoptions5['Year'].astype(int)


In [46]:
adoptions5["Year"].dtypes

dtype('int32')

### Drop "unknown" rows from Sex column

In [47]:
adoptions5["Sex"].value_counts()

Male       61827
Female     56912
Unknown     4462
Name: Sex, dtype: int64

In [48]:
indexNames = adoptions5[adoptions5["Sex"] == "Unknown"].index

In [49]:
adoptions5.drop(indexNames, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [50]:
adoptions5["Sex"].value_counts()

Male      61827
Female    56912
Name: Sex, dtype: int64

### Recode Sex to a numeric variable

In [51]:
def sex_recode (series):
    if series == "Male":
        return 0
    if series == "Female":
        return 1
    
adoptions5['sexR'] = adoptions5['Sex'].apply(sex_recode)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  adoptions5['sexR'] = adoptions5['Sex'].apply(sex_recode)


In [52]:
adoptions5.head()

Unnamed: 0,Animal Type,Breed,Color,Year,Month,Sex,Age,AdoptionYN,typeR,sexR
0,Cat,Domestic Shorthair Mix,Brown Tabby/White,19,May,Male,2-5 years,0,0,0
1,Dog,Chihuahua Shorthair Mix,White/Brown,18,Jul,Male,0-1 years,1,1,0
3,Dog,Anatol Shepherd/Labrador Retriever,Buff,16,Feb,Male,0-1 years,1,1,0
4,Cat,Domestic Shorthair Mix,Orange Tabby,14,Mar,Male,0-1 years,0,0,0
5,Dog,Chihuahua Shorthair Mix,Brown,20,Oct,Female,6-9 years,1,1,1


### Recode Age to a numeric variable

In [53]:
adoptions5['Age'].value_counts()

0-1 years      70101
2-5 years      33931
6-9 years       8910
10-15 years     5054
16+ years        337
Name: Age, dtype: int64

In [54]:
adoptions5['Age'].dtypes

dtype('O')

In [55]:
def age_recode (series):
    if series == "0-1 years":
        return 0
    if series == "2-5 years":
        return 1
    if series == "6-9 years":
        return 2
    if series == "10-15 years":
        return 3
    if series == "16+ years":
        return 4
    
adoptions5['ageR'] = adoptions5['Age'].apply(age_recode)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  adoptions5['ageR'] = adoptions5['Age'].apply(age_recode)


In [56]:
adoptions5.head()

Unnamed: 0,Animal Type,Breed,Color,Year,Month,Sex,Age,AdoptionYN,typeR,sexR,ageR
0,Cat,Domestic Shorthair Mix,Brown Tabby/White,19,May,Male,2-5 years,0,0,0,1.0
1,Dog,Chihuahua Shorthair Mix,White/Brown,18,Jul,Male,0-1 years,1,1,0,0.0
3,Dog,Anatol Shepherd/Labrador Retriever,Buff,16,Feb,Male,0-1 years,1,1,0,0.0
4,Cat,Domestic Shorthair Mix,Orange Tabby,14,Mar,Male,0-1 years,0,0,0,0.0
5,Dog,Chihuahua Shorthair Mix,Brown,20,Oct,Female,6-9 years,1,1,1,2.0


In [57]:
adoptions5['ageR'].value_counts()

0.0    70101
1.0    33931
2.0     8910
3.0     5054
4.0      337
Name: ageR, dtype: int64

In [58]:
adoptions5.dtypes

Animal Type     object
Breed           object
Color           object
Year             int32
Month           object
Sex             object
Age             object
AdoptionYN       int64
typeR            int64
sexR             int64
ageR           float64
dtype: object

### Group and Recode Breed column

In [69]:
adoptions5["Breed"].value_counts()

Domestic Shorthair                         37529
Pit Bull Mix                                8472
Labrador Retriever Mix                      6953
Chihuahua Shorthair Mix                     6126
Domestic Medium Hair                        3876
                                           ...  
Australian Cattle Dog/American Foxhound        1
Cavalier Span/Chihuahua Longhair               1
Patterdale Terr/Chihuahua Shorthair            1
Vizsla/Catahoula                               1
Chihuahua Shorthair/Shetland Sheepdog          1
Name: Breed, Length: 2380, dtype: int64

#### Start with Cat breeds

In [70]:
adoptions5.replace(['Abyssinian', 'Abyssinian Mix'], 'Abyssinian', inplace =True)
adoptions5.replace(['American Curl Shorthair', 'American Curl Shorthair Mix', 'American Curl Shorthair/Siamese', 'American Shorthair', 'American Shorthair Mix', 'American Wirehair Mix'], 'American Shorthair', inplace =True)
adoptions5.replace(['Angora Mix', 'Angora/Persian'], 'Angora', inplace =True)
adoptions5.replace(['Balinese', 'Balinese Mix'], 'Balinese', inplace =True)
adoptions5.replace(['Bengal', 'Bengal Mix', 'Bengal/Domestic Shorthair'], 'Bengal', inplace =True)
adoptions5.replace(['British Shorthair', 'British Shorthair Mix'], 'British Shorthair', inplace =True)
adoptions5.replace(['Burmese', 'Burmese Mix'], 'Burmese', inplace =True)
adoptions5.replace(['Devon Rex', 'Devon Rex Mix'], 'Devon Rex', inplace =True)
adoptions5.replace(['Domestic Longhair', 'Domestic Longhair Mix', 'Domestic Longhair/Domestic Longhair', 'Domestic Longhair/Persian', 'Domestic Longhair/Rex', 'Domestic Longhair/Russian Blue', 'Domestic Longhair/Siamese'], 'Domestic Longhair', inplace =True)
adoptions5.replace(['Domestic Medium Hair', 'Domestic Medium Hair Mix', 'Domestic Medium Hair/Maine Coon', 'Domestic Medium Hair/Manx', 'Domestic Medium Hair/Siamese'], 'Domestic Medium Hair', inplace =True)
adoptions5.replace(['Domestic Shorthair', 'Domestic Shorthair Mix', 'Domestic Shorthair/Abyssinian', 'Domestic Shorthair/American Shorthair', 'Domestic Shorthair/Bengal', 'Domestic Shorthair/British Shorthair', 'Domestic Shorthair/Domestic Medium Hair', 'Domestic Shorthair/Domestic Shorthair', 'Domestic Shorthair/Maine Coon', 'Domestic Shorthair/Manx', 'Domestic Shorthair/Siamese'], 'Domestic Shorthair', inplace =True)
adoptions5.replace(['Havana Brown', 'Havana Brown Mix'], 'Havana Brown', inplace =True)
adoptions5.replace(['Exotic Shorthair', 'Exotic Shorthair Mix'], 'Exotic Shorthair', inplace =True)
adoptions5.replace(['Himalayan', 'Himalayan Mix'], 'Himalayan', inplace =True)
adoptions5.replace(['Japanese Bobtail Mix', 'Japanese Bobtail/American Curl Shorthair'], 'Japanese Bobtail', inplace =True)
adoptions5.replace(['Maine Coon', 'Maine Coon Mix'], 'Maine Coon', inplace =True)
adoptions5.replace(['Manx', 'Manx Mix', 'Manx/Domestic Longhair', 'Manx/Domestic Medium Hair', 'Manx/Domestic Shorthair', 'Manx/Siamese'], 'Manx', inplace =True)
adoptions5.replace(['Munchkin Longhair Mix', 'Munchkin Shorthair Mix'], 'Munchkin', inplace =True)
adoptions5.replace(['Oriental Sh', 'Oriental Sh Mix'], 'Oriental Shorthair', inplace =True)
adoptions5.replace(['Persian', 'Persian Mix'], 'Persian', inplace =True)
adoptions5.replace(['Pixiebob Shorthair', 'Pixiebob Shorthair Mix'], 'Pixiebob Shorthair', inplace =True)
adoptions5.replace(['Ragdoll', 'Ragdoll Mix', 'Ragdoll/Munchkin Longhair', 'Ragdoll/Snowshoe'], 'Ragdoll', inplace =True)
adoptions5.replace(['Russian Blue', 'Russian Blue Mix'], 'Russian Blue', inplace =True)
adoptions5.replace(['Scottish Fold Mix', 'Scottish Fold/Domestic Shorthair'], 'Scottish Fold', inplace =True)
adoptions5.replace(['Siamese', 'Siamese Mix', 'Siamese/Angora', 'Siamese/Domestic Shorthair', 'Siamese/Japanese Bobtail'], 'Siamese', inplace =True)
adoptions5.replace(['Snowshoe', 'Snowshoe Mix', 'Snowshoe/Domestic Shorthair', 'Snowshoe/Ragdoll', 'Snowshoe/Siamese'], 'Snowshoe', inplace =True)
adoptions5.replace(['Tonkinese', 'Tonkinese Mix'], 'Tonkinese', inplace =True)
adoptions5.replace(['Turkish Angora', 'Turkish Angora Mix'], 'Turkish Angora', inplace =True)

In [71]:
adoptions5.Breed.nunique()

2380

#### Now Group Dog Breeds

In [73]:
adoptions5.replace(['Affenpinscher', 'Affenpinscher Mix'], 'Affenpinscher', inplace =True)
adoptions5.replace(['Afghan Hound Mix', 'Afghan Hound/German Shepherd', 'Afghan Hound/Labrador Retriever'], 'Afghan Hound', inplace =True)
adoptions5.replace(['Airedale Terrier', 'Airedale Terrier Mix', 'Airedale Terrier/Cairn Terrier', 'Airedale Terrier/Irish Terrier', 'Airedale Terrier/Labrador Retriever', 'Airedale Terrier/Miniature Schnauzer', 'Airedale Terrier/Otterhound', 'Airedale Terrier/Standard Poodle'], 'Airedale Terrier', inplace =True)
adoptions5.replace(['Akbash', 'Akbash Mix', 'Akbash/Great Pyrenees'], 'Akbash', inplace =True)
adoptions5.replace(['Akita', 'Akita Mix', 'Akita/Australian Cattle Dog', 'Akita/Belgian Malinois', 'Akita/Border Collie', 'Akita/Chow Chow', 'Akita/German Shepherd', 'Akita/Great Pyrenees', 'Akita/Labrador Retriever', 'Akita/Mastiff', 'Akita/Pit Bull', 'Akita/Siberian Husky'], 'Akita', inplace =True)
adoptions5.replace(['Alaskan Husky', 'Alaskan Husky Mix', 'Alaskan Husky/Australian Shepherd', 'Alaskan Husky/Border Collie', 'Alaskan Husky/Catahoula', 'Alaskan Husky/Chow Chow', 'Alaskan Husky/Collie Smooth', 'Alaskan Husky/German Shepherd', 'Alaskan Husky/Labrador Retriever', 'Alaskan Husky/Miniature Pinscher'], 'Alaskan Husky', inplace =True)
adoptions5.replace(['Alaskan Malamute', 'Alaskan Malamute Mix', 'Alaskan Malamute/Akita', 'Alaskan Malamute/Alaskan Husky', 'Alaskan Malamute/Australian Kelpie', 'Alaskan Malamute/Border Collie', 'Alaskan Malamute/German Shepherd', 'Alaskan Malamute/Labrador Retriever', 'Alaskan Malamute/Siberian Husky'], 'Alaskan Malamute', inplace =True)
adoptions5.replace(['American Bulldog', 'American Bulldog Mix', 'American Bulldog/American Staffordshire Terrier', 'American Bulldog/Basset Hound', 'American Bulldog/Blue Lacy', 'American Bulldog/Boxer', 'American Bulldog/Chinese Sharpei', 'American Bulldog/Dalmatian', 'American Bulldog/English Bulldog', 'American Bulldog/Great Pyrenees', 'American Bulldog/Labrador Retriever', 'American Bulldog/Mastiff', 'American Bulldog/Pit Bull', 'American Bulldog/Pointer', 'American Bulldog/Queensland Heeler'], 'American Bulldog', inplace =True)
adoptions5.replace(['American Eskimo', 'American Eskimo Mix', 'American Eskimo/Alaskan Husky', 'American Eskimo/Australian Shepherd', 'American Eskimo/Border Collie'], 'American Eskimo', inplace =True)
adoptions5.replace(['American Foxhound', 'American Foxhound Mix', 'American Foxhound/Beagle', 'American Foxhound/Labrador Retriever', 'American Foxhound/Pointer'], 'American Foxhound', inplace =True)
adoptions5.replace(['American Pit Bull Terrier', 'American Pit Bull Terrier Mix', 'American Pit Bull Terrier/American Bulldog', 'American Pit Bull Terrier/American Pit Bull Terrier', 'American Pit Bull Terrier/Basenji', 'American Pit Bull Terrier/Basset Hound', 'American Pit Bull Terrier/Black Mouth Cur', 'American Pit Bull Terrier/Boston Terrier', 'American Pit Bull Terrier/Boxer', 'American Pit Bull Terrier/Catahoula', 'American Pit Bull Terrier/Chinese Sharpei', 'American Pit Bull Terrier/Dachshund', 'American Pit Bull Terrier/Labrador Retriever', 'American Pit Bull Terrier/Pit Bull', 'American Pit Bull Terrier/Pointer', 'American Pit Bull Terrier/Weimaraner'], 'American Pit Bull Terrier', inplace =True)
adoptions5.replace(['American Staffordshire Terrier', 'American Staffordshire Terrier Mix', 'American Staffordshire Terrier/American Bulldog', 'American Staffordshire Terrier/Australian Cattle Dog', 'American Staffordshire Terrier/Boxer', 'American Staffordshire Terrier/Catahoula', 'American Staffordshire Terrier/Chinese Sharpei', 'American Staffordshire Terrier/French Bulldog', 'American Staffordshire Terrier/Jack Russell Terrier', 'American Staffordshire Terrier/Labrador Retriever', 'American Staffordshire Terrier/Pit Bull', 'American Staffordshire Terrier/Plott Hound', 'American Staffordshire Terrier/Rhod Ridgeback', 'American Staffordshire Terrier/Rottweiler'], 'American Staffordshire Terrier', inplace =True)
adoptions5.replace(['Anatol Shepherd', 'Anatol Shepherd Mix', 'Anatol Shepherd/Akita', 'Anatol Shepherd/Alaskan Husky', 'Anatol Shepherd/American Bulldog', 'Anatol Shepherd/Australian Cattle Dog', 'Anatol Shepherd/Australian Shepherd', 'Anatol Shepherd/Belgian Malinois', 'Anatol Shepherd/Black Mouth Cur', 'Anatol Shepherd/Border Collie', 'Anatol Shepherd/Boxer', 'Anatol Shepherd/Catahoula', 'Anatol Shepherd/Chow Chow', 'Anatol Shepherd/Collie Rough', 'Anatol Shepherd/German Shepherd', 'Anatol Shepherd/German Shorthair Pointer', 'Anatol Shepherd/Great Pyrenees', 'Anatol Shepherd/Harrier', 'Anatol Shepherd/Kangal', 'Anatol Shepherd/Labrador Retriever', 'Anatol Shepherd/Mastiff', 'Anatol Shepherd/Pit Bull', 'Anatol Shepherd/Redbone Hound', 'Anatol Shepherd/Rhod Ridgeback', 'Anatol Shepherd/Saluki', 'Anatol Shepherd/Siberian Husky', 'Anatol Shepherd/St. Bernard Smooth Coat'], 'Anatol Shepherd', inplace =True)
adoptions5.replace(['Australian Cattle Dog', 'Australian Cattle Dog Mix', 'Australian Cattle Dog/Akita', 'Australian Cattle Dog/Alaskan Husky', 'Australian Cattle Dog/American Foxhound', 'Australian Cattle Dog/American Pit Bull Terrier', 'Australian Cattle Dog/American Staffordshire Terrier', 'Australian Cattle Dog/Anatol Shepherd', 'Australian Cattle Dog/Australian Cattle Dog', 'Australian Cattle Dog/Australian Kelpie', 'Australian Cattle Dog/Australian Shepherd', 'Australian Cattle Dog/Basenji', 'Australian Cattle Dog/Basset Hound', 'Australian Cattle Dog/Beagle', 'Australian Cattle Dog/Belgian Malinois', 'Australian Cattle Dog/Bernese Mountain Dog', 'Australian Cattle Dog/Black Mouth Cur', 'Australian Cattle Dog/Bloodhound', 'Australian Cattle Dog/Border Collie', 'Australian Cattle Dog/Boston Terrier', 'Australian Cattle Dog/Boxer', 'Australian Cattle Dog/Bull Terrier', 'Australian Cattle Dog/Cairn Terrier', 'Australian Cattle Dog/Cardigan Welsh Corgi', 'Australian Cattle Dog/Catahoula', 'Australian Cattle Dog/Chihuahua Shorthair', 'Australian Cattle Dog/Chinese Crested', 'Australian Cattle Dog/Chinese Sharpei', 'Australian Cattle Dog/Chow Chow', 'Australian Cattle Dog/Collie Rough', 'Australian Cattle Dog/Collie Smooth', 'Australian Cattle Dog/Dachshund', 'Australian Cattle Dog/Dalmatian', 'Australian Cattle Dog/Dutch Shepherd', 'Australian Cattle Dog/English Coonhound', 'Australian Cattle Dog/German Shepherd', 'Australian Cattle Dog/German Shorthair Pointer', 'Australian Cattle Dog/Golden Retriever', 'Australian Cattle Dog/Great Pyrenees', 'Australian Cattle Dog/Jack Russell Terrier', 'Australian Cattle Dog/Labrador Retriever', 'Australian Cattle Dog/Norfolk Terrier', 'Australian Cattle Dog/Parson Russell Terrier', 'Australian Cattle Dog/Patterdale Terr', 'Australian Cattle Dog/Pembroke Welsh Corgi', 'Australian Cattle Dog/Pit Bull', 'Australian Cattle Dog/Plott Hound', 'Australian Cattle Dog/Pointer', 'Australian Cattle Dog/Pug', 'Australian Cattle Dog/Queensland Heeler', 'Australian Cattle Dog/Rat Terrier', 'Australian Cattle Dog/Rhod Ridgeback', 'Australian Cattle Dog/Siberian Husky', 'Australian Cattle Dog/St. Bernard Smooth Coat', 'Australian Cattle Dog/Staffordshire', 'Australian Cattle Dog/Treeing Walker Coonhound', 'Australian Cattle Dog/Vizsla', 'Australian Cattle Dog/Whippet'], 'Australian Cattle Dog', inplace =True)
adoptions5.replace(['Australian Kelpie', 'Australian Kelpie Mix', 'Australian Kelpie/Alaskan Husky', 'Australian Kelpie/American Pit Bull Terrier', 'Australian Kelpie/Australian Cattle Dog', 'Australian Kelpie/Australian Shepherd', 'Australian Kelpie/Beagle', 'Australian Kelpie/Bichon Frise', 'Australian Kelpie/Blue Lacy', 'Australian Kelpie/Border Collie', 'Australian Kelpie/Bull Terrier', 'Australian Kelpie/Cardigan Welsh Corgi', 'Australian Kelpie/Catahoula', 'Australian Kelpie/Chihuahua Longhair', 'Australian Kelpie/Chihuahua Shorthair', 'Australian Kelpie/Chinese Sharpei', 'Australian Kelpie/Dachshund', 'Australian Kelpie/Finnish Spitz', 'Australian Kelpie/German Shepherd', 'Australian Kelpie/Golden Retriever', 'Australian Kelpie/Labrador Retriever', 'Australian Kelpie/Miniature Pinscher', 'Australian Kelpie/Pit Bull', 'Australian Kelpie/Plott Hound', 'Australian Kelpie/Rat Terrier', 'Australian Kelpie/Shiba Inu', 'Australian Kelpie/Siberian Husky', 'Australian Kelpie/Staffordshire', 'Australian Kelpie/Whippet'], 'Australian Kelpie', inplace =True)
adoptions5.replace(['Australian Shepherd', 'Australian Shepherd Mix', 'Australian Shepherd/Alaskan Malamute', 'Australian Shepherd/Anatol Shepherd', 'Australian Shepherd/Australian Cattle Dog', 'Australian Shepherd/Australian Kelpie', 'Australian Shepherd/Basset Hound', 'Australian Shepherd/Beagle', 'Australian Shepherd/Border Collie', 'Australian Shepherd/Boxer', 'Australian Shepherd/Brittany', 'Australian Shepherd/Cardigan Welsh Corgi', 'Australian Shepherd/Catahoula', 'Australian Shepherd/Cavalier Span', 'Australian Shepherd/Chihuahua Longhair', 'Australian Shepherd/Chihuahua Shorthair', 'Australian Shepherd/Chow Chow', 'Australian Shepherd/Cocker Spaniel', 'Australian Shepherd/Collie Rough', 'Australian Shepherd/Collie Smooth', 'Australian Shepherd/Dachshund', 'Australian Shepherd/Dalmatian', 'Australian Shepherd/English Setter', 'Australian Shepherd/English Springer Spaniel', 'Australian Shepherd/Feist', 'Australian Shepherd/Field Spaniel', 'ustralian Shepherd/German Shepherd', 'Australian Shepherd/Golden Retriever', 'Australian Shepherd/Great Pyrenees', 'Australian Shepherd/Greyhound', 'Australian Shepherd/Jack Russell Terrier', 'Australian Shepherd/Labrador Retriever', 'Australian Shepherd/Nova Scotia Duck Tolling Retriever', 'Australian Shepherd/Parson Russell Terrier', 'Australian Shepherd/Pembroke Welsh Corgi', 'Australian Shepherd/Pit Bull', 'Australian Shepherd/Plott Hound', 'Australian Shepherd/Queensland Heeler', 'Australian Shepherd/Rhod Ridgeback', 'Australian Shepherd/Rottweiler', 'Australian Shepherd/Siberian Husky', 'Australian Shepherd/Staffordshire', 'Australian Shepherd/Standard Poodle', 'Australian Shepherd/Unknown'], 'Australian Shepherd', inplace =True)
adoptions5.replace(['Australian Terrier', 'Australian Terrier Mix', 'Australian Terrier/Standard Schnauzer'], 'Australian Terrier', inplace =True)
adoptions5.replace(['Basenji', 'Basenji Mix', 'Basenji/Australian Cattle Dog', 'Basenji/Australian Kelpie', 'Basenji/Beagle', 'Basenji/Carolina Dog', 'Basenji/Chihuahua Longhair', 'Basenji/Chihuahua Shorthair', 'Basenji/Chow Chow', 'Basenji/Jack Russell Terrier', 'Basenji/Labrador Retriever', 'Basenji/Pit Bull', 'Basenji/Rhod Ridgeback'], 'Basenji', inplace =True)
adoptions5.replace(['Basset Hound', 'Basset Hound Mix', 'Basset Hound/American Pit Bull Terrier', 'Basset Hound/Australian Cattle Dog', 'Basset Hound/Australian Shepherd','Basset Hound/Beagle', 'Basset Hound/Beauceron', 'Basset Hound/Black/Tan Hound', 'Basset Hound/Blue Lacy', 'Basset Hound/Border Collie', 'Basset Hound/Boxer', 'Basset Hound/Cairn Terrier', 'Basset Hound/Cardigan Welsh Corgi', 'Basset Hound/Chinese Sharpei', 'Basset Hound/Dachshund', 'Basset Hound/English Cocker Spaniel', 'Basset Hound/English Pointer', 'Basset Hound/French Bulldog', 'Basset Hound/German Shepherd', 'Basset Hound/Golden Retriever', 'Basset Hound/Great Pyrenees', 'Basset Hound/Labrador Retriever', 'Basset Hound/Pembroke Welsh Corgi', 'Basset Hound/Pit Bull', 'Basset Hound/Pointer', 'Basset Hound/Rat Terrier'], 'Basset Hound', inplace =True)
adoptions5.replace(['Beagle', 'Beagle Mix', 'Beagle/American Foxhound', 'Beagle/American Staffordshire Terrier', 'Beagle/Australian Cattle Dog', 'Beagle/Australian Kelpie', 'Beagle/Australian Shepherd', 'Beagle/Basset Hound', 'Beagle/Black Mouth Cur', 'Beagle/Border Collie', 'Beagle/Border Terrier', 'Beagle/Borzoi', 'Beagle/Boston Terrier', 'Beagle/Boxer', 'Beagle/Cairn Terrier', 'Beagle/Cardigan Welsh Corgi', 'Beagle/Catahoula', 'Beagle/Cavalier Span', 'Beagle/Chihuahua Longhair', 'Beagle/Chihuahua Shorthair', 'Beagle/Chinese Sharpei', 'Beagle/Cocker Spaniel', 'Beagle/Dachshund', 'Beagle/Dachshund Longhair', 'Beagle/English Coonhound', 'Beagle/German Shepherd', 'Beagle/German Shorthair Pointer', 'Beagle/Harrier', 'Beagle/Jack Russell Terrier', 'Beagle/Labrador Retriever', 'Beagle/Manchester Terrier', 'Beagle/Mastiff', 'Beagle/Miniature Pinscher', 'Beagle/Parson Russell Terrier', 'Beagle/Pekingese', 'Beagle/Pembroke Welsh Corgi', 'Beagle/Pit Bull', 'Beagle/Plott Hound', 'Beagle/Pointer', 'Beagle/Pug', 'Beagle/Queensland Heeler', 'Beagle/Rat Terrier', 'Beagle/Staffordshire', 'Beagle/Standard Poodle', 'Beagle/Treeing Walker Coonhound', 'Beagle/Vizsla', 'Beagle/Whippet'], 'Beagle', inplace =True)
adoptions5.replace(['Bearded Collie', 'Bearded Collie Mix', 'Bearded Collie/Border Collie'], 'Bearded Collie', inplace =True)
adoptions5.replace(['Beauceron', 'Beauceron Mix', 'Beauceron/Labrador Retriever'], 'Beauceron', inplace =True)
adoptions5.replace(['Belgian Malinois', 'Belgian Malinois Mix', 'Belgian Malinois/Australian Shepherd', 'Belgian Malinois/Cardigan Welsh Corgi', 'Belgian Malinois/German Shepherd', 'Belgian Malinois/Great Pyrenees', 'Belgian Malinois/Labrador Retriever', 'Belgian Malinois/Pit Bull', 'Belgian Malinois/Treeing Cur'], 'Belgian Malinois', inplace =True)
adoptions5.replace(['Belgian Sheepdog', 'Belgian Sheepdog Mix'], 'Belgian Sheepdog', inplace =True)
adoptions5.replace(['Belgian Tervuren Mix', 'Belgian Tervuren/German Shepherd'], 'Belgian Tervuren', inplace =True)
adoptions5.replace(['Bernese Mountain Dog', 'Bernese Mountain Dog Mix', 'Bernese Mountain Dog/Great Pyrenees', 'Bernese Mountain Dog/Rottweiler'], 'Bernese Mountain Dog', inplace =True)
adoptions5.replace(['Bichon Frise', 'Bichon Frise Mix', 'Bichon Frise/Lhasa Apso', 'Bichon Frise/Miniature Poodle', 'Bichon Frise/Pekingese', 'Bichon Frise/Toy Poodle'], 'Bichon Frise', inplace =True)
adoptions5.replace(['Black Mouth Cur', 'Black Mouth Cur Mix', 'Black Mouth Cur/American Staffordshire Terrier', 'Black Mouth Cur/Anatol Shepherd', 'Black Mouth Cur/Australian Cattle Dog', 'Black Mouth Cur/Basset Hound', 'Black Mouth Cur/Beagle', 'Black Mouth Cur/Belgian Malinois', 'Black Mouth Cur/Blue Lacy', 'Black Mouth Cur/Boxer', 'Black Mouth Cur/Catahoula', 'Black Mouth Cur/Chinese Sharpei', 'Black Mouth Cur/Chow Chow', 'Black Mouth Cur/Dachshund', 'Black Mouth Cur/Doberman Pinsch', 'Black Mouth Cur/German Shepherd', 'Black Mouth Cur/Golden Retriever', 'Black Mouth Cur/Great Dane', 'Black Mouth Cur/Greyhound', 'Black Mouth Cur/Labrador Retriever', 'Black Mouth Cur/Mastiff', 'Black Mouth Cur/Pit Bull', 'Black Mouth Cur/Plott Hound', 'Black Mouth Cur/Pointer', 'Black Mouth Cur/Rhod Ridgeback', 'Black Mouth Cur/Siberian Husky', 'Black Mouth Cur/Staffordshire'], 'Black Mouth Cur', inplace =True)

In [74]:
adoptions5.Breed.nunique()

2018

#### Continue Grouping Dog Breeds

In [75]:
adoptions5.Color.nunique()

559

### Convert adoptions5 to csv for exploratory analysis in Tableau

In [58]:
os.makedirs('data', exist_ok=True)  # create "data" folder in Final Projects folder
adoptions5.to_csv('data/adoptions5.csv')   # convert adoptions5 from a pandas dataframe to a csv file and save in "data" folder

## Analyses

[Analyses in R](http://localhost:8888/notebooks/OneDrive/Documents/GitHub/FinalProject/FinalProject-Analyses.ipynb)