## ML Project

In [215]:
import pandas as pd
import os
import matplotlib.pyplot as plt
from datetime import date
import numpy as np

In [216]:
file = "aac_shelter_outcomes.csv"

In [217]:
df = pd.read_csv(file)

In [218]:
df['animal_type'].value_counts()

Dog          44242
Cat          29422
Other         4249
Bird           334
Livestock        9
Name: animal_type, dtype: int64

In [219]:
df.columns

Index(['age_upon_outcome', 'animal_id', 'animal_type', 'breed', 'color',
       'date_of_birth', 'datetime', 'monthyear', 'name', 'outcome_subtype',
       'outcome_type', 'sex_upon_outcome'],
      dtype='object')

In [220]:
df = df.loc[(df['animal_type'] == 'Dog') | (df['animal_type'] == 'Cat')]

In [221]:
dog_df = df.loc[df['animal_type'] == 'Dog']

In [222]:
len(dog_df['breed'].unique())

1893

In [223]:
cat_df = df.loc[df['animal_type'] == 'Cat']

In [224]:
len(cat_df['breed'].unique())

79

In [225]:
table = pd.pivot_table(dog_df, values='animal_id', index=['breed'], aggfunc='count')
table = table.reset_index()
table = table.rename(columns={"animal_id": "count"})
table = table.sort_values(by='count', ascending=False)

In [226]:
new_table = table.head(10)
top_10_breeds = new_table['breed'].to_list()
top_10_breeds

['Pit Bull Mix',
 'Chihuahua Shorthair Mix',
 'Labrador Retriever Mix',
 'German Shepherd Mix',
 'Australian Cattle Dog Mix',
 'Dachshund Mix',
 'Boxer Mix',
 'Miniature Poodle Mix',
 'Border Collie Mix',
 'Catahoula Mix']

## Clean Dog Dataframe

In [227]:
dog_df.columns

Index(['age_upon_outcome', 'animal_id', 'animal_type', 'breed', 'color',
       'date_of_birth', 'datetime', 'monthyear', 'name', 'outcome_subtype',
       'outcome_type', 'sex_upon_outcome'],
      dtype='object')

In [228]:
dog_df['outcome_subtype'].value_counts()

Partner                9516
Foster                 2162
Suffering               684
Aggressive              501
Offsite                 254
Behavior                142
In Kennel                87
Rabies Risk              85
Medical                  60
At Vet                   23
In Foster                23
Court/Investigation      18
Possible Theft            7
In Surgery                7
Enroute                   4
Barn                      1
Snr                       1
Name: outcome_subtype, dtype: int64

In [229]:
dog_df.head()

Unnamed: 0,age_upon_outcome,animal_id,animal_type,breed,color,date_of_birth,datetime,monthyear,name,outcome_subtype,outcome_type,sex_upon_outcome
1,1 year,A666430,Dog,Beagle Mix,White/Brown,2012-11-06T00:00:00,2013-11-07T11:47:00,2013-11-07T11:47:00,Lucy,Partner,Transfer,Spayed Female
2,1 year,A675708,Dog,Pit Bull,Blue/White,2013-03-31T00:00:00,2014-06-03T14:20:00,2014-06-03T14:20:00,*Johnny,,Adoption,Neutered Male
3,9 years,A680386,Dog,Miniature Schnauzer Mix,White,2005-06-02T00:00:00,2014-06-15T15:50:00,2014-06-15T15:50:00,Monday,Partner,Transfer,Neutered Male
5,4 months,A664462,Dog,Leonberger Mix,Brown/White,2013-06-03T00:00:00,2013-10-07T13:06:00,2013-10-07T13:06:00,*Edgar,Partner,Transfer,Intact Male
7,3 years,A692618,Dog,Chihuahua Shorthair Mix,Brown,2011-11-23T00:00:00,2014-12-08T15:55:00,2014-12-08T15:55:00,*Ella,Partner,Transfer,Spayed Female


In [230]:
# clean date of birth and datetime columns
dog_df['date_of_birth'] = dog_df['date_of_birth'].str.split('T', n=1, expand=True)
dog_df['datetime'] = dog_df['datetime'].str.split('T', n=1, expand=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [231]:
# change date of birth and datetime types to date time
dog_df['date_of_birth'] = pd.to_datetime(dog_df['date_of_birth'])
dog_df['datetime'] = pd.to_datetime(dog_df['datetime'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [232]:
dog_df.dtypes

age_upon_outcome            object
animal_id                   object
animal_type                 object
breed                       object
color                       object
date_of_birth       datetime64[ns]
datetime            datetime64[ns]
monthyear                   object
name                        object
outcome_subtype             object
outcome_type                object
sex_upon_outcome            object
dtype: object

In [233]:
dog_df.head()

Unnamed: 0,age_upon_outcome,animal_id,animal_type,breed,color,date_of_birth,datetime,monthyear,name,outcome_subtype,outcome_type,sex_upon_outcome
1,1 year,A666430,Dog,Beagle Mix,White/Brown,2012-11-06,2013-11-07,2013-11-07T11:47:00,Lucy,Partner,Transfer,Spayed Female
2,1 year,A675708,Dog,Pit Bull,Blue/White,2013-03-31,2014-06-03,2014-06-03T14:20:00,*Johnny,,Adoption,Neutered Male
3,9 years,A680386,Dog,Miniature Schnauzer Mix,White,2005-06-02,2014-06-15,2014-06-15T15:50:00,Monday,Partner,Transfer,Neutered Male
5,4 months,A664462,Dog,Leonberger Mix,Brown/White,2013-06-03,2013-10-07,2013-10-07T13:06:00,*Edgar,Partner,Transfer,Intact Male
7,3 years,A692618,Dog,Chihuahua Shorthair Mix,Brown,2011-11-23,2014-12-08,2014-12-08T15:55:00,*Ella,Partner,Transfer,Spayed Female


In [234]:
# calculate the age in months for each row
dog_df['age_months'] = ((dog_df.datetime - dog_df.date_of_birth)/np.timedelta64(1, 'M'))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [235]:
dog_df.head()

Unnamed: 0,age_upon_outcome,animal_id,animal_type,breed,color,date_of_birth,datetime,monthyear,name,outcome_subtype,outcome_type,sex_upon_outcome,age_months
1,1 year,A666430,Dog,Beagle Mix,White/Brown,2012-11-06,2013-11-07,2013-11-07T11:47:00,Lucy,Partner,Transfer,Spayed Female,12.024888
2,1 year,A675708,Dog,Pit Bull,Blue/White,2013-03-31,2014-06-03,2014-06-03T14:20:00,*Johnny,,Adoption,Neutered Male,14.094745
3,9 years,A680386,Dog,Miniature Schnauzer Mix,White,2005-06-02,2014-06-15,2014-06-15T15:50:00,Monday,Partner,Transfer,Neutered Male,108.421117
5,4 months,A664462,Dog,Leonberger Mix,Brown/White,2013-06-03,2013-10-07,2013-10-07T13:06:00,*Edgar,Partner,Transfer,Intact Male,4.139715
7,3 years,A692618,Dog,Chihuahua Shorthair Mix,Brown,2011-11-23,2014-12-08,2014-12-08T15:55:00,*Ella,Partner,Transfer,Spayed Female,36.501776


In [236]:
#filter dog df to the following breeds
dog_df = dog_df.loc[(dog_df['breed']== 'Pit Bull Mix') |
                   (dog_df['breed']== 'Chihuahua Shorthair Mix') |
                   (dog_df['breed']== 'Labrador Retriever Mix') |
                    (dog_df['breed']== 'German Shepherd Mix') |
                    (dog_df['breed']== 'Australian Cattle Dog Mix') |
                    (dog_df['breed']== 'Dachshund Mix') |
                    (dog_df['breed']== 'Boxer Mix') |
                    (dog_df['breed']== 'Miniature Poodle Mix') |
                    (dog_df['breed']== 'Border Collie Mix') |
                    (dog_df['breed']== 'Catahoula Mix')]

dog_df = dog_df.loc[(dog_df['sex_upon_outcome'] != 'Unknown')]

In [237]:
dog_df['sex_upon_outcome'].value_counts()

Neutered Male    8935
Spayed Female    8011
Intact Male      2402
Intact Female    2169
Name: sex_upon_outcome, dtype: int64

In [238]:
# split sex_upon_outcome column to 2 separate columns: gender and neutered/inact/spayed
new=dog_df['sex_upon_outcome'].str.split(" ", n=1, expand=True)
dog_df["gender"]= new[1] 
dog_df["neutered_intact_spayed"]=new[0]

In [239]:
dog_df.head()

Unnamed: 0,age_upon_outcome,animal_id,animal_type,breed,color,date_of_birth,datetime,monthyear,name,outcome_subtype,outcome_type,sex_upon_outcome,age_months,gender,neutered_intact_spayed
7,3 years,A692618,Dog,Chihuahua Shorthair Mix,Brown,2011-11-23,2014-12-08,2014-12-08T15:55:00,*Ella,Partner,Transfer,Spayed Female,36.501776,Female,Spayed
14,8 years,A690350,Dog,Labrador Retriever Mix,Black,2006-10-18,2014-10-26,2014-10-26T18:20:00,Shy,,Return to Owner,Neutered Male,96.26481,Male,Neutered
16,1 year,A674298,Dog,Pit Bull Mix,Brown Brindle/White,2013-03-11,2014-04-16,2014-04-16T12:51:00,*Newt,Partner,Transfer,Neutered Male,13.174809,Male,Neutered
27,2 months,A667311,Dog,Labrador Retriever Mix,Yellow,2013-09-01,2013-11-19,2013-11-19T18:30:00,Promise,,Adoption,Spayed Female,2.595536,Female,Spayed
28,4 months,A690699,Dog,Chihuahua Shorthair Mix,Blue/Tan,2014-06-17,2014-11-04,2014-11-04T18:03:00,Minnie,Partner,Transfer,Intact Female,4.599684,Female,Intact


In [240]:
#function to group colors 
def color_mapper(color_string):
    split_array = color_string.split('/')
    first_color = split_array[0]
    if (len(split_array) > 1):
        first_color_split = first_color.split(' ')
        return first_color_split[0] + ' Mix'
    else:
        first_color_split = first_color.split(' ')
        return first_color_split[0]

# print(color_mapper('Black'))
# print(color_mapper('Black/Brown'))
# print(color_mapper('Black Blue/White'))
# print(color_mapper('Brown Tiger'))

In [241]:
#apply color function
dog_df['color'] = df['color'].apply(color_mapper)
dog_df

Unnamed: 0,age_upon_outcome,animal_id,animal_type,breed,color,date_of_birth,datetime,monthyear,name,outcome_subtype,outcome_type,sex_upon_outcome,age_months,gender,neutered_intact_spayed
7,3 years,A692618,Dog,Chihuahua Shorthair Mix,Brown,2011-11-23,2014-12-08,2014-12-08T15:55:00,*Ella,Partner,Transfer,Spayed Female,36.501776,Female,Spayed
14,8 years,A690350,Dog,Labrador Retriever Mix,Black,2006-10-18,2014-10-26,2014-10-26T18:20:00,Shy,,Return to Owner,Neutered Male,96.264810,Male,Neutered
16,1 year,A674298,Dog,Pit Bull Mix,Brown Mix,2013-03-11,2014-04-16,2014-04-16T12:51:00,*Newt,Partner,Transfer,Neutered Male,13.174809,Male,Neutered
27,2 months,A667311,Dog,Labrador Retriever Mix,Yellow,2013-09-01,2013-11-19,2013-11-19T18:30:00,Promise,,Adoption,Spayed Female,2.595536,Female,Spayed
28,4 months,A690699,Dog,Chihuahua Shorthair Mix,Blue Mix,2014-06-17,2014-11-04,2014-11-04T18:03:00,Minnie,Partner,Transfer,Intact Female,4.599684,Female,Intact
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
78246,2 years,A738395,Dog,Chihuahua Shorthair Mix,Black Mix,2015-11-14,2018-02-01,2018-02-01T18:23:00,Maxie,,Adoption,Neutered Male,26.612456,Male,Neutered
78247,2 years,A761266,Dog,Border Collie Mix,Black Mix,2015-11-01,2018-02-01,2018-02-01T18:32:00,Baily,,Return to Owner,Spayed Female,27.039570,Female,Spayed
78248,1 year,A725872,Dog,German Shepherd Mix,Tan Mix,2016-02-03,2018-02-01,2018-02-01T18:20:00,Lova,,Return to Owner,Intact Female,23.951210,Female,Intact
78249,2 years,A761265,Dog,Border Collie Mix,Black Mix,2015-11-01,2018-02-01,2018-02-01T18:31:00,Lucas,,Return to Owner,Neutered Male,27.039570,Male,Neutered


In [242]:
len(dog_df['color'].unique())

36

In [243]:
#clean colors 
color = {'Orange': 'Apricot',
         'Liver Mix': 'Brown Mix',
         'Liver': 'Brown',
         'Buff': 'Brown',
         "Buff Mix": 'Brown Mix',
         "Cream": "Yellow",
         "Cream Mix": "Yellow",
         "Fawn": "Yellow",
         "Fawn Mix": "Yellow Mix",
         "Gold": "Yellow",
         "Gold Mix": "Yellow Mix",
         "Sable": 'Brown',
         "Sable Mix": "Brown Mix",
         "Silver Mix": "Gray",
         "Chocolate": "Brown",
         "Chocolate Mix": "Brown Mix"}

dog_df['color'] = dog_df['color'].replace(color)

In [244]:
dog_df['color'].value_counts()

Black Mix       4397
Brown Mix       3463
White Mix       2290
Brown           2069
Tan Mix         1632
Black           1527
Blue Mix        1129
Tan             1065
White            859
Yellow           707
Tricolor         601
Red Mix          519
Red              457
Yellow Mix       343
Blue             322
Gray Mix          39
Tricolor Mix      37
Apricot           27
Gray              25
Apricot Mix       10
Name: color, dtype: int64

In [245]:
#creating color list
color_weights = set(dog_df['color'].array)
color_weights = list(color_weights)
color_weights

['Red Mix',
 'Apricot',
 'Apricot Mix',
 'White Mix',
 'Gray Mix',
 'Yellow Mix',
 'Tricolor',
 'Gray',
 'White',
 'Tricolor Mix',
 'Blue',
 'Tan Mix',
 'Brown Mix',
 'Yellow',
 'Blue Mix',
 'Black',
 'Tan',
 'Brown',
 'Red',
 'Black Mix']

In [246]:
#apply color weights
dog_df['color_weights'] = dog_df['color'].apply(lambda color: color_weights.index(color))
dog_df

Unnamed: 0,age_upon_outcome,animal_id,animal_type,breed,color,date_of_birth,datetime,monthyear,name,outcome_subtype,outcome_type,sex_upon_outcome,age_months,gender,neutered_intact_spayed,color_weights
7,3 years,A692618,Dog,Chihuahua Shorthair Mix,Brown,2011-11-23,2014-12-08,2014-12-08T15:55:00,*Ella,Partner,Transfer,Spayed Female,36.501776,Female,Spayed,17
14,8 years,A690350,Dog,Labrador Retriever Mix,Black,2006-10-18,2014-10-26,2014-10-26T18:20:00,Shy,,Return to Owner,Neutered Male,96.264810,Male,Neutered,15
16,1 year,A674298,Dog,Pit Bull Mix,Brown Mix,2013-03-11,2014-04-16,2014-04-16T12:51:00,*Newt,Partner,Transfer,Neutered Male,13.174809,Male,Neutered,12
27,2 months,A667311,Dog,Labrador Retriever Mix,Yellow,2013-09-01,2013-11-19,2013-11-19T18:30:00,Promise,,Adoption,Spayed Female,2.595536,Female,Spayed,13
28,4 months,A690699,Dog,Chihuahua Shorthair Mix,Blue Mix,2014-06-17,2014-11-04,2014-11-04T18:03:00,Minnie,Partner,Transfer,Intact Female,4.599684,Female,Intact,14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
78246,2 years,A738395,Dog,Chihuahua Shorthair Mix,Black Mix,2015-11-14,2018-02-01,2018-02-01T18:23:00,Maxie,,Adoption,Neutered Male,26.612456,Male,Neutered,19
78247,2 years,A761266,Dog,Border Collie Mix,Black Mix,2015-11-01,2018-02-01,2018-02-01T18:32:00,Baily,,Return to Owner,Spayed Female,27.039570,Female,Spayed,19
78248,1 year,A725872,Dog,German Shepherd Mix,Tan Mix,2016-02-03,2018-02-01,2018-02-01T18:20:00,Lova,,Return to Owner,Intact Female,23.951210,Female,Intact,11
78249,2 years,A761265,Dog,Border Collie Mix,Black Mix,2015-11-01,2018-02-01,2018-02-01T18:31:00,Lucas,,Return to Owner,Neutered Male,27.039570,Male,Neutered,19


In [247]:
#clean dataframe to only selected columns
dog_df = dog_df[['breed', 'color', 'outcome_type', 'age_months', 'gender', 
                 'neutered_intact_spayed', 'color_weights']]

In [248]:
#drop any NaN rows
dog_df = dog_df.dropna()

In [249]:
dog_df.count()

breed                     21515
color                     21515
outcome_type              21515
age_months                21515
gender                    21515
neutered_intact_spayed    21515
color_weights             21515
dtype: int64

In [250]:
dog_df.columns

Index(['breed', 'color', 'outcome_type', 'age_months', 'gender',
       'neutered_intact_spayed', 'color_weights'],
      dtype='object')

In [251]:
#create breed list
breed_list = set(dog_df['breed'].array)
breed_list = list(breed_list)

#create breed weights
dog_df['breed_weights'] = dog_df['breed'].apply(lambda breed: breed_list.index(breed))
dog_df.head()
dog_df['breed_weights'].value_counts()

1    6113
7    4707
2    4562
3    1878
6    1051
9     783
5     662
8     645
0     639
4     475
Name: breed_weights, dtype: int64

In [252]:
dog_df.head()

Unnamed: 0,breed,color,outcome_type,age_months,gender,neutered_intact_spayed,color_weights,breed_weights
7,Chihuahua Shorthair Mix,Brown,Transfer,36.501776,Female,Spayed,17,7
14,Labrador Retriever Mix,Black,Return to Owner,96.26481,Male,Neutered,15,2
16,Pit Bull Mix,Brown Mix,Transfer,13.174809,Male,Neutered,12,1
27,Labrador Retriever Mix,Yellow,Adoption,2.595536,Female,Spayed,13,2
28,Chihuahua Shorthair Mix,Blue Mix,Transfer,4.599684,Female,Intact,14,7


In [253]:
dog_df['outcome_type'].value_counts()
dog_df.count()

breed                     21515
color                     21515
outcome_type              21515
age_months                21515
gender                    21515
neutered_intact_spayed    21515
color_weights             21515
breed_weights             21515
dtype: int64

In [254]:
dog_df['outcome_type'].value_counts()

Adoption           9778
Return to Owner    6186
Transfer           4541
Euthanasia          860
Died                 67
Rto-Adopt            62
Missing              14
Disposal              7
Name: outcome_type, dtype: int64

In [255]:
#clean outcome column
outcome = {'Rto-Adopt': 'Adoption/Return to Owner',
           'Disposal': 'Died/Euthanasia',
           'Died': 'Died/Euthanasia',
           'Euthanasia': 'Died/Euthanasia',
           'Adoption': 'Adoption/Return to Owner',
           'Return to Owner': 'Adoption/Return to Owner'}

dog_df['outcome_type'] = dog_df['outcome_type'].replace(outcome)
dog_df['outcome_type'].value_counts()

Adoption/Return to Owner    16026
Transfer                     4541
Died/Euthanasia               934
Missing                        14
Name: outcome_type, dtype: int64

In [289]:
total = 4541 + 934 + 14
total

5489

In [256]:
#outcome list
outcome = set(dog_df['outcome_type'].array)
outcome_list = list(outcome)
outcome_list

#create outcome weights
dog_df['outcome_weights'] = dog_df['outcome_type'].apply(lambda outcome: outcome_list.index(outcome))
dog_df.head()
dog_df['outcome_weights'].value_counts()

1    16026
2     4541
0      934
3       14
Name: outcome_weights, dtype: int64

In [257]:
dog_df.head()
dog_df=dog_df.loc[dog_df['outcome_type'] != 'Missing']

dog_df['outcome_type'].value_counts()

Adoption/Return to Owner    16026
Transfer                     4541
Died/Euthanasia               934
Name: outcome_type, dtype: int64

In [258]:
#gender list
gender = set(dog_df['gender'].array)
gender_list = list(gender)
gender_list

#create gender weights
dog_df['gender_weights'] = dog_df['gender'].apply(lambda gender: gender_list.index(gender))
dog_df.head()
dog_df['gender_weights'].value_counts()

1    11329
0    10172
Name: gender_weights, dtype: int64

In [259]:
#clean outcome column
neuter = {'Spayed': 'Fixed',
         'Neutered': 'Fixed'}

dog_df['neutered_intact_spayed'] = dog_df['neutered_intact_spayed'].replace(neuter)
dog_df['neutered_intact_spayed'].value_counts()

Fixed     16938
Intact     4563
Name: neutered_intact_spayed, dtype: int64

In [260]:
#neutered list
neuter = set(dog_df['neutered_intact_spayed'].array)
neuter_list = list(neuter)
neuter_list

#create gender weights
dog_df['fixed_weights'] = dog_df['neutered_intact_spayed'].apply(lambda neuter: neuter_list.index(neuter))
dog_df.head()
dog_df['fixed_weights'].value_counts()

1    16938
0     4563
Name: fixed_weights, dtype: int64

In [261]:
dog_df.head()

Unnamed: 0,breed,color,outcome_type,age_months,gender,neutered_intact_spayed,color_weights,breed_weights,outcome_weights,gender_weights,fixed_weights
7,Chihuahua Shorthair Mix,Brown,Transfer,36.501776,Female,Fixed,17,7,2,0,1
14,Labrador Retriever Mix,Black,Adoption/Return to Owner,96.26481,Male,Fixed,15,2,1,1,1
16,Pit Bull Mix,Brown Mix,Transfer,13.174809,Male,Fixed,12,1,2,1,1
27,Labrador Retriever Mix,Yellow,Adoption/Return to Owner,2.595536,Female,Fixed,13,2,1,0,1
28,Chihuahua Shorthair Mix,Blue Mix,Transfer,4.599684,Female,Intact,14,7,2,0,0


In [262]:
dog_df.columns

Index(['breed', 'color', 'outcome_type', 'age_months', 'gender',
       'neutered_intact_spayed', 'color_weights', 'breed_weights',
       'outcome_weights', 'gender_weights', 'fixed_weights'],
      dtype='object')

In [264]:
test_df = dog_df[['outcome_weights','age_months','color','color_weights', 'breed_weights', 
                       'gender_weights', 'fixed_weights','gender', 'breed', 'neutered_intact_spayed']]
test_df[test_df['fixed_weights']==2]

Unnamed: 0,outcome_weights,age_months,color,color_weights,breed_weights,gender_weights,fixed_weights,gender,breed,neutered_intact_spayed


## Final Dog Dataframe

In [265]:
final_dog_df = dog_df[['outcome_weights','age_months','color_weights', 'breed_weights', 
                       'gender_weights', 'fixed_weights']]

In [266]:
final_dog_df.head()

Unnamed: 0,outcome_weights,age_months,color_weights,breed_weights,gender_weights,fixed_weights
7,2,36.501776,17,7,0,1
14,1,96.26481,15,2,1,1
16,2,13.174809,12,1,1,1
27,1,2.595536,13,2,0,1
28,2,4.599684,14,7,0,0


In [267]:
final_dog_df['outcome_weights'] = final_dog_df['outcome_weights'] + 1
final_dog_df['color_weights'] = final_dog_df['color_weights'] + 1
final_dog_df['breed_weights'] = final_dog_df['breed_weights'] + 1
final_dog_df['gender_weights'] = final_dog_df['gender_weights'] + 1
final_dog_df['fixed_weights'] = final_dog_df['fixed_weights'] + 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .l

In [268]:
#convert dataframe to csv file
final_dog_df.to_csv('final_data.csv', index=False)

## ML

In [269]:
feature_names = ["age_months", 'color_weights', 'breed_weights', 'gender_weights', 'fixed_weights']
X = final_dog_df[feature_names]
y = final_dog_df['outcome_weights']
X

Unnamed: 0,age_months,color_weights,breed_weights,gender_weights,fixed_weights
7,36.501776,18,8,1,2
14,96.264810,16,3,2,2
16,13.174809,13,2,2,2
27,2.595536,14,3,1,2
28,4.599684,15,8,1,1
...,...,...,...,...,...
78246,26.612456,20,8,2,2
78247,27.039570,20,1,1,2
78248,23.951210,12,4,1,1
78249,27.039570,20,1,2,2


In [270]:
final_dog_df.head()

Unnamed: 0,outcome_weights,age_months,color_weights,breed_weights,gender_weights,fixed_weights
7,3,36.501776,18,8,1,2
14,2,96.26481,16,3,2,2
16,3,13.174809,13,2,2,2
27,2,2.595536,14,3,1,2
28,3,4.599684,15,8,1,1


## Creating training and test sets

In [271]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

## Applying Scaling

In [272]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Build Models

In [273]:
## Logistic Regression
from sklearn.linear_model import LogisticRegression
logreg = LogisticRegression()
logreg.fit(X_train_scaled, y_train)

print('Accuracy of logistic regression on training', logreg.score(X_train_scaled, y_train))
print('Accuracy of logistic regression on testing', logreg.score(X_test_scaled, y_test))

Accuracy of logistic regression on training 0.7701705426356589
Accuracy of logistic regression on testing 0.7771577380952381




In [274]:
## Decision Tree
from sklearn.tree import DecisionTreeClassifier
dt = DecisionTreeClassifier()
dt.fit(X_train_scaled, y_train)

print('Accuracy of Decision tree on training', dt.score(X_train_scaled, y_train))
print('Accuracy of Decision tree on testing', dt.score(X_test_scaled, y_test))

Accuracy of Decision tree on training 0.9716589147286822
Accuracy of Decision tree on testing 0.6971726190476191


In [280]:
# Setting max decision tree depth to help avoid overfitting
# for num in range(1,15):
dt2 = DecisionTreeClassifier(max_depth=7)
dt2.fit(X_train_scaled, y_train)
# print(f'{num}')
print('Accuracy of Decision tree on training', dt2.score(X_train_scaled, y_train))
print('Accuracy of Decision tree on testing', dt2.score(X_test_scaled, y_test))
    
#7 branches

Accuracy of Decision tree on training 0.7950387596899224
Accuracy of Decision tree on testing 0.7937127976190477


In [276]:
# K-nearest neighbor 
from sklearn.neighbors import KNeighborsClassifier

for k in range(1,10):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train_scaled, y_train)
    print('k = ', k)
    print('Accuracy of Knn on training', knn.score(X_train_scaled, y_train))
    print('Accuracy of Knn on testing', knn.score(X_test_scaled, y_test))

k =  1
Accuracy of Knn on training 0.9673798449612403
Accuracy of Knn on testing 0.6906622023809523
k =  2
Accuracy of Knn on training 0.8393178294573643
Accuracy of Knn on testing 0.7146577380952381
k =  3
Accuracy of Knn on training 0.8328682170542636
Accuracy of Knn on testing 0.7209821428571429
k =  4
Accuracy of Knn on training 0.8132713178294574
Accuracy of Knn on testing 0.75390625
k =  5
Accuracy of Knn on training 0.8068837209302325
Accuracy of Knn on testing 0.7540922619047619
k =  6
Accuracy of Knn on training 0.8014883720930233
Accuracy of Knn on testing 0.7632068452380952
k =  7
Accuracy of Knn on training 0.7988217054263566
Accuracy of Knn on testing 0.7667410714285714
k =  8
Accuracy of Knn on training 0.7932403100775194
Accuracy of Knn on testing 0.7693452380952381
k =  9
Accuracy of Knn on training 0.7933643410852713
Accuracy of Knn on testing 0.7680431547619048


In [281]:
# Linear Discriminant Analysis 
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
lda = LinearDiscriminantAnalysis()
lda.fit(X_train_scaled,y_train)
print('Accuracy of Linear Discriminant on training', lda.score(X_train_scaled, y_train))
print('Accuracy of Linear Discriminant on testing', lda.score(X_test_scaled, y_test))

Accuracy of Linear Discriminant on training 0.7540465116279069
Accuracy of Linear Discriminant on testing 0.7646949404761905


In [282]:
# Gaussian Naive Bayes 
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()
gnb.fit(X_train_scaled, y_train)
print('Accuracy of GNB on training', gnb.score(X_train_scaled, y_train))
print('Accuracy of GNB on testing', gnb.score(X_test_scaled, y_test))

Accuracy of GNB on training 0.7506976744186047
Accuracy of GNB on testing 0.7606026785714286


In [283]:
# Support Vector Machine
from sklearn.svm import SVC
svm = SVC()
svm.fit(X_train_scaled, y_train)
print('Accuracy of SVM on training', svm.score(X_train_scaled, y_train))
print('Accuracy of SVM on testing', svm.score(X_test_scaled, y_test))



Accuracy of SVM on training 0.7726511627906977
Accuracy of SVM on testing 0.7816220238095238


In [284]:
# "age_months", 'color_weights', 'breed_weights', 'gender_weights', 'neuter_weights'
for i in outcome_list:
#     print(i)
    index_num = outcome_list.index(i)
    print(f' {i}: {index_num} <br>')
    
for i in breed_list:
#     print(i)
    index_num = breed_list.index(i)
    print(f' {i}: {index_num} <br>')
    
for i in gender_list:
#     print(i)
    index_num = gender_list.index(i)
    print(f' {i}: {index_num} <br>')
    
for i in neuter_list:
#     print(i)
    index_num = neuter_list.index(i)
    print(f' {i}: {index_num} <br>')
    


 Died/Euthanasia: 0 <br>
 Adoption/Return to Owner: 1 <br>
 Transfer: 2 <br>
 Missing: 3 <br>
 Border Collie Mix: 0 <br>
 Pit Bull Mix: 1 <br>
 Labrador Retriever Mix: 2 <br>
 German Shepherd Mix: 3 <br>
 Catahoula Mix: 4 <br>
 Boxer Mix: 5 <br>
 Australian Cattle Dog Mix: 6 <br>
 Chihuahua Shorthair Mix: 7 <br>
 Miniature Poodle Mix: 8 <br>
 Dachshund Mix: 9 <br>
 Female: 0 <br>
 Male: 1 <br>
 Intact: 0 <br>
 Fixed: 1 <br>


## Weight Legend

<b> Outcome </b> <br>
 Died/Euthanasia: 1 <br>
 Adoption/Return to Owner: 2 <br>
 Transfer: 3 <br>
 Missing: 4 <br>

 
<b> Breed </b> <br>
Catahoula Mix: 1 <br>
 Dachshund Mix: 2 <br>
 Border Collie Mix: 3 <br>
 Pit Bull Mix: 4 <br>
 Boxer Mix: 5 <br>
 Miniature Poodle Mix: 6 <br>
 Australian Cattle Dog Mix: 7 <br>
 Labrador Retriever Mix: 8 <br>
 German Shepherd Mix: 9 <br>
 Chihuahua Shorthair Mix: 10 <br>
 
<b> Gender </b> <br>
 Male: 1 <br>
 Female: 2 <br>
 
<b> Neuter </b> <br>
 Neutered: 1 <br>
 Spayed: 2 <br>
 Intact: 3 <br>

In [285]:
X_train_scaled

array([[0.22409569, 0.78947368, 0.22222222, 1.        , 0.        ],
       [0.06672431, 0.63157895, 0.22222222, 1.        , 0.        ],
       [0.12393717, 0.15789474, 1.        , 0.        , 1.        ],
       ...,
       [0.0266609 , 0.63157895, 1.        , 0.        , 1.        ],
       [0.06686842, 0.        , 0.55555556, 1.        , 1.        ],
       [0.02939905, 0.63157895, 0.77777778, 1.        , 1.        ]])

In [286]:
import pickle
pickle.dump(dt2, open('model.pkl','wb'))

In [287]:
model = pickle.load( open('model.pkl','rb'))
print(model.predict([[0.104842, 0.243, 0.03451, 0.4566, 1]]))

[2]


In [74]:
# feature_names = ["age_months", 'color_weights', 'breed_weights', 'gender_weights', 'neuter_weights']
color_weights

['Red Mix',
 'Apricot',
 'Apricot Mix',
 'White Mix',
 'Gray Mix',
 'Yellow Mix',
 'Tricolor',
 'Gray',
 'White',
 'Tricolor Mix',
 'Blue',
 'Tan Mix',
 'Brown Mix',
 'Yellow',
 'Blue Mix',
 'Black',
 'Tan',
 'Brown',
 'Red',
 'Black Mix']

In [82]:
final_dog_df[final_dog_df['color_weights']==1]

Unnamed: 0,outcome_weights,age_months,color_weights,breed_weights,gender_weights,neuter_weights
37,2,48.559519,1,8,1,3
109,2,57.956016,1,3,2,1
176,2,2.102713,1,7,1,2
181,3,49.906569,1,2,2,1
203,2,30.160784,1,7,1,3
...,...,...,...,...,...,...
77534,2,80.428756,1,6,1,3
77804,2,192.003943,1,2,2,1
77959,2,10.349288,1,2,2,1
77980,2,6.308138,1,7,2,1
