In [1]:
# Initial imports.
import pandas as pd
from path import Path
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [2]:
# Loading data
file_path = Path("result.csv")
df = pd.read_csv(file_path)
df.head()

Unnamed: 0,animal_id,datetime_intake,found_location,intake_type,intake_condition,animal_type_intake,name_intake,sex_intake,color_intake,breed_type,datetime_outcome,outcome_type,sex_upon_outcome,fixed_changed,age_bucket,datetime_length
0,A730601,2016-07-07 12:11:00,1109 Shady Ln in Austin (TX),Stray,Normal,Cat,No,Intact Male,Tabby,Mix,2016-07-08 09:00:00,Transfer,Neutered Male,1,7-12 months,74940000000000
1,A676515,2014-04-11 08:45:00,615 E. Wonsley in Austin (TX),Stray,Normal,Dog,Yes,Intact Male,Bicolor,Mix,2014-04-14 18:38:00,Return to Owner,Neutered Male,1,1-6 months,294780000000000
2,A679549,2014-05-22 15:43:00,124 W Anderson in Austin (TX),Stray,Normal,Cat,Yes,Intact Male,Bicolor,Mix,2014-06-16 13:54:00,Transfer,Neutered Male,1,1-6 months,2153460000000000
3,A683798,2016-07-21 12:16:00,3118 Windsor Rd in Austin (TX),Stray,Normal,Cat,Yes,Spayed Female,Bicolor,Mix,2016-10-18 10:55:00,Adoption,Spayed Female,0,1-3 years,7684740000000000
4,A683656,2014-07-13 13:20:00,8238 Research Blvd in Austin (TX),Stray,Normal,Cat,No,Intact Male,Point,Mix,2014-07-17 16:57:00,Adoption,Neutered Male,1,1-6 months,358620000000000


In [3]:
new_df=df.drop(columns=['animal_id','datetime_intake','found_location','datetime_outcome',
                    'sex_intake','fixed_changed'])
new_df.head()

Unnamed: 0,intake_type,intake_condition,animal_type_intake,name_intake,color_intake,breed_type,outcome_type,sex_upon_outcome,age_bucket,datetime_length
0,Stray,Normal,Cat,No,Tabby,Mix,Transfer,Neutered Male,7-12 months,74940000000000
1,Stray,Normal,Dog,Yes,Bicolor,Mix,Return to Owner,Neutered Male,1-6 months,294780000000000
2,Stray,Normal,Cat,Yes,Bicolor,Mix,Transfer,Neutered Male,1-6 months,2153460000000000
3,Stray,Normal,Cat,Yes,Bicolor,Mix,Adoption,Spayed Female,1-3 years,7684740000000000
4,Stray,Normal,Cat,No,Point,Mix,Adoption,Neutered Male,1-6 months,358620000000000


In [4]:
from sklearn.preprocessing import LabelEncoder

In [5]:
# encode text columns with dummy values
df_encoded = pd.get_dummies(new_df, columns=['intake_type', 'intake_condition', 'animal_type_intake', 'name_intake', 
                    'color_intake', 'sex_upon_outcome', 'age_bucket', 'breed_type'])

In [6]:
le = LabelEncoder()
df_encoded['outcome_type'] = le.fit_transform(df_encoded['outcome_type'])
le_name_mapping = dict(zip(le.classes_, le.transform(le.classes_)))
print(le_name_mapping)

{'Adoption': 0, 'Died': 1, 'Euthanasia': 2, 'Missing': 3, 'Return to Owner': 4, 'Transfer': 5}


In [7]:
df_encoded.head()

Unnamed: 0,outcome_type,datetime_length,intake_type_Euthanasia Request,intake_type_Owner Surrender,intake_type_Public Assist,intake_type_Stray,intake_type_Wildlife,intake_condition_Maternity,intake_condition_Medical,intake_condition_Normal,...,age_bucket_4-6 years,age_bucket_7+ years,age_bucket_7-12 months,age_bucket_Less than 1 week,breed_type_Bully Breeds,breed_type_Domestic Shorthair,breed_type_Mix,breed_type_Other,breed_type_Toy Breeds,breed_type_Wildlife
0,5,74940000000000,0,0,0,1,0,0,0,1,...,0,0,1,0,0,0,1,0,0,0
1,4,294780000000000,0,0,0,1,0,0,0,1,...,0,0,0,0,0,0,1,0,0,0
2,5,2153460000000000,0,0,0,1,0,0,0,1,...,0,0,0,0,0,0,1,0,0,0
3,0,7684740000000000,0,0,0,1,0,0,0,1,...,0,0,0,0,0,0,1,0,0,0
4,0,358620000000000,0,0,0,1,0,0,0,1,...,0,0,0,0,0,0,1,0,0,0


In [8]:
# Define the features set.
X = df_encoded.copy()
X=X.drop('outcome_type',axis=1)
X.head()

Unnamed: 0,datetime_length,intake_type_Euthanasia Request,intake_type_Owner Surrender,intake_type_Public Assist,intake_type_Stray,intake_type_Wildlife,intake_condition_Maternity,intake_condition_Medical,intake_condition_Normal,intake_condition_Other,...,age_bucket_4-6 years,age_bucket_7+ years,age_bucket_7-12 months,age_bucket_Less than 1 week,breed_type_Bully Breeds,breed_type_Domestic Shorthair,breed_type_Mix,breed_type_Other,breed_type_Toy Breeds,breed_type_Wildlife
0,74940000000000,0,0,0,1,0,0,0,1,0,...,0,0,1,0,0,0,1,0,0,0
1,294780000000000,0,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,1,0,0,0
2,2153460000000000,0,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,1,0,0,0
3,7684740000000000,0,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,1,0,0,0
4,358620000000000,0,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,1,0,0,0


In [9]:
# Define the target set.
y = df_encoded["outcome_type"].ravel()
y[:5]

array([5, 4, 5, 0, 0])

In [10]:
# Splitting into Train and Test sets.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [11]:
# Creating a StandardScaler instance.
scaler = StandardScaler()
# Fitting the Standard Scaler with the training data.
X_scaler = scaler.fit(X_train)

# Scaling the data.
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [12]:
# Create a random forest classifier.
rf_model = RandomForestClassifier(n_estimators=128, random_state=78) 

In [13]:
# Fitting the model
rf_model = rf_model.fit(X_train_scaled, y_train)

In [14]:
# Making predictions using the testing data.
predictions = rf_model.predict(X_test_scaled)

In [16]:
results = pd.DataFrame({
   "Prediction": predictions,
   "Actual": y_test
}).reset_index(drop=True)
results.head()

Unnamed: 0,Prediction,Actual
0,5,0
1,5,5
2,0,0
3,5,5
4,4,4


In [18]:
# print confusion matrix
cm = confusion_matrix(predictions, y_test)
cm_df = pd.DataFrame(cm,
                     index = ['Adoption', 'Died', 'Euthanasia', 'Missing', 'Return to Owner', 'Transfer'], 
                     columns = ['Adoption', 'Died', 'Euthanasia', 'Missing', 'Return to Owner', 'Transfer'])
cm_df

Unnamed: 0,Adoption,Died,Euthanasia,Missing,Return to Owner,Transfer
Adoption,2393,9,35,3,148,492
Died,11,41,44,0,2,19
Euthanasia,31,59,703,0,36,113
Missing,0,0,1,1,0,1
Return to Owner,142,2,30,0,931,111
Transfer,448,40,140,1,117,2298


In [19]:
# Calculating the accuracy score.
acc_score = accuracy_score(y_test, predictions)

In [21]:
print(f"Accuracy Score : {acc_score}")
# print classification report
print(classification_report(predictions, y_test, target_names=['Adoption', 'Died', 'Euthanasia', 'Missing', 'Return to Owner', 'Transfer']))

Accuracy Score : 0.757795762913592
                 precision    recall  f1-score   support

       Adoption       0.79      0.78      0.78      3080
           Died       0.27      0.35      0.31       117
     Euthanasia       0.74      0.75      0.74       942
        Missing       0.20      0.33      0.25         3
Return to Owner       0.75      0.77      0.76      1216
       Transfer       0.76      0.75      0.76      3044

       accuracy                           0.76      8402
      macro avg       0.59      0.62      0.60      8402
   weighted avg       0.76      0.76      0.76      8402

