In [78]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, classification_report

import warnings
warnings.filterwarnings("ignore")

### Load & Slight Preprocess

In [79]:
dogs = pd.read_excel('Healthy_Dog_Vet_Reports.xlsx')

X = dogs.loc[:,['Age (months)', 'IsOldPet', 'Gender', 'Neutered', 'BreedGroupId', \
                'Age segment', 'Energetic/Enthusiastic', 'Happy/Content', 'Active/Comfortable', 'Calm/Relaxed']]

X["BreedGroupId"] = X.BreedGroupId.map({1 : 0, 2 : 1, 3 : 2, 4 : 3}) 
X["Gender"] = X.Gender.map({'F': 0, 'M': 1})
X["Neutered"] = X["Neutered"].astype(str).str.upper().map({'TRUE': 0, 'FALSE': 1})
X["IsOldPet"] = X["IsOldPet"].astype(str).str.upper().map({'TRUE': 0, 'FALSE': 1})
X["Age segment"] = X["Age segment"].map({'young' : 0, 'middle-aged' : 1, 'old' : 2}) 

y = X['BreedGroupId'] # 0 -- Small, 1 -- Medium, 2 -- Large, 3 -- Extra Large
X = X.drop(['BreedGroupId'], axis = 1)

In [80]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

### Random Forest

In [81]:
rf_classifier = RandomForestClassifier(n_estimators = 10,                         # number of trees
                                       criterion = 'gini',
                                       max_samples = int(X_train.shape[0] * 0.8),   # amount of samples in each tree
                                       min_impurity_decrease = 0.1, 
                                       max_depth = 2,
                                       max_features = None,                       # we are using all features
                                       oob_score = True,                          # out of bag accuracy score
                                       random_state = 42)

In [82]:
rf_classifier.fit(X_train, y_train)

In [83]:
y_pred = rf_classifier.predict(X_test)

In [84]:
accuracy = accuracy_score(y_test, y_pred)

print(f'Accuracy: {accuracy:.2f}')
oob_score = rf_classifier.oob_score_
print(f'Out-of-Bag Score: {oob_score:.2f}')

Accuracy: 0.35
Out-of-Bag Score: 0.34


In [85]:
rf_classifier2 = RandomForestClassifier(n_estimators = 10,                         # number of trees
                                       criterion = 'gini',
                                       max_samples = int(X_train.shape[0] * 0.8),   # amount of samples in each tree
                                       min_impurity_decrease = 0.1, 
                                       max_depth = 2,
                                       max_features = 4,
                                       oob_score = True,
                                       random_state = 42)

In [86]:
rf_classifier2.fit(X_train, y_train)
y_pred2 = rf_classifier2.predict(X_test)
accuracy2 = accuracy_score(y_test, y_pred)

print(f'Accuracy: {accuracy2:.2f}')
oob_score = rf_classifier.oob_score_
print(f'Out-of-Bag Score: {oob_score:.2f}')

Accuracy: 0.35
Out-of-Bag Score: 0.34
