In [1]:
import pandas as pd
import numpy as np

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report, confusion_matrix


In [2]:
# Load your data into a pandas dataframe
data = pd.read_csv('features.csv',index_col=0)
data = data[~data.isnull().any(axis=1)]

In [3]:
# Split the data into training and testing sets
train, test = train_test_split(data, test_size=0.2, random_state=42)


# Separate the target variable from the input features
X_train = train.drop('OUTCOMETYPE', axis=1)
y_train = train['OUTCOMETYPE']

X_test = test.drop('OUTCOMETYPE', axis=1)
y_test = test['OUTCOMETYPE']

cols = X_train.columns


In [4]:
# Scale the input features using StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [5]:
# Instantiate the Random Forest Classifier model
model = RandomForestClassifier(n_estimators=100, random_state=99)

# Fit the model over training data
model.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = model.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.88


In [6]:
# Print the confusion matrix
print("\n")
print("-------------------------------------------------------------")
print("Confusion Matrix:")
print("-----------------")
print(confusion_matrix(y_test, y_pred))

# Print the classification report
print("\n")
print("-------------------------------------------------------------")
print("Classification Report:")
print("------------------------")
rpt =classification_report(y_test, y_pred, output_dict=False)
print(rpt)




-------------------------------------------------------------
Confusion Matrix:
-----------------
[[6116   46  190]
 [ 192   51  114]
 [ 626   15 2766]]


-------------------------------------------------------------
Classification Report:
------------------------
              precision    recall  f1-score   support

       ADOPT       0.88      0.96      0.92      6352
        DIED       0.46      0.14      0.22       357
         RTO       0.90      0.81      0.85      3407

    accuracy                           0.88     10116
   macro avg       0.75      0.64      0.66     10116
weighted avg       0.87      0.88      0.87     10116



In [8]:
# Print the feature ranking

# Get feature importances
importances = model.feature_importances_

# Sort feature importances in descending order
indices = np.argsort(importances)[::-1]

print(70*"#")
print("FEATURE RANKING USING IMPORTANCE SCORE")
print(70*"#")
print("RANK","\tIMPORTANCE", "\tFEATURE")
print("----","\t----------", "\t--------------------------")
for f in range(X_train.shape[1]):    
    #print(f"{f + 1}. feature {indices[f]} ({importances[indices[f]]:.2f})")
    print(f"{f + 1}. \t({importances[indices[f]]:.2f}) \t\t{cols[f]} ")
print(70*"#")

######################################################################
FEATURE RANKING USING IMPORTANCE SCORE
######################################################################
RANK 	IMPORTANCE 	FEATURE
---- 	---------- 	--------------------------
1. 	(0.54) 		AGEOUTCOME 
2. 	(0.14) 		PREVHIST 
3. 	(0.13) 		DAYS_STAY 
4. 	(0.09) 		IS_PUREBREED 
5. 	(0.07) 		INTAKETYPE_ABANDONED 
6. 	(0.02) 		INTAKETYPE_EUTHANASIAREQUEST 
7. 	(0.01) 		INTAKETYPE_OWNERSURRENDER 
8. 	(0.00) 		INTAKETYPE_PUBLICASSIST 
9. 	(0.00) 		INTAKETYPE_STRAY 
######################################################################
