<a href="https://colab.research.google.com/github/lisethrubio/Project4-Machine_Learning_Model/blob/Steven_branch2/Confusion_Matrix_Class_x_Exercises_inc_gender.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn import tree
import pandas as pd
import tensorflow as tf

In [2]:
# Load the body-performance dataset from the Colab working directory
# and preview its first five rows.
body_df = pd.read_csv(filepath_or_buffer='/content/bodyPerformance.csv')
body_df.head(n=5)

Unnamed: 0,age,gender,height_cm,weight_kg,body fat_%,diastolic,systolic,gripForce,sit and bend forward_cm,sit-ups counts,broad jump_cm,class
0,27.0,M,172.3,75.24,21.3,80.0,130.0,54.9,18.4,60.0,217.0,C
1,25.0,M,165.0,55.8,15.7,77.0,126.0,36.4,16.3,53.0,229.0,A
2,31.0,M,179.6,78.0,20.1,92.0,152.0,44.8,12.0,49.0,181.0,C
3,32.0,M,174.5,71.1,18.4,76.0,147.0,41.4,15.2,53.0,219.0,B
4,28.0,M,173.8,67.7,17.1,70.0,127.0,43.5,27.1,45.0,217.0,B


In [3]:
# One-hot encode the "gender" column into integer indicator columns
# named "gender_<value>".
gender_encoded = pd.get_dummies(
    data=body_df.loc[:, "gender"],
    prefix="gender",
    dtype=int,
)

In [4]:
# Append the one-hot "gender" indicator columns to the original DataFrame.
# Both frames share the same row index, so this is a column-wise concat.
new_body_df = pd.concat(
    objs=[body_df, gender_encoded],
    axis="columns",
)

In [5]:
# Integer-encode the target labels held in the "class" column
label_encoder = LabelEncoder()
class_labels = new_body_df["class"].values
y = label_encoder.fit_transform(class_labels)

# Build the feature matrix. Dropped columns:
#   - "class":  the target itself
#   - "gender": the raw text column (its one-hot indicator columns are kept)
#   - "age", "height_cm", "weight_kg", "diastolic", "systolic":
#     measurements excluded from this model's feature set
excluded_columns = [
    "class",
    "gender",
    "age",
    "height_cm",
    "weight_kg",
    "diastolic",
    "systolic",
]
X = new_body_df.drop(columns=excluded_columns).values

In [6]:
# Hold out a test set using train_test_split's default split size.
# Stratifying on y keeps the class proportions the same in both subsets,
# and the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
)

In [7]:
# Standardize the feature columns before fitting the classifier.
# The scaler is fitted on the training split only, so no test-set
# statistics leak into the preprocessing step.
X_scaler = StandardScaler().fit(X_train)
scaler = X_scaler  # fit() returns the estimator itself; both names refer to one object

# Apply the fitted scaling to both splits
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

In [8]:
# Create the decision tree classifier instance.
# random_state is fixed because scikit-learn randomly permutes the candidate
# features at every split (even with the default "best" splitter), so an
# unseeded tree can differ from run to run and the reported metrics would
# not be reproducible. 42 matches the seed used for the train/test split.
model = tree.DecisionTreeClassifier(random_state=42)

# Fit the model on the scaled training data
model = model.fit(X_train_scaled, y_train)

# Predict the encoded class for every row of the scaled test data
predictions = model.predict(X_test_scaled)


In [9]:
# Class names in the order LabelEncoder assigned their integer codes, so the
# row/column captions always agree with the encoding instead of relying on a
# hand-typed list.
class_names = list(label_encoder.classes_)

# Calculate the confusion matrix. Passing `labels` pins the matrix to one
# row/column per encoded class, in encoder order, even if some class were
# absent from y_test or the predictions.
cm = confusion_matrix(y_test, predictions, labels=list(range(len(class_names))))
cm_df = pd.DataFrame(
    cm,
    index=[f"Actual {name}" for name in class_names],
    columns=[f"Predicted {name}" for name in class_names],
)

# Calculate the accuracy score (fraction of test rows predicted correctly)
acc_score = accuracy_score(y_test, predictions)


In [10]:
# Show the labelled confusion matrix under its heading, then the accuracy
# rounded to four decimal places.
print("Confusion Matrix:", cm_df, sep="\n")
print(f"\nAccuracy Score: {acc_score:.4f}")

Confusion Matrix:
          Predicted A  Predicted B  Predicted C  Predicted D
Actual A          479          213          121           24
Actual B          211          341          202           83
Actual C          102          215          383          138
Actual D           24           81          131          601

Accuracy Score: 0.5387


In [11]:
# Generate a per-class precision/recall/F1 report.
# The display names come from the fitted LabelEncoder (in encoded order) and
# `labels` lists the matching integer codes, so the names cannot drift from
# the encoding the way a hand-typed list could.
encoded_labels = list(range(len(label_encoder.classes_)))
class_report = classification_report(
    y_test,
    predictions,
    labels=encoded_labels,
    target_names=[str(name) for name in label_encoder.classes_],
)
print("\nClassification Report:")
print(class_report)


Classification Report:
              precision    recall  f1-score   support

           A       0.59      0.57      0.58       837
           B       0.40      0.41      0.40       837
           C       0.46      0.46      0.46       838
           D       0.71      0.72      0.71       837

    accuracy                           0.54      3349
   macro avg       0.54      0.54      0.54      3349
weighted avg       0.54      0.54      0.54      3349

