# **Fitness_Classification**

# **Import Libraries**

In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    confusion_matrix,
    classification_report
)

# **Read Dataset**

In [12]:
df = pd.read_csv("fitness_dataset.csv")

In [13]:
df.head()

Unnamed: 0,age,height_cm,weight_kg,heart_rate,blood_pressure,sleep_hours,nutrition_quality,activity_index,smokes,gender,is_fit
0,56,152,65,69.6,117.0,,2.37,3.97,no,F,1
1,69,186,95,60.8,114.8,7.5,8.77,3.19,0,F,1
2,46,192,103,61.4,116.4,,8.2,2.03,0,F,0
3,32,189,83,60.2,130.1,7.0,6.18,3.68,0,M,1
4,60,175,99,58.1,115.8,8.0,9.95,4.83,yes,F,1


# **Dataset Overview**

In [14]:
df.shape

(2000, 11)

In [15]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 11 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   age                2000 non-null   int64  
 1   height_cm          2000 non-null   int64  
 2   weight_kg          2000 non-null   int64  
 3   heart_rate         2000 non-null   float64
 4   blood_pressure     2000 non-null   float64
 5   sleep_hours        1840 non-null   float64
 6   nutrition_quality  2000 non-null   float64
 7   activity_index     2000 non-null   float64
 8   smokes             2000 non-null   object 
 9   gender             2000 non-null   object 
 10  is_fit             2000 non-null   int64  
dtypes: float64(5), int64(4), object(2)
memory usage: 172.0+ KB


# **Statistical** **Analysis**

In [16]:
df.describe()

Unnamed: 0,age,height_cm,weight_kg,heart_rate,blood_pressure,sleep_hours,nutrition_quality,activity_index,is_fit
count,2000.0,2000.0,2000.0,2000.0,2000.0,1840.0,2000.0,2000.0,2000.0
mean,49.114,174.533,83.5405,70.2886,119.90885,7.513315,5.03514,2.99904,0.3995
std,17.926564,14.37175,25.852534,11.846339,14.578032,1.502031,2.864156,1.136383,0.489918
min,18.0,150.0,30.0,45.0,90.0,4.0,0.0,1.0,0.0
25%,34.0,162.0,64.0,62.1,109.7,6.5,2.5475,2.0375,0.0
50%,49.0,174.0,83.0,70.25,120.0,7.5,5.065,2.98,0.0
75%,65.0,187.0,102.0,78.425,129.8,8.6,7.47,3.95,1.0
max,79.0,199.0,250.0,118.6,171.2,12.0,10.0,4.99,1.0


# **Dataset** **Missign Values**

In [17]:
df.isnull().sum()

Unnamed: 0,0
age,0
height_cm,0
weight_kg,0
heart_rate,0
blood_pressure,0
sleep_hours,160
nutrition_quality,0
activity_index,0
smokes,0
gender,0


In [18]:
df["sleep_hours"] = df["sleep_hours"].fillna(df["sleep_hours"].mean())

In [19]:
df.head()

Unnamed: 0,age,height_cm,weight_kg,heart_rate,blood_pressure,sleep_hours,nutrition_quality,activity_index,smokes,gender,is_fit
0,56,152,65,69.6,117.0,7.513315,2.37,3.97,no,F,1
1,69,186,95,60.8,114.8,7.5,8.77,3.19,0,F,1
2,46,192,103,61.4,116.4,7.513315,8.2,2.03,0,F,0
3,32,189,83,60.2,130.1,7.0,6.18,3.68,0,M,1
4,60,175,99,58.1,115.8,8.0,9.95,4.83,yes,F,1


In [20]:
df['smokes'].value_counts()

Unnamed: 0_level_0,count
smokes,Unnamed: 1_level_1
yes,711
0,581
no,518
1,190


In [21]:
df['smokes'] = df['smokes'].replace({"yes":1, "no":0})

In [23]:
df['smokes'].value_counts()

Unnamed: 0_level_0,count
smokes,Unnamed: 1_level_1
1,711
0,581
0,518
1,190


In [24]:
df['gender'].value_counts()

Unnamed: 0_level_0,count
gender,Unnamed: 1_level_1
F,1030
M,970


In [25]:
df['gender'] =df['gender'].replace({'F':0,'M':1})

In [26]:
df["gender"].value_counts()

Unnamed: 0_level_0,count
gender,Unnamed: 1_level_1
0,1030
1,970


In [27]:
df.head()

Unnamed: 0,age,height_cm,weight_kg,heart_rate,blood_pressure,sleep_hours,nutrition_quality,activity_index,smokes,gender,is_fit
0,56,152,65,69.6,117.0,7.513315,2.37,3.97,0,0,1
1,69,186,95,60.8,114.8,7.5,8.77,3.19,0,0,1
2,46,192,103,61.4,116.4,7.513315,8.2,2.03,0,0,0
3,32,189,83,60.2,130.1,7.0,6.18,3.68,0,1,1
4,60,175,99,58.1,115.8,8.0,9.95,4.83,1,0,1


In [28]:
df['is_fit'].value_counts()

Unnamed: 0_level_0,count
is_fit,Unnamed: 1_level_1
0,1201
1,799


In [29]:
x = df.drop("is_fit", axis=1)
y = df["is_fit"]

In [30]:
smote = SMOTE(random_state=42)
x_resampled, y_resampled = smote.fit_resample(x, y)

In [31]:
x_train, x_test, y_train, y_test = train_test_split(x_resampled, y_resampled, test_size=0.2, random_state=42)

# **Feature Scaling**

In [32]:
scale = StandardScaler()
x_train = scale.fit_transform(x_train)
x_test = scale.transform(x_test)

# **Random Forest Model Building**

In [34]:
model = RandomForestClassifier()
model.fit(x_train, y_train)

# Predictions
y_pred = model.predict(x_test)
y_pred_proba = model.predict_proba(x_test)[:, 1]

# **Classification Report**

In [35]:
print("Accuracy:", accuracy_score(y_test, y_pred))

print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))

# ROC AUC requires probabilities or scores for the positive class
print("ROC AUC:", roc_auc_score(y_test, y_pred_proba))

print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.7941787941787942
Precision: 0.7842323651452282
Recall: 0.8008474576271186
F1 Score: 0.7924528301886793
ROC AUC: 0.867390176409547
Confusion Matrix:
 [[193  52]
 [ 47 189]]
Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.79      0.80       245
           1       0.78      0.80      0.79       236

    accuracy                           0.79       481
   macro avg       0.79      0.79      0.79       481
weighted avg       0.79      0.79      0.79       481

