In [4]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder

# Load the dataset
df = pd.read_excel('StressLevelDataset.xlsx')

# Display the first few rows of the dataset to understand its structure
df.head()

Unnamed: 0,anxiety_level,self_esteem,mental_health_history,depression,headache,blood_pressure,sleep_quality,breathing_problem,noise_level,living_conditions,...,basic_needs,study_load,teacher_student_relationship,future_career_concerns,social_support,peer_pressure,extracurricular_activities,bullying,stress_level,academic_performance
0,14,20,0,11,2,1,2,4,2,3,...,2,2,3,3,2,3,3,2,1,3
1,15,8,1,15,5,3,1,4,3,1,...,2,4,1,5,1,4,5,5,2,1
2,12,18,1,14,2,1,2,2,2,2,...,2,3,3,2,2,3,2,2,1,2
3,16,12,1,15,4,3,1,3,4,2,...,2,4,1,4,1,4,4,5,2,2
4,16,28,0,7,2,3,5,1,3,2,...,3,3,1,2,1,5,0,5,1,4


In [5]:
# Preprocess the data
# Label encode the target variable (assuming 'academic_performance' is the target column)
le = LabelEncoder()
df['academic_performance'] = le.fit_transform(df['academic_performance'])

# Split the dataset into features (X) and target variable (y)
X = df.drop('academic_performance', axis=1)
y = df['academic_performance']

# Split the data into training (70%) and testing (30%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Create a Random Forest classifier
clf = RandomForestClassifier(n_estimators=1000, random_state=42)

# Train the classifier on the training data
clf.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = clf.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

# Display other evaluation metrics
print('\nClassification Report:')
print(classification_report(y_test, y_pred))

# Display confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print('\nConfusion Matrix:')
print(conf_matrix)

Accuracy: 42.73%

Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         8
           1       0.39      0.33      0.36        51
           2       0.46      0.51      0.49       109
           3       0.45      0.37      0.40        52
           4       0.37      0.40      0.39        57
           5       0.43      0.49      0.46        53

    accuracy                           0.43       330
   macro avg       0.35      0.35      0.35       330
weighted avg       0.42      0.43      0.42       330


Confusion Matrix:
[[ 0  0  3  0  3  2]
 [ 0 17 29  0  4  1]
 [ 1 25 56 23  3  1]
 [ 0  2 25 19  4  2]
 [ 0  0  6  0 23 28]
 [ 0  0  2  0 25 26]]
