In [1]:
# Upload the dataset archive from the local machine into the Colab runtime
# (the recorded output below shows archive.zip being saved).
# `uploaded` holds whatever files.upload() returns; no visible cell reads it again.
from google.colab import files
uploaded = files.upload()

Saving archive.zip to archive.zip


In [2]:
!unzip -q /content/archive.zip


In [4]:
# NOTE(review): no visible cell above this one creates `df`; its first assignment is
# in the preprocessing cell further down, so this raises NameError on a fresh
# Restart & Run All. The recorded output already lists one-hot columns, so it was
# presumably executed after a cell that is no longer in the notebook.
# TODO: move this below the cell that loads and encodes the data.
print(df.columns)


Index(['Student_ID', 'Study_Hours_Per_Day', 'Extracurricular_Hours_Per_Day',
       'Sleep_Hours_Per_Day', 'Social_Hours_Per_Day',
       'Physical_Activity_Hours_Per_Day', 'Grades', 'Stress_Level_High',
       'Stress_Level_Low', 'Stress_Level_Moderate', 'Gender_Female',
       'Gender_Male'],
      dtype='object')


In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# 1. Read the data from the uploaded file
raw_df = pd.read_csv("student_lifestyle_dataset..csv")  # Change the filename if needed

# 2. Drop missing data (non-mutating, so re-running the cell always starts from raw_df)
# 3. Convert categorical columns to numeric
df = pd.get_dummies(raw_df.dropna())

# 4. Define features (X) and target (y)
# Student_ID is a row identifier, not a measurement, so it must not be exported as a
# feature. errors="ignore" keeps the cell working if the column is absent.
X = df.drop(columns="Grades").drop(columns="Student_ID", errors="ignore")
y = df["Grades"]

# 5. Split BEFORE scaling so the test rows never influence the scaler statistics.
# The row partition depends only on the number of rows and random_state, so the
# same rows land in train/test as when the split was done on the scaled matrix.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 6. Fit the scaler on the training rows only, then apply it to both splits
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Full matrix scaled with the training statistics; kept so any code that still
# expects `X_scaled` keeps working.
X_scaled = scaler.transform(X)

# 7. Save the datasets as CSV files (feature names are kept as the header row)
feature_names = X.columns
pd.DataFrame(X_train, columns=feature_names).to_csv("X_train.csv", index=False)
pd.DataFrame(X_test, columns=feature_names).to_csv("X_test.csv", index=False)
pd.DataFrame(y_train).to_csv("Y_train.csv", index=False)
pd.DataFrame(y_test).to_csv("Y_test.csv", index=False)

print("✅ Training and testing files have been prepared.")


✅ Training and testing files have been prepared.


In [6]:
# Hand each prepared split file to the browser as a download, in the same order.
for csv_name in ("X_train.csv", "X_test.csv", "Y_train.csv", "Y_test.csv"):
    files.download(csv_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [20]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LinearRegression

# 1. Load the dataset, discard incomplete rows, then one-hot encode the categoricals
df = pd.get_dummies(pd.read_csv("student_lifestyle_dataset..csv").dropna())

# 2. Look at the raw 'Grades' column before any binning is applied
grades_preview = df["Grades"]
print("Distribution of Grades before conversion:")
print(grades_preview.describe())  # count / mean / std / quartiles of Grades

# ...followed by the first few raw values
print("\nSample of Grades:")
print(grades_preview.head())

# 3. Convert Grades (continuous) to categories for classification
def convert_grade(grade):
    """Bucket a numeric grade into a coarse label.

    Thresholds are inclusive lower bounds, checked from highest to lowest:
    >= 9 -> "High", >= 7 -> "Medium", >= 5 -> "Low".
    Anything that passes none of the comparisons falls through to "Very_Low".
    """
    for floor, label in ((9, "High"), (7, "Medium"), (5, "Low")):
        if grade >= floor:
            return label
    return "Very_Low"

# Apply to create Grade_Level column
df["Grade_Level"] = df["Grades"].apply(convert_grade)

# Check the distribution of Grade_Level after conversion
print("\nDistribution of Grade_Level after conversion:")
print(df["Grade_Level"].value_counts())

# 4. Ensure we have multiple classes (also catches an empty frame, i.e. zero classes)
if df["Grade_Level"].nunique() < 2:
    raise ValueError(
        "Grade_Level must contain at least two classes to train a classifier; "
        "check the grade thresholds or the input data."
    )

# 5. Features and labels
# Drop both target columns. Student_ID is also removed: it is a row identifier,
# and feeding it to the models only adds noise to fit on.
# errors="ignore" keeps the cell working if the column is absent.
X = df.drop(["Grades", "Grade_Level"], axis=1).drop(columns="Student_ID", errors="ignore")
y = df["Grade_Level"]

# 6. Check the distribution of classes in y before splitting
print("\nDistribution of classes in y before splitting:")
print(y.value_counts())

# 7. Split the data first (stratified so every class keeps its share in both parts).
# Splitting before scaling keeps test-set statistics out of the scaler. The row
# partition depends only on y and random_state, so the same rows end up in
# train/test as when the scaled matrix was split.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# 8. Scale the features: fit on the training rows only, then apply to both parts
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Full matrix scaled with the training statistics; kept so any code that still
# expects `X_scaled` keeps working.
X_scaled = scaler.transform(X)

# Check the distribution after splitting
print("\nDistribution of classes in y_train after splitting:")
print(y_train.value_counts())

# 9. Train classifiers
# Seed for the models that use randomness internally, so a re-run reproduces the
# same prediction files (same value as the train/test split seed).
RANDOM_STATE = 42

knn = KNeighborsClassifier()                                    # K-Nearest Neighbors
ann = MLPClassifier(max_iter=1000, random_state=RANDOM_STATE)   # Artificial Neural Network (MLP)
svm = SVC()                                                     # Support Vector Machine
rf = RandomForestClassifier(random_state=RANDOM_STATE)          # Random Forest
tree = DecisionTreeClassifier(random_state=RANDOM_STATE)        # Decision Tree
nb = GaussianNB()                                               # Naive Bayes

# The key is the tag used in the output file name: predictions_<tag>_model.csv
classifiers = {
    "KNN": knn,
    "ANN": ann,
    "SVM": svm,
    "RF": rf,
    "DecisionTree": tree,
    "NaiveBayes": nb,
}

# Fit every model on the same training split and save its test-set predictions.
predictions = {}
for model_tag, model in classifiers.items():
    model.fit(X_train, y_train)
    predictions[model_tag] = model.predict(X_test)
    pd.DataFrame(predictions[model_tag], columns=["Prediction"]).to_csv(
        f"predictions_{model_tag}_model.csv", index=False
    )

# Keep the per-model prediction names available for any later cell that uses them.
pred_knn = predictions["KNN"]
pred_ann = predictions["ANN"]
pred_svm = predictions["SVM"]
pred_rf = predictions["RF"]
pred_tree = predictions["DecisionTree"]
pred_nb = predictions["NaiveBayes"]

# Linear Regression (used on continuous grades)
y_reg = df["Grades"]

# Reuse the classifier split instead of drawing a second, non-stratified one.
# That way row i of this prediction file refers to the same student as row i of
# every classifier's prediction file. y_train / y_test carry the original row
# labels, so they select the matching continuous grades.
X_train_lr, X_test_lr = X_train, X_test
y_train_lr = y_reg.loc[y_train.index]
y_test_lr = y_reg.loc[y_test.index]

lr = LinearRegression()
lr.fit(X_train_lr, y_train_lr)
pred_lr = lr.predict(X_test_lr)

# Map the predicted grade onto the same labels the classifiers emit, using the
# same thresholds. Rounding to an integer would put bare numbers in the
# "Prediction" column that cannot be compared with the other models' labels.
pred_lr_class = np.array([convert_grade(grade) for grade in pred_lr])
pd.DataFrame(pred_lr_class, columns=["Prediction"]).to_csv("predictions_LinearRegression_model.csv", index=False)

print("✅ All models trained and predictions saved successfully.")






Distribution of Grades before conversion:
count    2000.000000
mean        7.789825
std         0.746777
min         5.600000
25%         7.250000
50%         7.780000
75%         8.320000
max        10.000000
Name: Grades, dtype: float64

Sample of Grades:
0    7.48
1    6.88
2    6.68
3    7.20
4    8.78
Name: Grades, dtype: float64

Distribution of Grade_Level after conversion:
Grade_Level
Medium    1601
Low        292
High       107
Name: count, dtype: int64

Distribution of classes in y before splitting:
Grade_Level
Medium    1601
Low        292
High       107
Name: count, dtype: int64

Distribution of classes in y_train after splitting:
Grade_Level
Medium    1281
Low        234
High        85
Name: count, dtype: int64
✅ All models trained and predictions saved successfully.


In [21]:
# Hand every saved prediction file to the browser as a download, in the same order.
prediction_files = (
    "predictions_KNN_model.csv",
    "predictions_ANN_model.csv",
    "predictions_SVM_model.csv",
    "predictions_RF_model.csv",
    "predictions_DecisionTree_model.csv",
    "predictions_NaiveBayes_model.csv",
    "predictions_LinearRegression_model.csv",
)
for csv_name in prediction_files:
    files.download(csv_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>