In [16]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import matplotlib.pyplot as plt
import seaborn as sns


In [17]:
# Read the student dataset from the Colab working directory and preview it.
# The final bare expression renders the first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer='/content/student_data.csv')
df.head()

Unnamed: 0,Attendance,Study Hours,Homework Completion,Test Scores,SES,Extracurricular Activities,Parent Involvement,Tutoring,Sleep Hours,Health Conditions,Final Grade
0,80,28,94,53,Middle,1,High,Yes,9,,67.7
1,92,28,82,99,Low,0,Medium,No,6,,71.5
2,97,19,75,72,Low,3,High,Yes,9,Severe,68.75
3,91,23,96,52,Low,2,Low,Yes,7,Severe,56.75
4,70,21,95,64,Middle,2,Low,No,8,,59.45


In [18]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns
df.describe()

Unnamed: 0,Attendance,Study Hours,Homework Completion,Test Scores,Extracurricular Activities,Sleep Hours,Final Grade
count,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0
mean,84.99482,20.0043,87.45148,74.9409,2.0003,7.49762,67.380745
std,8.966511,6.067709,7.514246,14.653729,1.416397,1.113813,7.036676
min,70.0,10.0,75.0,50.0,0.0,6.0,43.95
25%,77.0,15.0,81.0,62.0,1.0,7.0,62.45
50%,85.0,20.0,87.0,75.0,2.0,7.0,67.35
75%,93.0,25.0,94.0,88.0,3.0,8.0,72.25
max,100.0,30.0,100.0,100.0,4.0,9.0,93.3


In [19]:
# Expand every categorical column into one boolean indicator column per level.
# Rows with no recorded health condition get False in every Health Conditions_* column.
categorical_cols = ['SES', 'Parent Involvement', 'Tutoring', 'Health Conditions']
df = pd.get_dummies(df, columns=categorical_cols)
df.head()

Unnamed: 0,Attendance,Study Hours,Homework Completion,Test Scores,Extracurricular Activities,Sleep Hours,Final Grade,SES_High,SES_Low,SES_Middle,Parent Involvement_High,Parent Involvement_Low,Parent Involvement_Medium,Tutoring_No,Tutoring_Yes,Health Conditions_Mild,Health Conditions_Severe
0,80,28,94,53,1,9,67.7,False,False,True,True,False,False,False,True,False,False
1,92,28,82,99,0,6,71.5,False,True,False,False,False,True,True,False,False,False
2,97,19,75,72,3,9,68.75,False,True,False,True,False,False,False,True,False,True
3,91,23,96,52,2,7,56.75,False,True,False,False,True,False,False,True,False,True
4,70,21,95,64,2,8,59.45,False,False,True,False,True,False,True,False,False,False


In [20]:
# 'Final Grade' is the regression target; every remaining column is a feature
target_col = 'Final Grade'
y = df[target_col]
X = df.drop(columns=[target_col])

In [21]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable
splits = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = splits

In [22]:
# Build a 100-tree random forest (seeded for repeatability) and fit it on the
# training split; fit() returns the estimator itself, so it can be chained.
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)
model

In [23]:
# predictions
# Score the held-out test split with the fitted model; used by the metrics cell below
y_pred = model.predict(X_test)

In [24]:
# performance evaluation for regression problems
from sklearn.metrics import mean_squared_error, r2_score

# Calculate metrics
mse = mean_squared_error(y_test, y_pred)
# RMSE is derived from MSE directly: the `squared=False` argument of
# mean_squared_error was deprecated in scikit-learn 1.4 and removed in 1.6.
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print('Mean Squared Error:', mse)
print('Root Mean Squared Error:', rmse)
print('R-squared:', r2)

Mean Squared Error: 0.8088443320499984
Root Mean Squared Error: 0.8993577330795562
R-squared: 0.9835983021564773


In [25]:
# Save model
import pickle

# Use a context manager so the file handle is closed (and the pickle fully
# flushed to disk) as soon as the dump finishes.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

In [27]:
!pip install gradio

Collecting gradio
  Downloading gradio-4.44.0-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0 (from gradio)
  Downloading fastapi-0.115.0-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.3.0 (from gradio)
  Downloading gradio_client-1.3.0-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.9 (from g

In [None]:
import gradio as gr

def predict_final_grade(attendance, study_hours, homework_completion, test_scores, ses, extracurricular_activities, parent_involvement, tutoring, sleep_hours, health_conditions):
    """Predict a student's final grade from the form inputs.

    The inputs are assembled into a one-row DataFrame, one-hot encoded the same
    way as the training data, aligned to the model's feature columns and passed
    to the notebook-level ``model``.

    Parameters
    ----------
    attendance, study_hours, homework_completion, test_scores,
    extracurricular_activities, sleep_hours : number
        Numeric features.
    ses, parent_involvement, tutoring, health_conditions : str
        Categorical features; each is expanded into indicator columns.

    Returns
    -------
    str
        ``"Predicted Final Grade: <value> <emoji>"`` on success, or
        ``"Error: <message>"`` if an input is missing or prediction fails.
    """
    try:
        raw_inputs = {
            'Attendance': attendance,
            'Study Hours': study_hours,
            'Homework Completion': homework_completion,
            'Test Scores': test_scores,
            'SES': ses,
            'Extracurricular Activities': extracurricular_activities,
            'Parent Involvement': parent_involvement,
            'Tutoring': tutoring,
            'Sleep Hours': sleep_hours,
            'Health Conditions': health_conditions
        }

        # A blank form field arrives as None. Encoding it would silently yield
        # an all-zero / NaN feature vector, so report the blank fields instead.
        missing = [name for name, value in raw_inputs.items() if value is None]
        if missing:
            return f"Error: missing value for {', '.join(missing)}"

        # Create a one-row DataFrame with the input values
        input_data = pd.DataFrame([raw_inputs])

        # One-hot encode categorical features
        input_data = pd.get_dummies(input_data, columns=['SES', 'Parent Involvement', 'Tutoring', 'Health Conditions'])

        # Align to the training columns. Prefer the names stored on the fitted
        # model so this also works when only the pickled model is available;
        # fall back to the notebook's X otherwise. Levels absent from this
        # single row are filled with 0, unknown columns are dropped.
        feature_columns = getattr(model, 'feature_names_in_', None)
        if feature_columns is None:
            feature_columns = X.columns
        input_data = input_data.reindex(columns=feature_columns, fill_value=0)

        # Make prediction (a length-1 array; take its only element)
        predicted_grade = float(model.predict(input_data)[0])

        # Choose an emoji based on the predicted grade
        if predicted_grade >= 90:
            emoji = "🎉"
        elif predicted_grade >= 80:
            emoji = "👍"
        elif predicted_grade >= 70:
            emoji = "🙂"
        else:
            emoji = "😕"

        return f"Predicted Final Grade: {predicted_grade:.2f} {emoji}"
    except Exception as e:
        # Surface any failure as text in the UI rather than a stack trace
        return f"Error: {e}"

# Restore the trained model from disk, reporting (not raising) any failure.
# NOTE: pickle.load can execute arbitrary code; only load model files you created yourself.
try:
    with open('/content/model.pkl', 'rb') as model_file:
        model = pickle.load(model_file)
except Exception as load_error:
    if isinstance(load_error, FileNotFoundError):
        print("Error: Model file not found.")
    else:
        print(f"Error loading model: {load_error}")

# Create the Gradio interface.
# Each numeric field is bounded to the range shown in its label (the span of the
# training data) so out-of-range values are rejected instead of being fed to the
# model, and every field has a default so the form can be submitted as-is.
# Numeric defaults are the training-set medians.
iface = gr.Interface(
    fn=predict_final_grade,
    inputs=[
        gr.Number(label="Attendance (70-100)", value=85, minimum=70, maximum=100),
        gr.Number(label="Study Hours (10-30)", value=20, minimum=10, maximum=30),
        gr.Number(label="Homework Completion (75-100)", value=87, minimum=75, maximum=100),
        gr.Number(label="Test Scores (50-100)", value=75, minimum=50, maximum=100),
        gr.Dropdown(label="SES", choices=["Low", "Middle", "High"], value="Middle"),
        gr.Number(label="Extracurricular Activities (0-4)", value=2, minimum=0, maximum=4),
        gr.Dropdown(label="Parent Involvement", choices=["Low", "Medium", "High"], value="Medium"),
        gr.Dropdown(label="Tutoring", choices=["Yes", "No"], value="No"),
        gr.Number(label="Sleep Hours (6-9)", value=7, minimum=6, maximum=9),
        gr.Dropdown(label="Health Conditions", choices=["None", "Mild", "Severe"], value="None")
    ],
    outputs="text",
    title="Student Academic Performance Predictor"
)

# Launch the interface.
# debug=True keeps the cell running so errors and logs stay visible in the notebook.
iface.launch(debug=True)

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://08b7ee0163526714fb.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)
