In [1]:
# Mount Google Drive into the Colab runtime's filesystem.
from google.colab import drive

DRIVE_MOUNT_POINT = "/content/drive"
drive.mount(DRIVE_MOUNT_POINT)


Mounted at /content/drive


In [2]:
!pip install pandas numpy matplotlib seaborn scikit-learn xgboost joblib




In [3]:
# train_model.py

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib

from sklearn.base import clone
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor


# -----------------------------
# 1. LOAD DATA
# -----------------------------
# Location of the student-performance CSV on the mounted Google Drive.
DATA_PATH = "/content/drive/MyDrive/Colab_Notebooks/AcadOptimizer/Data/student_performance_semester (1).csv"

# `df` is the working frame that every later step of this script reads.
df = pd.read_csv(DATA_PATH)

print("Dataset Loaded ✅")
print(df.head())


# -----------------------------
# 2. EXPLORATORY DATA ANALYSIS
# -----------------------------
print("\n✅ Dataset Info:")
# DataFrame.info() writes its report to stdout and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None" line.
df.info()

# Count of null entries per column.
print("\n✅ Missing Values:")
print(df.isnull().sum())

# Summary statistics produced by DataFrame.describe().
print("\n✅ Statistical Summary:")
print(df.describe())


# -----------------------------
# 3. FEATURE ENGINEERING
# -----------------------------

# Columns describing the semester and class attendance counts.
attendance_columns = ["semester", "total_classes", "attended_classes", "missing_classes"]

# The four assessment components that are added up into performance_score.
assessment_columns = ["midterm_marks", "assignment_marks", "quiz_marks", "internal_marks"]

# Remaining inputs, including the derived performance_score.
other_columns = [
    "final_exam_marks", "attendance_percent",
    "study_hours_per_day", "participation_score",
    "previous_sem_gpa", "performance_score",
]

# Derived column: classes not attended.
df["missing_classes"] = df["total_classes"].sub(df["attended_classes"])

# Derived column: left-to-right sum of the assessment components.
performance_total = df[assessment_columns[0]]
for assessment_column in assessment_columns[1:]:
    performance_total = performance_total + df[assessment_column]
df["performance_score"] = performance_total

# Column the models are trained to predict.
target = "final_gpa"

# Model inputs, in the order they are later saved to feature_names.pkl.
features = [*attendance_columns, *assessment_columns, *other_columns]

X = df.loc[:, features]
y = df.loc[:, target]


# -----------------------------
# 4. TRAIN-TEST SPLIT
# -----------------------------
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print("\n✅ Train/Test Split Done")
print("Train Shape:", X_train.shape)
print("Test Shape:", X_test.shape)


# -----------------------------
# 5. PREPROCESSING PIPELINE
# -----------------------------
# Numeric handling: fill missing values with the column median, then apply
# StandardScaler.
imputer_step = ("imputer", SimpleImputer(strategy="median"))
scaler_step = ("scaler", StandardScaler())
numeric_transformer = Pipeline(steps=[imputer_step, scaler_step])

# Every column listed in `features` is routed through the numeric pipeline.
numeric_block = ("num", numeric_transformer, features)
preprocess = ColumnTransformer(transformers=[numeric_block])


# -----------------------------
# 6. TRAIN MODELS
# -----------------------------

# Candidate regressors, keyed by the name used in reports and in `results`.
models = {
    "LinearRegression": LinearRegression(),
    "RandomForest": RandomForestRegressor(n_estimators=200, random_state=42),
    "XGBoost": XGBRegressor(
        n_estimators=300, learning_rate=0.05, max_depth=5,
        subsample=0.8, colsample_bytree=0.8, random_state=42
    )
}

# name -> {"MAE", "RMSE", "R2 Score"} measured on the held-out test split.
results = {}

print("\n==============================")
print("  TRAINING MODELS")
print("==============================")

for name, model in models.items():

    # clone() gives each pipeline its own unfitted copy of the preprocessor,
    # so fitting one pipeline never overwrites the fitted state of another.
    pipe = Pipeline(steps=[
        ("preprocess", clone(preprocess)),
        ("model", model)
    ])

    # Fit on the training split only; score on the untouched test split.
    pipe.fit(X_train, y_train)
    preds = pipe.predict(X_test)

    mae = mean_absolute_error(y_test, preds)
    rmse = np.sqrt(mean_squared_error(y_test, preds))
    r2 = r2_score(y_test, preds)

    results[name] = {
        "MAE": mae,
        "RMSE": rmse,
        "R2 Score": r2
    }

    print(f"\n✅ {name} Performance:")
    print("MAE:", mae)
    print("RMSE:", rmse)
    print("R2 Score:", r2)


# -----------------------------
# 7. CHOOSE BEST MODEL
# -----------------------------

# The winner is the candidate with the highest R2 on the held-out test split.
best_model_name = max(results, key=lambda x: results[x]["R2 Score"])
print("\n✅ BEST MODEL SELECTED:", best_model_name)

# Build the final pipeline from fresh, unfitted clones (same hyper-parameters)
# so that refitting on the full dataset does not mutate the preprocessor or
# estimator instances that were used for evaluation above.
final_model = Pipeline(steps=[
    ("preprocess", clone(preprocess)),
    ("model", clone(models[best_model_name]))
])

# Refit the selected configuration on all available rows.
final_model.fit(X, y)

# Persist the training column order next to the model so that inference code
# can build its input frame with the same columns.
joblib.dump(list(X.columns), "feature_names.pkl")
print("✅ Saved feature names too")

# -----------------------------
# 8. SAVE MODEL
# -----------------------------

joblib.dump(final_model, "final_model.pkl")
print("\n✅ final_model.pkl Saved Successfully!! 🎉")


Dataset Loaded ‚úÖ
  student_id  semester  total_classes  attended_classes  midterm_marks  \
0     STU001         7             52                47             92   
1     STU002         4             80                24             46   
2     STU003         5             42                34             62   
3     STU004         7             78                68             58   
4     STU005         3             45                33             42   

   assignment_marks  quiz_marks  internal_marks  final_exam_marks  \
0                17           8              18                68   
1                20          14              22                51   
2                18          10              11                78   
3                14          16              10                41   
4                23          12              25                71   

   attendance_percent  study_hours_per_day  participation_score  \
0               90.38                  5.7            

In [4]:
import pandas as pd
import joblib

# Load model and features.
# NOTE: joblib.load unpickles arbitrary objects; only load artifacts that this
# notebook wrote itself, never files from an untrusted source.
model = joblib.load("final_model.pkl")
feature_names = joblib.load("feature_names.pkl")

# Raw inputs for one hypothetical student.
student = {
    "semester": 4,
    "total_classes": 100,
    "attended_classes": 75,
    "midterm_marks": 22,
    "assignment_marks": 18,
    "quiz_marks": 15,
    "internal_marks": 20,
    "final_exam_marks": 60,
    "attendance_percent": 75,
    "study_hours_per_day": 3,
    "participation_score": 6,
    "previous_sem_gpa": 7.8,
}

# Derived features use the same formulas as the training script, so they
# cannot drift out of sync with the raw inputs above.
student["missing_classes"] = student["total_classes"] - student["attended_classes"]
student["performance_score"] = (
    student["midterm_marks"]
    + student["assignment_marks"]
    + student["quiz_marks"]
    + student["internal_marks"]
)

# Select the columns in the saved training order; a KeyError here means the
# sample is missing a feature the model was trained on.
sample = pd.DataFrame([student])[feature_names]

# Predict GPA
prediction = model.predict(sample)
print("🎯 Predicted GPA:", prediction[0])



üéØ Predicted GPA: 0.5434874356871566


In [5]:
# Target CGPA used for both the comparison and the message. The check used to
# compare against 7.5 while the message promised 8; one constant now drives
# both.
# NOTE(review): confirm that 8.0 (rather than 7.5) is the intended cut-off.
TARGET_GPA = 8.0

if prediction[0] < TARGET_GPA:
    print(f"⚠️ You need to improve to reach {TARGET_GPA:g} CGPA.")
else:
    print("✅ Great! You're already meeting the target.")


‚ö†Ô∏è You need to improve to reach 8 CGPA.


In [6]:
import math


def classes_needed(total_classes, attended_classes, target_ratio=0.75):
    """Return how many more classes must be attended to reach the target ratio.

    Computes ``target_ratio * total_classes - attended_classes``, rounded up
    to a whole class and never negative (0 when the target is already met).

    NOTE(review): this treats ``total_classes`` as a fixed total that does not
    grow as more classes are attended -- confirm that matches the dataset.
    """
    shortfall = target_ratio * total_classes - attended_classes
    # round() guards against float noise (e.g. 76.00000000000001) pushing
    # ceil() up by a whole class.
    return max(0, math.ceil(round(shortfall, 9)))


required_classes = classes_needed(total_classes=100, attended_classes=75)
print(f"📅 Attend {required_classes} more classes to reach 75% attendance.")


üìÖ Attend 0.0 more classes to reach 75% attendance.


In [7]:
!pip install streamlit pyngrok


Collecting streamlit
  Downloading streamlit-1.51.0-py3-none-any.whl.metadata (9.5 kB)
Collecting pyngrok
  Downloading pyngrok-7.4.1-py3-none-any.whl.metadata (8.1 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.51.0-py3-none-any.whl (10.2 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m10.2/10.2 MB[0m [31m65.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.4.1-py3-none-any.whl (25 kB)
Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m6.9/6.9 MB[0m [31m80.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyngrok, pydeck, streamlit
Successfully installed pydeck-0.9.1 pyngrok-7.4.1 streamlit-1.51.0


In [8]:
# Start the Streamlit app from Drive as a background shell job (trailing `&`)
# with stdout and stderr discarded (`&>/dev/null`); presumably so the cell can
# return while the server keeps running.
!streamlit run "/content/drive/MyDrive/Colab_Notebooks/AcadOptimizer/App/app.py" &>/dev/null&


In [14]:
import os
from getpass import getpass

from pyngrok import ngrok

# Never store the ngrok authtoken in the notebook: read it from the
# NGROK_AUTHTOKEN environment variable, or prompt for it (input hidden) when
# the variable is not set.
# NOTE(review): the token that used to be hard-coded in this cell has been
# exposed and should be revoked in the ngrok dashboard.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
ngrok.set_auth_token(ngrok_token)

# Open a public tunnel to local port 8501.
public_url = ngrok.connect(8501)
print("🎯 Streamlit app is live at:", public_url)


üéØ Streamlit app is live at: NgrokTunnel: "https://theodicean-nonfeatured-sherley.ngrok-free.dev" -> "http://localhost:8501"
