In [10]:
import os
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
import joblib

# ==== 1. LOAD DATASET ====
# The CSV is read from a fixed absolute location on the D: drive.
dataset_path = r"D:\resume project\Student_Academic_Performance_Prediction\StudentsPerformance.csv"
df = pd.read_csv(dataset_path)

# ==== 2. FEATURES & TARGET ====
num_features = ["math score", "reading score", "writing score"]
cat_features = [
    "gender",
    "race/ethnicity",
    "parental level of education",
    "lunch",
    "test preparation course",
]
# Target is the row-wise mean of the three score columns.
# NOTE(review): those same three columns are also model inputs (num_features),
# so the target is a direct function of the features -- confirm this is intended
# before reading anything into the model's accuracy.
target = df[num_features].mean(axis=1)

# ==== 3. PIPELINE ====
# Categorical columns are one-hot encoded; the score columns pass through untouched.
pre = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), cat_features),
        ("num", "passthrough", num_features),
    ]
)
model = RandomForestRegressor(n_estimators=50, random_state=42)
pipe = Pipeline(steps=[("pre", pre), ("model", model)])

# ==== 4. TRAIN ====
# Fit on every row of the dataset (no hold-out split in this cell).
feature_columns = cat_features + num_features
pipe.fit(df[feature_columns], target)

# ==== 5. SAVE MODEL ====
model_dir = r"D:\resume project\Student_Academic_Performance_Prediction\models"
model_path = os.path.join(model_dir, "best_model.pkl")
os.makedirs(model_dir, exist_ok=True)  # no-op when the folder already exists
joblib.dump(pipe, model_path)

print(f"✅ Model trained and saved at: {model_path}")


✅ Model trained and saved at: D:\resume project\Student_Academic_Performance_Prediction\models\best_model.pkl


In [12]:
import os
import joblib

# Write the in-memory pipeline to models/best_model.pkl, relative to the
# current working directory.
MODEL_PATH = os.path.join("models", "best_model.pkl")
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)

# `pipe` is not defined in this cell; it must already exist in the kernel
# from an earlier training cell.
joblib.dump(pipe, MODEL_PATH)
print("Model saved to " + MODEL_PATH)


Model saved to models\best_model.pkl


In [3]:
# ==== TRAIN & SAVE MODEL SCRIPT ====

import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor

# 1. Load dataset (update if your filename is different)
dataset_path = r"D:\resume project\Student_Academic_Performance_Prediction\StudentsPerformance.csv"
df = pd.read_csv(dataset_path)

# 2. Features & Target
# The three score columns are removed from the inputs and averaged row-wise
# to form the regression target.
score_columns = ["math score", "reading score", "writing score"]
X = df.drop(columns=score_columns)
y = df[score_columns].mean(axis=1)  # average score

# 3. Categorical columns
cat_features = ["gender", "race/ethnicity", "parental level of education", "lunch", "test preparation course"]

# 4. Preprocessing
# One-hot encode the categoricals; any other column left in X is passed
# through unchanged. The former ("num", StandardScaler(), []) entry selected
# no columns and therefore did nothing, so it has been dropped.
preprocessor = ColumnTransformer([
    ("cat", OneHotEncoder(handle_unknown="ignore"), cat_features),
], remainder="passthrough")

# 5. Pipeline (preprocessing + model)
pipe = Pipeline([
    ("preprocessor", preprocessor),
    ("model", RandomForestRegressor(n_estimators=100, random_state=42))
])

# 6. Train-test split (fixed seed so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 7. Train model on the training portion only
pipe.fit(X_train, y_train)

# 8. Evaluate on the held-out 20% so the split is actually used; for a
#    regressor pipeline, score() reports R^2.
test_r2 = pipe.score(X_test, y_test)
print(f"Hold-out R^2: {test_r2:.3f}")

# 9. Save trained pipeline as best_model.pkl
joblib.dump(pipe, r"D:\resume project\Student_Academic_Performance_Prediction\models\best_model.pkl")

print("✅ Model trained and saved successfully as best_model.pkl")


✅ Model trained and saved successfully as best_model.pkl


In [5]:
import joblib

# Reload the pipeline written by the training cells and report what kind of
# object came back.
model_path = r"D:\resume project\Student_Academic_Performance_Prediction\models\best_model.pkl"
model = joblib.load(model_path)

loaded_type = type(model)
print("✅ Model loaded successfully:", loaded_type)


✅ Model loaded successfully: <class 'sklearn.pipeline.Pipeline'>


In [7]:
import joblib
import streamlit as st
import os

# Location of the serialized pipeline, relative to the working directory
MODEL_PATH = "models/best_model.pkl"

# `model` stays None unless the load below succeeds.
# NOTE(review): the messages mention a fallback model, but none is created in
# this cell -- confirm it is provided elsewhere.
model = None
if not os.path.exists(MODEL_PATH):
    st.error("❌ No trained model found. Using fallback model.")
else:
    try:
        model = joblib.load(MODEL_PATH)
        st.success("✅ Model loaded successfully! You are using the trained model.")
    except Exception as load_error:
        # Report the failure in the UI instead of raising.
        st.error(f"⚠️ Found model file but could not load it. Using fallback model. Error: {load_error}")


2025-08-17 21:21:47.251 
  command:

    streamlit run D:\ProgramData\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]


In [10]:
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
import joblib

# ==== 1. Load dataset ====
df = pd.read_csv(r"D:\resume project\Student_Academic_Performance_Prediction\StudentsPerformance.csv")

# ==== 2. Features & Target ====
num_features = ["math score", "reading score", "writing score"]
cat_features = [
    "gender",
    "race/ethnicity",
    "parental level of education",
    "lunch",
    "test preparation course",
]

# Inputs: the categorical columns followed by the three score columns.
X = df[[*cat_features, *num_features]]
# Target: row-wise average of the three scores.
# NOTE(review): the target is computed from columns that are also in X, so it
# is a direct function of the inputs -- confirm this is intended.
y = df[num_features].mean(axis=1)

# ==== 3. Preprocessing + Model ====
# One-hot encode categoricals; pass the score columns through unchanged.
pre = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), cat_features),
        ("num", "passthrough", num_features),
    ]
)
model = RandomForestRegressor(n_estimators=100, random_state=42)
pipe = Pipeline(steps=[("pre", pre), ("model", model)])

# ==== 4. Train ====
# Fit on the full dataset (no hold-out split in this cell).
pipe.fit(X, y)

# ==== 5. Save trained model ====
# Overwrites any existing best_model.pkl at this location.
joblib.dump(pipe, r"D:\resume project\Student_Academic_Performance_Prediction\models\best_model.pkl")

print("✅ Trained model saved successfully!")


✅ Trained model saved successfully!
