In [7]:
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier

# Train a RandomForest classifier that predicts a learner's proficiency tier
# ('beginner' / 'intermediate' / 'advanced') and persist the model together
# with the scaler and label encoders fitted here.

# 1️⃣ Load the dataset
df = pd.read_csv("Learning_Data.csv")  # Change filename accordingly

# Rows missing any input of the efficiency score cannot be labelled: the score
# would be NaN, pd.qcut would leave the label as NaN, and that NaN would then
# be handed to the target LabelEncoder. Drop them before anything is fitted.
# (No-op when the data has no missing values in these columns.)
score_inputs = ['Final_Exam_Score', 'Quiz_Scores', 'Time_Spent_on_Videos']
df.dropna(subset=score_inputs, inplace=True)

# 2️⃣ Scale 'Time_Spent_on_Videos' using MinMaxScaler
# The scaled column is only used to build the label; the model itself is
# trained on the raw 'Time_Spent_on_Videos' values (see step 6).
scaler = MinMaxScaler()
df['Time_Spent_on_Videos_Scaled'] = scaler.fit_transform(df[['Time_Spent_on_Videos']])

# 3️⃣ Compute Efficiency Score (matching final.ipynb logic)
# Weighted sum: 40% final exam, 40% quizzes, 20% video time. The scaled video
# time lies in [0, 1], so it is multiplied by 100 before weighting.
df['Efficiency_Score'] = (0.4 * df['Final_Exam_Score'] +
                          0.4 * df['Quiz_Scores'] +
                          0.2 * df['Time_Spent_on_Videos_Scaled'] * 100)

# 4️⃣ Categorize into 'Proficiency_Level'
# q=3 splits on the terciles of the score, lowest tercile -> 'beginner'.
df['Proficiency_Level'] = pd.qcut(df['Efficiency_Score'], q=3, labels=['beginner', 'intermediate', 'advanced'])

# Drop the helper columns that were only needed to derive the label
df.drop(columns=['Time_Spent_on_Videos_Scaled', 'Efficiency_Score'], inplace=True)

# 5️⃣ Encode categorical variables using separate LabelEncoders
# One encoder per column so each can be saved and reused independently.
encoder_education = LabelEncoder()
df['Education_Level'] = encoder_education.fit_transform(df['Education_Level'])

encoder_course = LabelEncoder()
df['Course_Name'] = encoder_course.fit_transform(df['Course_Name'])

# NOTE: LabelEncoder numbers classes in sorted order, so the integer codes do
# not follow beginner < intermediate < advanced; use
# encoder_proficiency.inverse_transform to map predictions back to names.
encoder_proficiency = LabelEncoder()
df['Proficiency_Level'] = encoder_proficiency.fit_transform(df['Proficiency_Level'])  # Encode labels

# 6️⃣ Define features (X) and target (y)
X = df[['Age', 'Education_Level', 'Course_Name', 'Time_Spent_on_Videos', 'Quiz_Scores', 'Final_Exam_Score']]
y = df['Proficiency_Level']

# 7️⃣ Split data into training and testing sets (10% held out)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

# 8️⃣ Train the model (Random Forest)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Score the held-out split so the model is not saved without any check.
# NOTE: the label is derived from three of the features above, so this number
# shows how well the forest recovers the tercile rule rather than how it would
# generalise to independently assigned labels.
test_accuracy = model.score(X_test, y_test)
print(f"Held-out accuracy: {test_accuracy:.3f}")

# 9️⃣ Save the trained model and encoders
joblib.dump(model, "final_model.pkl")
joblib.dump(scaler, "scaler.pkl")
joblib.dump(encoder_education, "encoder_education.pkl")
joblib.dump(encoder_course, "encoder_course.pkl")
joblib.dump(encoder_proficiency, "encoder_proficiency.pkl")  # Save proficiency encoder

print("Model trained and saved successfully!")


Model trained and saved successfully!
