In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Step 1: Load the dataset
df = pd.read_csv("sample_student_data.csv")

# Encode the target variable as integer codes: low -> 0, medium -> 1, high -> 2.
# The label order below defines both the codes and the row order of the report.
engagement_labels = ["low", "medium", "high"]
engagement_codes = {label: code for code, label in enumerate(engagement_labels)}

raw_engagement = df["engagement_level"]
encoded_engagement = raw_engagement.map(engagement_codes)

# Series.map turns every value that is not a key of the mapping (typos,
# different casing, missing cells) into NaN. Fail here with a clear message
# instead of letting the NaNs surface later as an opaque error inside fit().
unmapped = encoded_engagement.isna()
if unmapped.any():
    bad_values = sorted(raw_engagement[unmapped].astype(str).unique())
    raise ValueError(
        f"Unexpected engagement_level values (expected {engagement_labels}): {bad_values}"
    )
df["engagement_level"] = encoded_engagement

# Separate features and target
X = df[["interaction_count", "quiz_scores", "completion_rate"]]
y = df["engagement_level"]

# Step 2: Split the data (80% train / 20% test, fixed seed for reproducibility)
# NOTE(review): the split is not stratified; with rare classes consider
# stratify=y. Not applied here because it would change the recorded results.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 3: Train the model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Step 4: Make predictions and evaluate
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
# Pass labels explicitly: without it, classification_report infers the classes
# from y_test/y_pred and raises if that count differs from len(target_names),
# which happens whenever a class is absent from the test split.
report = classification_report(
    y_test,
    y_pred,
    labels=list(engagement_codes.values()),
    target_names=engagement_labels,
)

print("Model Accuracy:", accuracy)
print("\nClassification Report:\n", report)

Model Accuracy: 0.9166666666666666

Classification Report:
               precision    recall  f1-score   support

         low       0.94      0.94      0.94        18
      medium       0.80      0.80      0.80         5
        high       1.00      1.00      1.00         1

    accuracy                           0.92        24
   macro avg       0.91      0.91      0.91        24
weighted avg       0.92      0.92      0.92        24



In [3]:
import pickle
from sklearn.ensemble import RandomForestClassifier

# Persist the classifier that was fitted and evaluated in the training cell
# above (`model`). Refitting here would only rebuild the same estimator: same
# class, same random_state, same X_train / y_train, so the already-fitted
# object is serialized directly.
# NOTE: only unpickle model.pkl from a trusted source; pickle.load can
# execute arbitrary code.
with open("model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)
