In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score


# Path of the CSV file that is read into `dataset` below.
file_path = "college_english_learning_dataset.csv"
dataset = pd.read_csv(file_path)

# Name of the column the model predicts.
target = "Quiz_Score"

# Input columns handed to the model, in the order they are selected.
features = [
    "Session_Type",
    "Engagement_Level",
    "Response_Time",
    "Time_Spent",
    "Mistakes_Observed",
    "Activity_Status",
    "Model_Used",
]

# Target vector and feature frame, both taken straight from `dataset`.
y = dataset[target]
X = dataset[features]


# Column groups are derived from dtypes: object columns are treated as
# categorical, int64/float64 columns as numerical.
categorical_features = X.select_dtypes(include=['object']).columns
numerical_features = X.select_dtypes(include=['int64', 'float64']).columns

# Split BEFORE fitting any preprocessing step. Fitting the scaler/encoder on
# the full dataset lets held-out rows influence the scaling statistics and the
# learned category list, which leaks test information into training and makes
# the reported metrics unreliable. Same test_size/random_state as before, so
# the row partition (and therefore y_train / y_test) is unchanged.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# drop='first' removes one indicator column per categorical feature;
# handle_unknown='ignore' lets transform() accept categories that were not
# present when the encoder was fitted (e.g. ones that only occur in the test
# rows or in ad-hoc inputs) instead of raising.
encoder = OneHotEncoder(sparse_output=False, drop='first', handle_unknown='ignore')
scaler = StandardScaler()

# Fit the transformers on the training rows only. Column layout is
# [scaled numerical | one-hot categorical], matching what later code builds
# for new inputs.
X_train = np.hstack([
    scaler.fit_transform(X_train_raw[numerical_features]),
    encoder.fit_transform(X_train_raw[categorical_features]),
])

# Apply the already-fitted transformers to the held-out rows (no refit).
X_test = np.hstack([
    scaler.transform(X_test_raw[numerical_features]),
    encoder.transform(X_test_raw[categorical_features]),
])

# Full-dataset matrices are still exposed under their original names for any
# later code that reads them; they are now produced by the train-fitted
# transformers rather than by transformers fitted on every row.
X_numerical_scaled = scaler.transform(X[numerical_features])
X_categorical_encoded = encoder.transform(X[categorical_features])
X_processed = np.hstack([X_numerical_scaled, X_categorical_encoded])

# Fit a gradient-boosting regressor on the training split; fit() returns the
# estimator itself, so construction and fitting are chained. The fixed
# random_state keeps the fit repeatable.
regressor = GradientBoostingRegressor(random_state=42).fit(X_train, y_train)

# Predict the held-out rows and score the predictions against the true targets.
y_pred = regressor.predict(X_test)
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

# Report both metrics rounded to two decimals.
print("Model Evaluation:")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"R-Squared (R²): {r2:.2f}")

# One hand-written example row. Values are positional: each list must line up
# with the order of `numerical_features` / `categorical_features` respectively.
sample_numerical = [2, 5.0, 3.5, 100]
sample_categorical = ['SessionB', 'Active', 'Model2']

# Single-row frame laid out as categorical columns followed by numerical ones.
sample_columns = [*categorical_features, *numerical_features]
sample_row = [*sample_categorical, *sample_numerical]
sample_input_df = pd.DataFrame([sample_row], columns=sample_columns)

# Push the row through the already-fitted transformers (transform only).
sample_numerical_scaled = scaler.transform(sample_input_df[numerical_features])
sample_categorical_encoded = encoder.transform(sample_input_df[categorical_features])

# Reassemble in the training layout: scaled numerical block first, then the
# one-hot block, joined column-wise.
sample_input_processed = np.concatenate(
    [sample_numerical_scaled, sample_categorical_encoded], axis=1
)

# Predict and report the score for the single sample row.
sample_prediction = regressor.predict(sample_input_processed)
print("\nPrediction for Sample Input:")
print(f"Predicted Quiz Score: {sample_prediction[0]:.2f}")


Model Evaluation:
Mean Squared Error (MSE): 229.38
R-Squared (R²): -0.10

Prediction for Sample Input:
Predicted Quiz Score: 93.42


