In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt

In [None]:
# Load the processed dataset; the path is relative to the notebook's
# working directory.
df = pd.read_csv("../data/processed_study_memory_data.csv")

# Preview only the first rows instead of dumping the whole frame into the
# cell output.
df.head()

In [None]:
# Columns used as model inputs.
features = [
    "days_since_last_revision", "revision_count", "difficulty", "last_score",
    "revision_density", "difficulty_impact", "performance_gap",
]

# X holds the selected input columns; y is the "memory_strength" column
# that the models are fitted to predict.
X = df.loc[:, features]
y = df.loc[:, "memory_strength"]

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Build and fit each estimator on the training split. `fit` returns the
# estimator itself, so construction and fitting can be chained.
lr_model = LinearRegression().fit(X_train, y_train)
rf_model = RandomForestRegressor(random_state=42).fit(X_train, y_train)

# Keep the fitted random forest as the cell's displayed value.
rf_model

In [None]:
# Predict on the held-out split with each fitted model, in the order
# (linear regression, random forest).
lr_preds, rf_preds = (
    fitted.predict(X_test) for fitted in (lr_model, rf_model)
)

In [None]:
def evaluate(y_true, y_pred):
    """Score regression predictions against the true target values.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predicted target values, aligned with ``y_true``.

    Returns
    -------
    tuple
        ``(mae, rmse, r2)``: the mean absolute error, the square root of
        the mean squared error, and the R^2 score, in that order.
    """
    abs_err = mean_absolute_error(y_true, y_pred)
    # RMSE is derived by taking the square root of the MSE.
    root_sq_err = np.sqrt(mean_squared_error(y_true, y_pred))
    return abs_err, root_sq_err, r2_score(y_true, y_pred)

# Score both sets of predictions against the held-out targets; each
# evaluate() call yields (mae, rmse, r2).
(lr_mae, lr_rmse, lr_r2), (rf_mae, rf_rmse, rf_r2) = [
    evaluate(y_test, preds) for preds in (lr_preds, rf_preds)
]

# Display all six metrics: linear regression first, then random forest.
(lr_mae, lr_rmse, lr_r2, rf_mae, rf_rmse, rf_r2)

In [None]:
# Bar labels and heights, kept in the same order.
models = ["Linear Regression", "Random Forest"]
r2_scores = [lr_r2, rf_r2]

# One bar per model showing its R2 on the held-out split.
fig, ax = plt.subplots(figsize=(6, 4))
ax.bar(models, r2_scores)
ax.set_ylabel("R2 Score")
ax.set_title("Model Comparison")
plt.show()