In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Load the review table from the CSV that sits next to the notebook.
# NOTE(review): presumably RateMyProfessors data for SJSU, judging by the
# file name only -- confirm the provenance.
df = pd.read_csv(filepath_or_buffer="sjsu_rmp.csv")
# Preview the first five rows (5 is the default for head()).
df.head(5)

In [None]:
# Remove the `comment` and `rating_tags` columns, then discard every row that
# still contains a missing value in any of the remaining columns.
unused_columns = ["comment", "rating_tags"]
df = df.drop(columns=unused_columns).dropna()
df.head()

In [None]:
# Ordinal encoding for letter grades: 0 is the best grade ("A+") and 10 the
# worst ("F").
order = {
    "A+": 0,
    "A": 1,
    "A-": 2,
    "B+": 3,
    "B": 4,
    "B-": 5,
    "C+": 6,
    "C": 7,
    "C-": 8,
    "D": 9,
    "F": 10,
}

# Keep only the rows whose grade has an ordinal code. This excludes the
# non-letter answers ("Not sure yet", "Rather not say", "Pass", "Fail",
# "Audit/No Grade" and their underscore spellings) and also any other label
# missing from `order` (the mapping has no "D+" or "D-", for example), which
# would otherwise be mapped to NaN and leak missing values into the features.
# .copy() gives an independent frame so the column assignment below is
# unambiguous.
df = df[df["grade"].isin(list(order))].copy()

# Every remaining grade is a key of `order`, so the mapping is total and the
# result can safely be stored as integers.
df["grade_encoded"] = df["grade"].map(order).astype(int)
df = df.drop(columns=["grade"])
df.head()

In [None]:
# Build a single "First Last" label for every row.
df["professor_name"] = df["professor_first_name"] + " " + df["professor_last_name"]

# Number of rows per professor, indexed by professor name.
prof_counts = df["professor_name"].value_counts()

# Look up each row's professor count with .map so the result keeps df's own
# row index. Indexing `prof_counts[...]` directly would return a Series indexed
# by professor name, and Series.where aligns its condition on the index, so the
# condition would not line up with the rows it is meant to filter.
reviews_per_row = df["professor_name"].map(prof_counts)

# Professors with fewer than 10 rows are pooled into one "Other" category to
# limit the number of one-hot columns created below.
df["professor_name"] = df["professor_name"].where(reviews_per_row >= 10, "Other")
df = df.drop(columns=["professor_first_name", "professor_last_name"])

# One-hot encode the two categorical columns as 0/1 integer indicators.
df = pd.get_dummies(df, columns=["professor_department", "professor_name"], dtype=int)

df.head()

In [None]:
# The target is `difficulty_rating`; every other column of df is a feature.
y = df["difficulty_rating"]
x = df.drop(columns=["difficulty_rating"])

# Hold out 10% of the rows for evaluation; the seed fixes the split.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.1,
    random_state=42,
)

# Fit a 200-tree random forest (seeded) and predict on the held-out rows.
# fit() returns the estimator itself, so the call can be chained.
rf_regressor = RandomForestRegressor(n_estimators=200, random_state=42).fit(
    x_train, y_train
)
y_pred = rf_regressor.predict(x_test)

# Report the error and the explained variance on the held-out rows.
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")