In [48]:
# Same Code as other files in this cell
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

from sklearn.preprocessing import StandardScaler
from sklearn import metrics
from sklearn.model_selection import train_test_split

survey_data = pd.read_csv('Updated_ExperimentalDataforRnew(in).csv')
survey_data.dropna(inplace = True)
survey_data.drop(survey_data.loc[survey_data['ETHNICITY GROUP']=='Unknown'].index, inplace=True)

survey_data['MAJOR CODE'] = survey_data['MAJOR CODE'].map({
    'BUAD': 0,
    'CIS': 1,
    'ACCT': 2,
    'OTHER': 3
})

survey_data['GENDER'] = survey_data['GENDER'].map({
    'M': 0,
    'F': 1
})

survey_data['ETHNICITY GROUP'] = survey_data['ETHNICITY GROUP'].map({
    'White': 0,
    'African American': 1,
    'Other': 2
})

survey_data['CLASS GROUP'] = survey_data['CLASS GROUP'].map({
    'FRSO': 0,
    'JRSR': 1,
})

survey_data['COVID'] = survey_data['COVID'].map({
    'PRECOVID': 0,
    'COVID': 1,
    'POSTCOVID': 2
})

X = survey_data[['MAJOR CODE', 'GENDER', 'ETHNICITY GROUP', 'CLASS GROUP', 'COVID']]
y = survey_data['NSUGPA']

sc = StandardScaler()
sc.fit(X)
X = sc.transform(X)
y = y.to_numpy()

In [42]:
# Linear Regression (Terrible)

In [65]:
from sklearn import linear_model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.7)
lin = linear_model.LinearRegression()
lin.fit(X_train, y_train)
lin.predict([[-0.96578659, -1.04855251,  1.91713358, -0.69829725,  0.88034084]])
y_pred = lin.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error (MSE): {mse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'R-squared (R²): {r2}')

Mean Squared Error (MSE): 1.426633890577257
Mean Absolute Error (MAE): 0.9476349092310344
R-squared (R²): 0.10475297206960343


In [36]:
# Decision Tree Regressor

In [66]:
from sklearn.tree import DecisionTreeRegressor
regressor = DecisionTreeRegressor()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.7)
regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error (MSE): {mse}')
print(f'R-squared (R²): {r2}')

Mean Squared Error (MSE): 1.7727230306460235
R-squared (R²): -0.06367541708371127


In [None]:
# Random Forest Regressor

In [67]:
from sklearn.ensemble import RandomForestRegressor
regressor = RandomForestRegressor()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.7)
regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error (MSE): {mse}')
print(f'R-squared (R²): {r2}')

Mean Squared Error (MSE): 1.5923195590463497
R-squared (R²): 0.011007337136192619


In [45]:
# K Neighbors

In [68]:
from sklearn.neighbors import KNeighborsRegressor
regressor = KNeighborsRegressor(n_neighbors=5)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.7)
regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error (MSE): {mse}')
print(f'R-squared (R²): {r2}')

Mean Squared Error (MSE): 1.5803528263254114
R-squared (R²): 0.048035697698048274


In [53]:
# Gradient Boost

In [64]:
from sklearn.ensemble import GradientBoostingRegressor
regressor = GradientBoostingRegressor(n_estimators=100, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.7, random_state=0)
regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error (MSE): {mse}')
print(f'R-squared (R²): {r2}')

Mean Squared Error (MSE): 1.424840581682587
R-squared (R²): 0.10370153849848873


In [63]:
# Support Vector

In [None]:
from sklearn.svm import SVR
regressor = SVR()