In [187]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier, MLPRegressor

df_new = pd.read_csv('CleanElementry.csv', encoding = 'latin-1')
# Define features (X) and target (y) from the new dataset
X_new = df_new[['Pre_RI_Interest', 'Post_RI_Competence', 'Pre_RI_Competence', 
                'Post_Final', 'Pre_Final', 'Pre_RI_Self_Recognition', 'Post_RI_Self_Recognition',
                'Pre_RI_Recognition_ByOthers', 'Post_RI_Recognition_ByOthers', 
                'Teacher_Score', 'School']] # 'Grade', 'Teacher', 'Ethnicity.1', 'State'
y_new = df_new['Post_RI_Interest']

# Split the new data into training and testing sets (90% train, 10% test)
X_train_new, X_test_new, y_train_new, y_test_new = train_test_split(X_new, y_new, test_size=0.1, random_state=42)

# Initialize models
models_new = {
    'Linear Regression': LinearRegression(),
    'Random Forest': RandomForestRegressor(random_state=42),
    'Gradient Boosting': GradientBoostingRegressor(random_state=42),
    'SVM': SVC(C = 1000),
    'Decision Tree': DecisionTreeClassifier(criterion= 'entropy', ccp_alpha= 0.04),
    'GaussianNB': GaussianNB()
}

# Dictionary to store the new results
results_new = {}

# Train and evaluate each model on the new dataset
for name, model in models_new.items():
    # Train the model
    model.fit(X_train_new, y_train_new)
    
    # Make predictions
    y_pred_new = model.predict(X_test_new)
    
    # Evaluate the model
    mae_new = mean_absolute_error(y_test_new, y_pred_new)
    r2_new = r2_score(y_test_new, y_pred_new)
    score = model.score(X_test_new, y_test_new)
    
    # Store the results
    results_new[name] = {'MAE': mae_new, 'R²': r2_new, 'Score': score}

# Convert new results to DataFrame for easy viewing
results_df_new = pd.DataFrame(results_new).T

results_df_new

Unnamed: 0,MAE,R²,Score
Linear Regression,1.994347,0.825503,0.825503
Random Forest,2.11125,0.820009,0.820009
Gradient Boosting,2.290406,0.781319,0.781319
SVM,1.75,0.786594,0.625
Decision Tree,3.0,0.606019,0.375
GaussianNB,2.75,0.556772,0.5
