In [9]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder
import xgboost as xgb



def calculate_accuracies(model, data_path=None, test_size=0.2, random_state=None):
    """Train ``model`` on a CSV dataset and score it on a held-out split.

    The CSV is read without a header row. Every non-numeric column is
    label-encoded, the last column is used as the target and all remaining
    columns are used as features.

    Parameters
    ----------
    model : estimator
        Any object exposing ``fit(X, y)`` and ``predict(X)``. It is fitted
        in place on the training split.
    data_path : str or path-like, optional
        Location of the CSV file. When omitted, the notebook's original
        hard-coded location is used so existing calls keep working.
    test_size : float or int, optional
        Forwarded to ``train_test_split``; defaults to 0.2.
    random_state : int or None, optional
        Forwarded to ``train_test_split``. The default ``None`` gives a
        different split on every call; pass an integer to make the reported
        metrics reproducible.

    Returns
    -------
    tuple
        ``(mse, rmse, r2)`` computed on the test split.

    Raises
    ------
    ValueError
        If the dataset has fewer than two columns, i.e. there is no feature
        column left once the target column is removed.
    """
    if data_path is None:
        # Fallback kept for backward compatibility with calls that pass only the model.
        data_path = r"C:\Users\jayso\OneDrive - Howard University\Howard\Summer 2023\PARADIM\API\NewXNiO.csv"

    # Read the dataset (no header row: columns are labelled 0..n-1)
    dataset = pd.read_csv(data_path, header=None)
    if dataset.shape[1] < 2:
        raise ValueError(
            "Dataset must contain at least one feature column and one target column"
        )

    # Encode every non-numeric column as integer codes. A fresh encoder is
    # used per column so no fitted state is carried from one column to the next.
    # NOTE: this also encodes the last (target) column if it is non-numeric.
    categorical_columns = dataset.select_dtypes(exclude=["number"]).columns
    for col in categorical_columns:
        dataset[col] = LabelEncoder().fit_transform(dataset[col])

    # Separate the input (X) and output (y) columns
    X = dataset.iloc[:, :-1]
    y = dataset.iloc[:, -1]

    # Split the data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Train the model and predict on the held-out rows
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Mean squared error on the test split
    mse = mean_squared_error(y_test, y_pred)

    # Root mean squared error, derived from the MSE. The ``squared=False``
    # keyword used previously is no longer accepted by recent scikit-learn
    # releases, and recomputing the MSE was redundant anyway.
    rmse = float(np.sqrt(mse))

    # R-squared score on the test split
    r2 = r2_score(y_test, y_pred)

    return mse, rmse, r2



In [5]:
# Candidate regressors, each built with its library-default settings
# (decision tree, XGBoost, random forest — constructed in that order).
model1, model2, model3 = (
    DecisionTreeRegressor(),
    xgb.XGBRegressor(),
    RandomForestRegressor(),
)

In [31]:
# Evaluate a freshly constructed decision tree and print its test-split metrics.
model1 = DecisionTreeRegressor()
mse, rmse, r2 = calculate_accuracies(model1)

# One "label value" line per metric, in the order MSE, RMSE, R-squared.
for metric_label, metric_value in (
    ("Mean Squared Error (MSE):", mse),
    ("Root Mean Squared Error (RMSE):", rmse),
    ("R-squared Score:", r2),
):
    print(metric_label, metric_value)

Mean Squared Error (MSE): 937182.6445131036
Root Mean Squared Error (RMSE): 968.0819410117635
R-squared Score: 0.7741426165500574


In [21]:
# Evaluate a freshly constructed XGBoost regressor and print its test-split metrics.
model2 = xgb.XGBRegressor()
mse, rmse, r2 = calculate_accuracies(model2)

# One "label value" line per metric, in the order MSE, RMSE, R-squared.
for metric_label, metric_value in (
    ("Mean Squared Error (MSE):", mse),
    ("Root Mean Squared Error (RMSE):", rmse),
    ("R-squared Score:", r2),
):
    print(metric_label, metric_value)

Mean Squared Error (MSE): 736474.8375751046
Root Mean Squared Error (RMSE): 858.1811216608675
R-squared Score: 0.8959724797358888


In [18]:
# Evaluate a freshly constructed random forest and print its test-split metrics.
model3 = RandomForestRegressor()
mse, rmse, r2 = calculate_accuracies(model3)

# One "label value" line per metric, in the order MSE, RMSE, R-squared.
for metric_label, metric_value in (
    ("Mean Squared Error (MSE):", mse),
    ("Root Mean Squared Error (RMSE):", rmse),
    ("R-squared Score:", r2),
):
    print(metric_label, metric_value)

Mean Squared Error (MSE): 813556.6582326876
Root Mean Squared Error (RMSE): 901.9737569534313
R-squared Score: 0.8706910708096544
