In [7]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import xgboost as xgb
import pandas as pd

In [10]:
# Load the dataset from an Excel workbook (relative path, resolved against the
# current working directory).
output_df = pd.read_excel('xlsx/output.xlsx')
# Name of the column passed to the *_metrics functions as the regression target.
target_column_name = 'Transfer Value'

In [32]:
def random_forest_metrics(df: pd.DataFrame, target_column_name: str,
                          test_size: float = 0.05, random_state: int = 42) -> dict:
    """
    Trains a Random Forest Regressor on the given DataFrame and calculates MSE, MAE, and R2.

    Every column other than the target is used as a feature. The rows are split
    into a training set and a held-out test set; the model is fitted on the
    training set and the metrics are computed on the test set only.

    Parameters:
    df (pd.DataFrame): The DataFrame containing the features and target variable.
    target_column_name (str): The name of the target variable column.
    test_size (float): Forwarded to train_test_split as the size of the held-out
        test set. Defaults to 0.05; with so small a hold-out the metrics are
        computed on few rows, so pass a larger value for more stable estimates.
    random_state (int): Seed used for both the train/test split and the
        Random Forest model. Defaults to 42.

    Returns:
    dict: The test-set metrics, keyed by 'mse', 'mae' and 'r2'.
    """
    # Splitting the data into features and target
    X = df.drop(target_column_name, axis=1)
    y = df[target_column_name]

    # Splitting the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Creating and fitting the Random Forest model (seeded so results are repeatable)
    model = RandomForestRegressor(random_state=random_state)
    model.fit(X_train, y_train)

    # Making predictions on the held-out rows and calculating metrics
    predictions = model.predict(X_test)
    return {
        'mse': mean_squared_error(y_test, predictions),
        'mae': mean_absolute_error(y_test, predictions),
        'r2': r2_score(y_test, predictions),
    }

In [33]:
def multilinear_regression_metrics(df: pd.DataFrame, target_column_name: str,
                                   test_size: float = 0.05, random_state: int = 42) -> dict:
    """
    Trains a Multilinear Regression model on the given DataFrame and calculates MSE, MAE, and R2.

    Every column other than the target is used as a feature. The rows are split
    into a training set and a held-out test set; the model is fitted on the
    training set and the metrics are computed on the test set only.

    Parameters:
    df (pd.DataFrame): The DataFrame containing the features and target variable.
    target_column_name (str): The name of the target variable column.
    test_size (float): Forwarded to train_test_split as the size of the held-out
        test set. Defaults to 0.05; with so small a hold-out the metrics are
        computed on few rows, so pass a larger value for more stable estimates.
    random_state (int): Seed for the train/test split. Defaults to 42.

    Returns:
    dict: The test-set metrics, keyed by 'mse', 'mae' and 'r2'.
    """
    # Splitting the data into features and target
    X = df.drop(target_column_name, axis=1)
    y = df[target_column_name]

    # Splitting the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Creating and fitting the Multilinear Regression model
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Making predictions on the held-out rows and calculating metrics
    predictions = model.predict(X_test)
    return {
        'mse': mean_squared_error(y_test, predictions),
        'mae': mean_absolute_error(y_test, predictions),
        'r2': r2_score(y_test, predictions),
    }

In [34]:
def xgboost_metrics(df: pd.DataFrame, target_column_name: str,
                    test_size: float = 0.05, random_state: int = 42) -> dict:
    """
    Trains an XGBoost model on the given DataFrame and calculates MSE, MAE, and R2.

    Every column other than the target is used as a feature. The rows are split
    into a training set and a held-out test set; the model is fitted on the
    training set and the metrics are computed on the test set only.

    Parameters:
    df (pd.DataFrame): The DataFrame containing the features and target variable.
    target_column_name (str): The name of the target variable column.
    test_size (float): Forwarded to train_test_split as the size of the held-out
        test set. Defaults to 0.05; with so small a hold-out the metrics are
        computed on few rows, so pass a larger value for more stable estimates.
    random_state (int): Seed used for both the train/test split and the
        XGBoost model. Defaults to 42.

    Returns:
    dict: The test-set metrics, keyed by 'mse', 'mae' and 'r2'.
    """
    # Splitting the data into features and target
    X = df.drop(target_column_name, axis=1)
    y = df[target_column_name]

    # Splitting the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Creating and fitting the XGBoost model with a squared-error objective
    model = xgb.XGBRegressor(objective='reg:squarederror', random_state=random_state)
    model.fit(X_train, y_train)

    # Making predictions on the held-out rows and calculating metrics
    predictions = model.predict(X_test)
    return {
        'mse': mean_squared_error(y_test, predictions),
        'mae': mean_absolute_error(y_test, predictions),
        'r2': r2_score(y_test, predictions),
    }

In [29]:
# Hold-out metrics for the Random Forest model (dict with 'mse', 'mae', 'r2').
# NOTE(review): this cell's prompt (In [29]) is lower than that of the cell defining
# the function (In [32]), and the output recorded below gives mse / (1 - r2) of
# about 78.4, versus about 69.8 for the linear-regression and XGBoost outputs. It
# therefore appears to come from a different test split or data version -- re-run
# with Restart & Run All before comparing the three models.
random_forest_metrics(output_df, target_column_name)

{'mse': 19.242160976791993, 'mae': 1.3317373245407442, 'r2': 0.754616507122292}

In [30]:
# Hold-out metrics for the linear regression model (dict with 'mse', 'mae', 'r2').
# NOTE(review): this cell's prompt (In [30]) is lower than that of the cell defining
# the function (In [33]), so the recorded output may predate the current definition.
multilinear_regression_metrics(output_df, target_column_name)

{'mse': 30.070568865509827, 'mae': 2.470377795716926, 'r2': 0.5693206616861591}

In [31]:
# Hold-out metrics for the XGBoost model (dict with 'mse', 'mae', 'r2').
# NOTE(review): this cell's prompt (In [31]) is lower than that of the cell defining
# the function (In [34]), so the recorded output may predate the current definition.
xgboost_metrics(output_df, target_column_name)

{'mse': 17.797995992915133,
 'mae': 1.3561489146893535,
 'r2': 0.7450919810721339}