# GBR MODEL

Importing Libraries

In [8]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.ensemble import GradientBoostingRegressor
import numpy as np

Loading Data

In [9]:
# Load the data
file_path = "Data.csv"  # Update the path if the file is not in the same directory
data = pd.read_csv(file_path)

# Drop 'sr no.' column as it is not a feature
data = data.drop(columns=["sr no.","LL"])

Assigning X and Y

In [10]:
# Split the data into features (X) and target (y)
X = data.drop(columns=["cc"])  # All columns except 'cc'
y = data["cc"]  # 'cc' is the target variable

# Split the dataset into 80% training and 20% testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Training ML models and Error Matrics

In [11]:
# Initialize and train the Gradient Boosting Regressor model
model = GradientBoostingRegressor(random_state=42, n_estimators=100, learning_rate=0.1, max_depth=3)
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Calculate error metrics
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X)
# Training metrics
train_mae = mean_absolute_error(y_train, y_train_pred)
train_mse = mean_squared_error(y_train, y_train_pred)
train_rmse = np.sqrt(train_mse)
train_r2 = r2_score(y_train, y_train_pred)

# Testing metrics
test_mae = mean_absolute_error(y, y_test_pred)
test_mse = mean_squared_error(y, y_test_pred)
test_rmse = np.sqrt(test_mse)
test_r2 = r2_score(y, y_test_pred)

# Print metrics
print("Training Metrics:")
print(f"MAE: {train_mae:.4f}")
print(f"MSE: {train_mse:.4f}")
print(f"RMSE: {train_rmse:.4f}")
print(f"R²: {train_r2:.4f}")
print("\nTesting Metrics:")
print(f"MAE: {test_mae:.4f}")
print(f"MSE: {test_mse:.4f}")
print(f"RMSE: {test_rmse:.4f}")
print(f"R²: {test_r2:.4f}")
cc_prediction = model.predict(X)

# Check if predictions have been made
print(f"Length of Y (Actual): {len(y)}")
print(f"Length of Predictions: {len(cc_prediction)}")

Training Metrics:
MAE: 0.0173
MSE: 0.0005
RMSE: 0.0225
R²: 0.9198

Testing Metrics:
MAE: 0.0196
MSE: 0.0007
RMSE: 0.0259
R²: 0.8880
Length of Y (Actual): 391
Length of Predictions: 391


Exporting csv file

In [12]:
# Ensure that both 'Y' and 'cc_prediction' have the same length
if len(y) != len(cc_prediction):
    print("Error: Mismatch in the number of actual and predicted values!")
else:
    # Create a DataFrame with actual and predicted values
    df = pd.DataFrame({
        'Actual Y': y,
        'Predicted Y': cc_prediction
    })

    # Export the DataFrame to a CSV file
    output_file_path = 'GBR_actual_vs_predicted.csv'
    df.to_csv(output_file_path, index=False)

    print(f"CSV file has been saved as {output_file_path}")

CSV file has been saved as GBR_actual_vs_predicted.csv
