In [None]:
# Gradient Boosting Regressor

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score


# Location of the raw emissions dataset (an absolute path; adjust it if the
# file lives elsewhere in your environment).
DATA_PATH = "/content/Carbon Emission.csv"

# Read the whole CSV into a DataFrame; later cells derive features from `df`.
df = pd.read_csv(DATA_PATH)



The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X[col].fillna(X[col].mode()[0], inplace=True)


Shape of X_final: (10000, 78)
RMSE: 216.1629983302678
R² Score: 0.9550581249948616


In [None]:
# Separate the predictors from the regression target column.
X = df.drop(columns=["CarbonEmission"])
y = df["CarbonEmission"]

# Partition predictor columns by dtype: object (text) columns are treated as
# categorical, 64-bit integer/float columns as numerical.
categorical_cols = X.select_dtypes(include=["object"]).columns
numerical_cols = X.select_dtypes(include=["int64", "float64"]).columns

# Impute missing categorical values with each column's most frequent value.
# The filled Series is assigned back to the column instead of calling
# `X[col].fillna(..., inplace=True)`: that form operates on an intermediate
# object, triggers a pandas FutureWarning, and stops modifying `X` in pandas 3.0.
for col in categorical_cols:
    col_modes = X[col].mode()
    # `mode()` is empty when every value in the column is missing; there is
    # nothing to impute with, so leave such a column untouched rather than
    # failing on the lookup of the first mode.
    if col_modes.empty:
        continue
    X[col] = X[col].fillna(col_modes.iloc[0])



In [None]:
# Fit a one-hot encoder on the categorical columns and expand them into a
# dense (non-sparse) indicator matrix. `handle_unknown="ignore"` tells the
# encoder not to raise on categories it did not see while fitting.
encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
X_encoded = encoder.fit_transform(X[categorical_cols])

# Numerical columns pass through unchanged; only their index is reset so the
# rows line up positionally with the encoded frame built below.
X_numerical = X[numerical_cols].reset_index(drop=True)

# Label the indicator matrix with the encoder's generated feature names and
# give it the same fresh 0..n-1 index.
X_encoded_df = pd.DataFrame(
    X_encoded,
    columns=encoder.get_feature_names_out(),
).reset_index(drop=True)

# Place numerical and encoded categorical features side by side.
X_final = pd.concat([X_numerical, X_encoded_df], axis=1)

# Sanity check: report the dimensions of the assembled feature matrix.
print(f"Shape of X_final: {X_final.shape}")

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_final,
    y,
    test_size=0.2,
    random_state=42,
)



In [None]:
# Configure a gradient boosting regressor (100 estimators, learning rate 0.1,
# fixed seed) and fit it on the training split. `fit` returns the estimator
# itself, so `gbr` is the trained model.
gbr = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    random_state=42,
).fit(X_train, y_train)

# Generate predictions for the held-out test rows.
y_pred = gbr.predict(X_test)



In [None]:
# R² score of the predictions against the held-out targets.
r2 = r2_score(y_test, y_pred)

# RMSE is the square root of the mean squared error on the same data.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

# Report both metrics, RMSE first.
print(f"RMSE: {rmse}")
print(f"R² Score: {r2}")
