In [67]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
import joblib
import warnings
warnings.filterwarnings("ignore")

In [68]:
file_path = 'data.csv'
df = pd.read_csv(file_path)

In [69]:
label_encoder = LabelEncoder()
df['country'] = label_encoder.fit_transform(df['country'])
df

Unnamed: 0,Biofuels and waste,"Coal, peat and oil shale",Oil,Natural gas,CO2TES(tCO2 per TJ),country,year
0,0.002251,0.384980,5.920485,2.383026,59.253,0,1971
1,0.002329,0.372553,6.312181,3.058608,57.281,0,1972
2,0.002381,0.414828,7.169531,3.524308,54.459,0,1973
3,0.002390,0.282616,7.702091,4.081844,54.996,0,1974
4,0.002433,0.326198,8.708710,4.654142,58.629,0,1975
...,...,...,...,...,...,...,...
1576,2.686574,6.237718,3.535056,0.000000,21.237,30,2017
1577,2.742387,6.673881,4.378176,0.000000,22.844,30,2018
1578,2.798347,6.155040,3.988528,0.000000,21.277,30,2019
1579,2.852824,4.937248,3.222354,0.000000,17.693,30,2020


In [70]:
X = df[['country', 'year', 'Biofuels and waste', 'Coal, peat and oil shale', 'Oil', 'Natural gas']]
y = df['CO2TES(tCO2 per TJ)']

In [71]:
X

Unnamed: 0,country,year,Biofuels and waste,"Coal, peat and oil shale",Oil,Natural gas
0,0,1971,0.002251,0.384980,5.920485,2.383026
1,0,1972,0.002329,0.372553,6.312181,3.058608
2,0,1973,0.002381,0.414828,7.169531,3.524308
3,0,1974,0.002390,0.282616,7.702091,4.081844
4,0,1975,0.002433,0.326198,8.708710,4.654142
...,...,...,...,...,...,...
1576,30,2017,2.686574,6.237718,3.535056,0.000000
1577,30,2018,2.742387,6.673881,4.378176,0.000000
1578,30,2019,2.798347,6.155040,3.988528,0.000000
1579,30,2020,2.852824,4.937248,3.222354,0.000000


In [72]:
y

0       59.253
1       57.281
2       54.459
3       54.996
4       58.629
         ...  
1576    21.237
1577    22.844
1578    21.277
1579    17.693
1580    18.603
Name: CO2TES(tCO2 per TJ), Length: 1581, dtype: float64

In [73]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [74]:
# Standardize the features using StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [75]:
model = MLPRegressor(hidden_layer_sizes=(170, 170, 170), max_iter=2000, random_state=42)
model.fit(X_train_scaled, y_train)

MLPRegressor(hidden_layer_sizes=(170, 170, 170), max_iter=2000, random_state=42)

In [76]:
# Make predictions on the test set
y_pred = model.predict(X_test_scaled)

In [77]:
# Calculate R-squared value
r2 = r2_score(y_test, y_pred)
print(f'R-squared: {r2}')

R-squared: 0.9820586750258126


In [49]:
# # Example: Predict CO2 emissions for the year 1980 in a specific country
# country_to_predict = 'Benin'
# year_to_predict = 2016
# biofuel = 0.608
# coal_peat = 0.196
# oil = 6.495
# natural_gas = 0.00384

# # Encode the country using the same LabelEncoder instance
# encoded_country = label_encoder.transform([country_to_predict])[0]

# # Make the prediction
# prediction = model.predict(scaler.transform([[encoded_country, year_to_predict, biofuel, coal_peat, oil, natural_gas]]))

# print(f"Predicted CO2 emissions for {country_to_predict} in {year_to_predict}: {prediction[0]}")

In [78]:
# Save the trained model and preprocessing objects
model_filename = 'mlp_model_oil_gas.joblib'
scaler_filename = 'scaler_oil_gas.joblib'
label_encoder_filename = 'label_encoder_oil_gas.joblib'

joblib.dump(model, model_filename)
joblib.dump(scaler, scaler_filename)
joblib.dump(label_encoder, label_encoder_filename)

['label_encoder_oil_gas.joblib']

In [79]:
loaded_model = joblib.load(model_filename)
loaded_scaler = joblib.load(scaler_filename)
loaded_label_encoder = joblib.load(label_encoder_filename)

In [80]:
# user_country = 'Canada'
# user_year = 2022
# user_oil = 15.0  # Replace with the actual oil value
# user_natural_gas = 8.0  # Replace with the actual natural gas value

country_to_predict = 'Mozambique'
year = 2016
biofuel = 1.587
coal_peat = 0.0047
oil = 4.515
natural_gas = 1.85


# Encode the user input country
encoded_user_country = loaded_label_encoder.transform([country_to_predict])[0]

# Make the prediction using the loaded model and scaler
user_prediction = loaded_model.predict(loaded_scaler.transform([[encoded_user_country, year, biofuel, coal_peat, oil, natural_gas]]))

print(f"Predicted CO2 emissions for {country_to_predict} in {year} with Oil={oil}, Natural Gas={natural_gas}: {user_prediction[0]}")

Predicted CO2 emissions for Mozambique in 2016 with Oil=4.515, Natural Gas=1.85: 14.399403805064084


In [82]:
def predict_function():
    country_to_predict = 'Mozambique'
    year = 2016
    biofuel = 1.587
    coal_peat = 0.0047
    oil = 4.515
    natural_gas = 1.85


    # Encode the country using the same LabelEncoder instance
    encoded_country = label_encoder.transform([country_to_predict])[0]

    # Make the prediction
    prediction = model.predict(scaler.transform([[encoded_user_country, year, biofuel, coal_peat, oil, natural_gas]]))

    print(f"Predicted CO2 emissions for {country_to_predict} in {year_to_predict}: {prediction[0]}")

In [83]:
hiddenlayersize = []
accuracy = []
for lay in range(20,200,10):
    print("-----------------------------------------------------------------")
    print("Hidden Layer: ", lay)
    hiddenlayersize.append(lay)
    model1 = MLPRegressor(hidden_layer_sizes=(lay, lay,lay), max_iter=2000, random_state=42)
    model1.fit(X_train_scaled, y_train)
    y_pred = model1.predict(X_test_scaled)
    mse = mean_squared_error(y_test, y_pred)
    print(f'Mean Squared Error: {mse}')
    r2 = r2_score(y_test, y_pred)
    print(f'R-squared: {r2}')
    accuracy.append(r2*100)
    predict_function()
    print("-----------------------------------------------------------------\n")

-----------------------------------------------------------------
Hidden Layer:  20
Mean Squared Error: 15.370642150868651
R-squared: 0.967073120477719
Predicted CO2 emissions for Mozambique in 2016: 14.399403805064084
-----------------------------------------------------------------

-----------------------------------------------------------------
Hidden Layer:  30
Mean Squared Error: 12.256108600466874
R-squared: 0.9737450519413231
Predicted CO2 emissions for Mozambique in 2016: 14.399403805064084
-----------------------------------------------------------------

-----------------------------------------------------------------
Hidden Layer:  40
Mean Squared Error: 13.215624505188321
R-squared: 0.9716895838428292
Predicted CO2 emissions for Mozambique in 2016: 14.399403805064084
-----------------------------------------------------------------

-----------------------------------------------------------------
Hidden Layer:  50
Mean Squared Error: 14.867870689765141
R-squared: 0.9681

In [86]:
data = {"Size of Hidden Layers": hiddenlayersize, "Accuracy (in %)": accuracy}
df = pd.DataFrame(data)

df.style.hide_index()

Size of Hidden Layers,Accuracy (in %)
20,96.707312
30,97.374505
40,97.168958
50,96.815015
60,96.647606
70,97.879448
80,96.48737
90,96.622948
100,97.193453
110,97.518197
