In [23]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
import joblib
import warnings
# Suppress every Python warning for the rest of the session.
# NOTE(review): this blanket filter also hides warnings raised by the libraries
# used below (e.g. during model fitting) -- consider narrowing it.
warnings.filterwarnings("ignore")

In [24]:
# Load the dataset from a CSV file; the relative path is resolved against the
# current working directory.
file_path = 'data.csv'
df = pd.read_csv(file_path)

In [25]:
# Display the loaded frame.
df

Unnamed: 0,Biofuels and waste,"Coal, peat and oil shale",Oil,Natural gas,CO2TES(tCO2 per TJ),country,year
0,0.002251,0.384980,5.920485,2.383026,59.253,Algeria,1971
1,0.002329,0.372553,6.312181,3.058608,57.281,Algeria,1972
2,0.002381,0.414828,7.169531,3.524308,54.459,Algeria,1973
3,0.002390,0.282616,7.702091,4.081844,54.996,Algeria,1974
4,0.002433,0.326198,8.708710,4.654142,58.629,Algeria,1975
...,...,...,...,...,...,...,...
1576,2.686574,6.237718,3.535056,0.000000,21.237,Zimbabwe,2017
1577,2.742387,6.673881,4.378176,0.000000,22.844,Zimbabwe,2018
1578,2.798347,6.155040,3.988528,0.000000,21.277,Zimbabwe,2019
1579,2.852824,4.937248,3.222354,0.000000,17.693,Zimbabwe,2020


In [26]:
# Replace each country name with an integer code, overwriting the `country`
# column in place. The fitted encoder is kept so the same mapping can be
# applied to new inputs later.
# NOTE(review): this cell is not idempotent -- running it a second time would
# refit the encoder on the integer codes instead of the country names.
label_encoder = LabelEncoder()
df['country'] = label_encoder.fit_transform(df['country'])

In [27]:
# Display the frame again; `country` now holds integer codes instead of names.
df

Unnamed: 0,Biofuels and waste,"Coal, peat and oil shale",Oil,Natural gas,CO2TES(tCO2 per TJ),country,year
0,0.002251,0.384980,5.920485,2.383026,59.253,0,1971
1,0.002329,0.372553,6.312181,3.058608,57.281,0,1972
2,0.002381,0.414828,7.169531,3.524308,54.459,0,1973
3,0.002390,0.282616,7.702091,4.081844,54.996,0,1974
4,0.002433,0.326198,8.708710,4.654142,58.629,0,1975
...,...,...,...,...,...,...,...
1576,2.686574,6.237718,3.535056,0.000000,21.237,30,2017
1577,2.742387,6.673881,4.378176,0.000000,22.844,30,2018
1578,2.798347,6.155040,3.988528,0.000000,21.277,30,2019
1579,2.852824,4.937248,3.222354,0.000000,17.693,30,2020


In [28]:
# Model inputs are the encoded country and the year; the target is the
# CO2TES column.
feature_columns = ['country', 'year']
target_column = 'CO2TES(tCO2 per TJ)'
X = df[feature_columns]
y = df[target_column]

In [29]:
# Display the feature frame (encoded country and year).
X

Unnamed: 0,country,year
0,0,1971
1,0,1972
2,0,1973
3,0,1974
4,0,1975
...,...,...
1576,30,2017
1577,30,2018
1578,30,2019
1579,30,2020


In [30]:
# Display the target series.
y

0       59.253
1       57.281
2       54.459
3       54.996
4       58.629
         ...  
1576    21.237
1577    22.844
1578    21.277
1579    17.693
1580    18.603
Name: CO2TES(tCO2 per TJ), Length: 1581, dtype: float64

In [31]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [32]:
# Standardize the features: the scaling statistics are learned from the
# training split only and then applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [11]:
# Train an MLP regressor with three hidden layers of 180 units each on the
# scaled training data; the seed fixes the weight initialisation.
hidden_layers = (180, 180, 180)
model = MLPRegressor(
    hidden_layer_sizes=hidden_layers,
    max_iter=2000,
    random_state=42,
)
# Kept as the last expression so the cell still displays the fitted estimator.
model.fit(X_train_scaled, y_train)

MLPRegressor(hidden_layer_sizes=(180, 180, 180), max_iter=2000, random_state=42)

In [12]:
# Make predictions on the held-out test set (inputs already scaled with the
# scaler fitted on the training split)
y_pred = model.predict(X_test_scaled)

In [13]:
# Mean squared error between the test targets and the model's predictions
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

Mean Squared Error: 37.94717124344468


In [14]:
# Calculate R-squared value on the test set (1.0 would be a perfect fit)
r2 = r2_score(y_test, y_pred)
print(f'R-squared: {r2}')

R-squared: 0.9187098415615863


In [17]:
# Example: predict the target value for one country and one year
country_to_predict = 'Algeria'
year_to_predict = 2020

# Encode the country using the same LabelEncoder instance that was fitted on the dataset
encoded_country = label_encoder.transform([country_to_predict])[0]

# Build the input as a one-row frame carrying the training column names: the
# scaler was fitted on a DataFrame, so a bare list would not have matching
# feature names.
features = pd.DataFrame([[encoded_country, year_to_predict]], columns=['country', 'year'])

# Scale with the fitted scaler, then make the prediction
prediction = model.predict(scaler.transform(features))

print(f"Predicted CO2 emissions for {country_to_predict} in {year_to_predict}: {prediction[0]}")

Predicted CO2 emissions for Algeria in 2020: 51.81548894261525


In [33]:
def predict_function(country_to_predict='Algeria', year_to_predict=1980):
    """Print the model's prediction for one country and year.

    Uses the notebook-level ``label_encoder``, ``scaler`` and ``model`` as they
    are bound at call time, so it reports on whichever model is current.

    Parameters
    ----------
    country_to_predict : str
        Country name as it appears in the data the encoder was fitted on.
        Defaults to 'Algeria'.
    year_to_predict : int
        Year to predict for. Defaults to 1980.

    Returns
    -------
    None
        The result is printed, not returned.
    """
    # Encode the country using the same LabelEncoder instance
    encoded_country = label_encoder.transform([country_to_predict])[0]

    # One-row frame with the training column names: the scaler was fitted on a
    # DataFrame, so a bare list would not carry matching feature names.
    features = pd.DataFrame([[encoded_country, year_to_predict]], columns=['country', 'year'])

    # Scale the input, then make the prediction
    prediction = model.predict(scaler.transform(features))

    print(f"Predicted CO2 emissions for {country_to_predict} in {year_to_predict}: {prediction[0]}")

In [34]:
# Sweep the width of a three-hidden-layer MLP (20, 30, ..., 190 units per layer)
# and record the test-set R-squared obtained for each width.
hiddenlayersize = []
# NOTE: despite the name, this list holds R-squared * 100, not an accuracy.
accuracy = []

# Remember the model trained earlier: the sweep has to rebind the global `model`
# (predict_function() reads it), and the save cell further down persists
# whatever `model` refers to. None if no model has been trained yet.
trained_model = globals().get('model')

for lay in range(20,200,10):
    print("-----------------------------------------------------------------")
    print("Hidden Layer: ", lay)
    hiddenlayersize.append(lay)
    # Same seed and iteration budget for every candidate so only the width varies.
    model = MLPRegressor(hidden_layer_sizes=(lay, lay,lay), max_iter=2000, random_state=42)
    model.fit(X_train_scaled, y_train)
    # Sweep-local names keep the metrics of the earlier model (y_pred, mse, r2) intact.
    sweep_pred = model.predict(X_test_scaled)
    sweep_mse = mean_squared_error(y_test, sweep_pred)
    print(f'Mean Squared Error: {sweep_mse}')
    sweep_r2 = r2_score(y_test, sweep_pred)
    print(f'R-squared: {sweep_r2}')
    accuracy.append(sweep_r2*100)
    # Prints a sample prediction from the candidate currently bound to `model`.
    predict_function()
    print("-----------------------------------------------------------------\n")

# Put the previously trained model back so later cells do not silently pick up
# the last sweep candidate.
if trained_model is not None:
    model = trained_model

-----------------------------------------------------------------
Hidden Layer:  20
Mean Squared Error: 436.51910937134716
R-squared: 0.0648918905036443
Predicted CO2 emissions for Algeria in 1980: 34.77982253483816
-----------------------------------------------------------------

-----------------------------------------------------------------
Hidden Layer:  30
Mean Squared Error: 340.1222023980003
R-squared: 0.27139265417229497
Predicted CO2 emissions for Algeria in 1980: 37.80453329561535
-----------------------------------------------------------------

-----------------------------------------------------------------
Hidden Layer:  40
Mean Squared Error: 309.4665319747163
R-squared: 0.3370630117796499
Predicted CO2 emissions for Algeria in 1980: 36.47143676779429
-----------------------------------------------------------------

-----------------------------------------------------------------
Hidden Layer:  50
Mean Squared Error: 153.88545139051067
R-squared: 0.6703476882466597

In [35]:
# Tabulate the sweep: one row per hidden-layer width with its test-set score.
# NOTE: the "Accuracy (in %)" column holds R-squared * 100 as collected in the
# sweep loop, not a classification accuracy.
data = {"Size of Hidden Layers": hiddenlayersize, "Accuracy (in %)": accuracy}
# Dedicated name so the dataset frame `df` is not overwritten by the results table.
sweep_results = pd.DataFrame(data)

# Styler.hide_index() was removed in pandas 2.0; use hide(axis="index") where
# available and fall back to the old method on versions that predate it.
sweep_style = sweep_results.style
sweep_style.hide(axis="index") if hasattr(sweep_style, "hide") else sweep_style.hide_index()

Size of Hidden Layers,Accuracy (in %)
20,6.489189
30,27.139265
40,33.706301
50,67.034769
60,29.252563
70,6.529008
80,42.261511
90,38.288044
100,29.250235
110,43.795403


### Save the trained model

In [15]:
# File names for the three persisted artifacts (written to the working directory).
model_filename = 'mlp_model.joblib'
scaler_filename = 'scaler.joblib'
label_encoder_filename = 'label_encoder.joblib'

# Persist the model together with the scaler and the label encoder: the
# prediction cells use all three to go from a (country, year) pair to a prediction.
# NOTE(review): this saves whatever `model` is bound to when the cell runs --
# confirm it is the intended one.
joblib.dump(model, model_filename)
joblib.dump(scaler, scaler_filename)
joblib.dump(label_encoder, label_encoder_filename)

['label_encoder.joblib']

### Load the saved model and preprocessing objects

In [16]:
# Reload the three artifacts from the files written by the save cell.
# NOTE(review): joblib.load unpickles its input, so only load files from a
# trusted source.
loaded_model = joblib.load(model_filename)
loaded_scaler = joblib.load(scaler_filename)
loaded_label_encoder = joblib.load(label_encoder_filename)

### Example: Predict using user inputs

In [20]:
# Inputs for the example prediction: a country name and a year.
user_country = 'Mozambique'
user_year = 2016

#### Encode the user input country

In [21]:
# Map the country name to its integer code with the reloaded encoder;
# transform() takes a sequence, hence the one-element list and the [0].
# NOTE(review): presumably the name must be one the encoder was fitted on --
# confirm how unknown countries should be handled.
encoded_user_country = loaded_label_encoder.transform([user_country])[0]

#### Make the prediction using the loaded model and scaler

In [22]:
# One-row frame with the training column names: the scaler was fitted on a
# DataFrame, so a bare list would not carry matching feature names.
user_features = pd.DataFrame([[encoded_user_country, user_year]], columns=['country', 'year'])

# Scale with the reloaded scaler, then predict with the reloaded model.
user_prediction = loaded_model.predict(loaded_scaler.transform(user_features))
print(f"Predicted CO2 emissions for {user_country} in {user_year}: {user_prediction[0]}")

Predicted CO2 emissions for Mozambique in 2016: 16.866792456291677
