# Introduction

### <b>Project Title:</b> Predicting Pakistan CO2 Emissions from Solid Fuel Consumption Using Climate Change Indicators.

### <b>Data:</b> Climate Change Indicators Dataset By The Humanitarian Data Exchange (HDX).

# Importing necessary libraries.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, LSTM




# Loading Data and Converting into DataFrame.

In [2]:
# Read the climate-change indicators CSV. The path is relative, so it is
# resolved against the directory the notebook kernel is running in.
data = pd.read_csv('Datasets/Pak_Climate_Change_Indicators_By_HDX.csv')

In [3]:
# Bind the loaded table to `df`, the name the preview cell below reads.
# NOTE(review): pd.read_csv already returns a DataFrame, so this wrap looks
# redundant - confirm before simplifying.
df = pd.DataFrame(data)

In [4]:
# Preview the first rows to check the column names and spot missing values.
df.head()

Unnamed: 0,Sno,Country Name,Year,CO2 intensity (kg per kg of oil equivalent energy use) Value,CO2 emissions from solid fuel consumption (kt) Value,Cereal yield (kg per hectare) Value,Urban population growth (annual %) Value,Population growth (annual %) Value,"Population, total Value",Urban population Value,...,"Other greenhouse gas emissions, HFC, PFC and SF6 (thousand metric tons of CO2 equivalent) Value",Urban population (% of total population) Value,Agricultural land (sq. km) Value,"Mortality rate, under-5 (per 1,000 live births) Value","Agriculture, forestry, and fishing, value added (% of GDP) Value",CO2 emissions from solid fuel consumption (% of total) Value,Population in urban agglomerations of more than 1 million (% of total population) Value,CO2 emissions from liquid fuel consumption (% of total) Value,Agricultural land (% of land area) Value,RainFall Mean ()
0,1,Pakistan,1960,,5892.869,,,,44988690,9944300,...,,22.104,,252.0,43.189201,41.632124,11.501177,46.295337,,22.968764
1,2,Pakistan,1961,,5804.861,856.4,4.144844,2.364729,46065229,10365137,...,,22.501,357300.0,243.0,41.727426,39.604704,11.799774,46.785089,46.349626,30.017449
2,3,Pakistan,1962,,6189.896,858.0,3.536128,2.431188,47198886,10738219,...,,22.751,358400.0,234.5,40.029233,38.433515,12.057857,46.698543,46.49232,22.300155
3,4,Pakistan,1963,,6681.274,877.7,3.583896,2.486695,48387293,11130045,...,,23.002,358800.0,226.5,38.840015,36.086354,12.315756,48.385819,46.544209,18.846562
4,5,Pakistan,1964,,6523.593,886.5,3.629236,2.531036,49627623,11541400,...,,23.256,363550.0,219.3,38.324054,34.270853,12.575408,47.640146,47.160388,25.169663


In [None]:
# EDA
# The frame loaded above is named `df`; `dataset` was never assigned before
# this cell, so the cell raised NameError on a fresh kernel. Working on a copy
# also leaves `df` untouched, which makes this cell safe to re-run.
dataset = df.copy()

target_column = 'CO2 emissions from solid fuel consumption (kt) Value'

# Display basic statistics
print(dataset.describe())

# Check for missing values
print(dataset.isna().sum())

# Data Cleaning and Preprocessing
# Drop unnecessary columns
dataset = dataset.drop(['Sno', 'Country Name'], axis=1)

# Rows without an observed target cannot supervise a regressor; drop them
# instead of fabricating labels from the column mean.
dataset = dataset.dropna(subset=[target_column])

# Splitting into test-train sets
X = dataset.drop(['Year', target_column], axis=1)
y = dataset[target_column]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Impute missing feature values with the mean.
# The imputer is fitted on the training split only, so no statistic computed
# from the test rows leaks into training. Its array output feeds the scaler
# directly, so a feature with no observed values (which SimpleImputer
# discards) is dropped from both splits alike instead of breaking a
# DataFrame rebuild with a column-count mismatch.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
X_train = imputer.fit_transform(X_train)
X_test = imputer.transform(X_test)

# Feature Scaling
# Same rule as above: learn the min/max from the training split, then apply
# that fitted transform to the test split.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Training CNN
# Reshape input data for CNN: add a trailing axis of length 1 so each sample
# has shape (n_features, 1), matching the `input_shape` given to Conv1D.
n_features = X_train.shape[1]
X_train_cnn = X_train[:, :, np.newaxis]
X_test_cnn = X_test[:, :, np.newaxis]

# Layer stack: convolution -> pooling -> flatten -> dense hidden layer ->
# single-unit output for the regression target.
model_cnn = Sequential([
    Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(n_features, 1)),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(50, activation='relu'),
    Dense(1),
])
model_cnn.compile(loss='mean_squared_error', optimizer='adam')

# The test split is passed as validation data, so its loss is reported
# alongside the training loss while fitting.
model_cnn.fit(
    X_train_cnn,
    y_train,
    validation_data=(X_test_cnn, y_test),
    epochs=50,
    batch_size=32,
)

# Training RNN (LSTM)
# The LSTM is fed the same reshaped arrays as the CNN, so its input shape is
# read off `X_train_cnn`: one step per feature, one value per step.
lstm_input_shape = (X_train_cnn.shape[1], 1)

model_rnn = Sequential([
    LSTM(50, activation='relu', input_shape=lstm_input_shape),
    Dense(1),
])
model_rnn.compile(loss='mean_squared_error', optimizer='adam')

# Same training settings as the CNN; the test split is passed as validation
# data here as well.
model_rnn.fit(
    X_train_cnn,
    y_train,
    validation_data=(X_test_cnn, y_test),
    epochs=50,
    batch_size=32,
)

# Evaluating both models
def evaluate_model(model, X_test, y_test):
    """Score a fitted model's predictions against the true targets.

    Args:
        model: Object exposing ``predict(X_test)``; the result must support
            ``.shape`` and ``.reshape``.
        X_test: Inputs handed unchanged to ``model.predict``.
        y_test: True target values, one per sample.

    Returns:
        dict with keys ``"R2"``, ``"MAE"`` and ``"MSE"`` holding the values
        returned by ``r2_score``, ``mean_absolute_error`` and
        ``mean_squared_error`` respectively.
    """
    predictions = model.predict(X_test)
    # Collapse the predictions to one value per sample so their shape lines up
    # with `y_test`.
    sample_count = predictions.shape[0]
    predictions = predictions.reshape((sample_count,))

    return {
        "R2": r2_score(y_test, predictions),
        "MAE": mean_absolute_error(y_test, predictions),
        "MSE": mean_squared_error(y_test, predictions),
    }

# Evaluate CNN, then RNN, on the same reshaped test inputs; each model's
# scores are printed right after they are computed.
model_scores = []
for heading, fitted_model in (
    ("CNN Model Scores:", model_cnn),
    ("RNN Model Scores:", model_rnn),
):
    current_scores = evaluate_model(fitted_model, X_test_cnn, y_test)
    print(heading)
    print(current_scores)
    model_scores.append(current_scores)

# Keep the per-model names available for later cells.
scores_cnn, scores_rnn = model_scores

# Best matching required visualizations
# One figure per model: the actual test targets drawn against that model's
# predictions on the reshaped test inputs.
prediction_plots = (
    # Visualize predictions vs actual values for CNN
    (model_cnn, 'Predicted CO2 Emissions (CNN)', 'CNN Model - Actual vs Predicted CO2 Emissions'),
    # Visualize predictions vs actual values for RNN
    (model_rnn, 'Predicted CO2 Emissions (RNN)', 'RNN Model - Actual vs Predicted CO2 Emissions'),
)

for fitted_model, predicted_label, figure_title in prediction_plots:
    plt.figure(figsize=(12, 6))
    plt.plot(y_test.values, label='Actual CO2 Emissions')
    plt.plot(fitted_model.predict(X_test_cnn), label=predicted_label)
    plt.title(figure_title)
    plt.xlabel('Samples')
    plt.ylabel('CO2 Emissions (kt)')
    plt.legend()
    plt.show()