# THE CLIMATE CRISIS REPORT

### Preparations for the analysis

In [None]:
# Preparing the environment (Before running this cell you need to upload 'kaggle.json' file)
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 /root/.kaggle/kaggle.json

In [None]:
# Datasets we will use:
#   - owid-co2-data.csv: emissions data, fetched with wget from the URL below
#   - berkeleyearth/climate-change-earth-surface-temperature-data: fetched with the Kaggle CLI
!wget https://nyc3.digitaloceanspaces.com/owid-public/data/co2/owid-co2-data.csv
!kaggle datasets download -d berkeleyearth/climate-change-earth-surface-temperature-data

In [None]:
# Extract the zip file
import zipfile
# Open the archive in a `with` block so it is closed even if extraction fails
with zipfile.ZipFile('/content/climate-change-earth-surface-temperature-data.zip', 'r') as zip_ref:
    zip_ref.extractall('/content')

In [None]:
# Importing libraries
import seaborn as sns
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np

## Turkey

In [None]:
# Preparing the dataset: parse 'dt' into a datetime 'Date' column, drop the raw
# 'dt' column, then derive the calendar 'Year'
data = (
    pd.read_csv("./GlobalLandTemperaturesByCountry.csv")
    .assign(Date=lambda frame: pd.to_datetime(frame['dt']))
    .drop(columns='dt')
    .assign(Year=lambda frame: frame['Date'].dt.year)
)

# For future analysis we work only on yearly data: one mean temperature per
# (Year, Country), because dates are missing and the data is not consistent
yearly_means = data.groupby(['Year', 'Country'])[['AverageTemperature']].mean()
earth_data = yearly_means.reset_index().astype({'Year': 'int'})

# Turkey only, from 1990 onwards
is_turkey = earth_data['Country'] == 'Turkey'
since_1990 = earth_data['Year'] >= 1990
turkey_data = earth_data[since_1990 & is_turkey].reset_index(drop=True)
turkey_data.tail()

In [None]:
# Function to add new rows
def add_new_temps(df, temp):
    """Return a new frame equal to ``df`` plus one row for the following year.

    The appended row takes its 'Year' from the last row of ``df`` plus one,
    copies that row's 'Country', and uses ``temp`` as 'AverageTemperature'.
    ``df`` itself is not modified; the result has a fresh 0..n-1 index.
    """
    previous = df.index[-1]
    next_year = df.at[previous, 'Year'] + 1
    same_country = df.at[previous, 'Country']
    extra_row = pd.DataFrame([{'Year': next_year,
                               'Country': same_country,
                               'AverageTemperature': temp}])
    return pd.concat([df, extra_row], ignore_index=True)

# Yearly average temperatures appended after the last year already in turkey_data.
# NOTE(review): the source of these nine values is not stated in the notebook — confirm provenance.
temp_list = [14.9, 12.2, 13.8, 14.2, 15.4, 14.7, 14.9, 14.9, 14.5]
# Each call adds one row for the next consecutive year; because turkey_data is
# reassigned from itself, re-running this cell appends nine more rows.
for temp in temp_list:
   turkey_data = add_new_temps(turkey_data, temp)

# Show the last 20 rows to inspect the appended years
turkey_data.tail(20)

In [None]:
# Gas emissions data: load the owid-co2-data.csv file from the working directory
data_co2 = pd.read_csv("./owid-co2-data.csv")
# Display (rows, columns) as a quick check of the load
data_co2.shape

In [None]:
# Turkey's gas emissions data: rows with ISO code 'TUR' from 1990 onwards,
# restricted to the year, country and gas columns
turkey_rows = (data_co2['iso_code'] == 'TUR') & (data_co2['year'] >= 1990)
gas_columns = ['year', 'country', 'co2', 'methane', 'nitrous_oxide', 'total_ghg']
turkey_co2 = data_co2.loc[turkey_rows, gas_columns].reset_index(drop=True)
turkey_co2.tail()

In [None]:
# Checking for missing values: number of null entries in each column of turkey_co2
turkey_co2.isnull().sum()

In [None]:
# Filling missing values by the information from climatedata.imf.org.
# Columns are located by name instead of hard-coded positions, so a change in
# the selected columns raises KeyError rather than writing into the wrong column.
# NOTE(review): the figures overwrite the last two rows unconditionally, and the
# CSV is downloaded fresh on every run — confirm those rows are still the years
# (and the gaps) these figures were collected for.
turkey_fill_values = {
    'methane': (64, 64.71),
    'nitrous_oxide': (40.30, 42.64),
    'total_ghg': (516.66, 534.57),
}
for gas, (second_to_last, last) in turkey_fill_values.items():
    gas_position = turkey_co2.columns.get_loc(gas)
    turkey_co2.iloc[-2, gas_position] = second_to_last
    turkey_co2.iloc[-1, gas_position] = last

In [None]:
# Checking for added values: the last rows should now show the filled-in figures
turkey_co2.tail()

In [None]:
# Merging the data
# Capitalise the emissions column names so that 'year' becomes the 'Year' join key
turkey_co2.columns = [name.capitalize() for name in turkey_co2.columns]
turkey_merged = turkey_data.merge(turkey_co2, on='Year', suffixes=('', '_co2'))
# Columns present in both frames come back from the emissions side with a
# '_co2' suffix; discard those duplicates
columns_to_drop = turkey_merged.filter(like='_co2').columns
turkey_data = turkey_merged.drop(columns=columns_to_drop)
turkey_data.tail()

In [None]:
# Renaming the columns to the short names used by the following cells
# NOTE(review): 'Nitrous_oxide' is mapped to 'NO2', but nitrous oxide is N2O
# (NO2 is nitrogen dioxide); the name is kept because later cells reference it.
column_mapping = dict(zip(
    ['Co2', 'Nitrous_oxide', 'Total_ghg'],
    ['CO2', 'NO2', 'GreenhouseGas'],
))

turkey_data = turkey_data.rename(columns=column_mapping)
turkey_data.tail()

In [None]:
# Plotting the data: correlation heatmap of temperature and the gas columns
columns = ['Year', 'Country']  # identifier columns, left out of the correlation
turkey_subset = turkey_data.drop(columns=columns)
turkey_correlations = turkey_subset.corr()
sns.heatmap(data=turkey_correlations, linewidths=0.5, fmt=".2f", cmap='coolwarm', annot=True)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression


# Splitting data into training and testing sets
gas_features = ['CO2', 'Methane', 'NO2', 'GreenhouseGas']
X = turkey_data[gas_features]
y = turkey_data['AverageTemperature']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Training a linear regression model: temperature as a function of the four gas columns
model = LinearRegression()
model.fit(X_train, y_train)

# Evaluating the model
train_score = model.score(X_train, y_train)
test_score = model.score(X_test, y_test)
print("Training R^2 score:", train_score)
print("Testing R^2 score:", test_score)

# One linear trend model per gas (gas level as a function of Year), fitted in a
# single loop instead of four copy-pasted blocks
gas_trend_models = {
    gas: LinearRegression().fit(turkey_data[['Year']], turkey_data[gas])
    for gas in gas_features
}

# Making predictions for future years: the ten years 2023-2032
future_years = range(2023, 2033)
future_year_frame = pd.DataFrame({'Year': future_years})  # built once, reused for every gas

# Creating future_data DataFrame, with columns in the same order used to fit `model`
future_data = pd.DataFrame(
    {gas: gas_trend_models[gas].predict(future_year_frame) for gas in gas_features},
    index=future_years,
)

# Temperature forecast from the extrapolated gas levels
future_predictions = model.predict(future_data)

# Visualizing the results
plt.figure(figsize=(10, 6))
plt.plot(turkey_data['Year'], turkey_data['AverageTemperature'], label='Actual Data')
plt.plot(future_years, future_predictions, 'ro--', label='Predicted Data')
plt.xlabel('Year')
plt.ylabel('Average Temperature (°C)')
plt.title('Average Temperature Prediction for Turkey')
plt.legend()
plt.grid(True)
plt.show()


In [None]:
# Plotting temperature (bars, left axis) against gas emissions (lines, right axis)
fig, ax1 = plt.subplots(figsize=(10, 6))

# Bar plot for temperature
ax1.bar(turkey_data['Year'], turkey_data['AverageTemperature'], color='lightblue', label='Average Temperature')
ax1.set_xlabel('Year')
ax1.set_ylabel('Average Temperature (°C)', color='blue')
ax1.tick_params(axis='y', labelcolor='blue')

# Creating a second y-axis for the emission series
ax2 = ax1.twinx()
# (column, line colour, legend label); the 'NO2' column holds nitrous oxide,
# so its legend entry uses the correct formula N2O
gas_series = [
    ('CO2', 'red', 'CO2'),
    ('Methane', 'green', 'Methane'),
    ('NO2', 'orange', 'N2O'),
    ('GreenhouseGas', 'purple', 'Greenhouse Gasses'),
]
for gas_column, line_color, legend_label in gas_series:
    ax2.plot(turkey_data['Year'], turkey_data[gas_column], marker='o', color=line_color, label=legend_label)

ax2.set_ylabel('Gas Emissions', color='black')
ax2.tick_params(axis='y', labelcolor='black')

# Adding one figure-level legend that covers both axes
fig.legend(loc='upper left', bbox_to_anchor=(0.1,0.9))

plt.title('Gas Emissions and Average Temperature by Year in Turkey')
plt.show()

## Europe

In [None]:
# Europe's gas emissions data: the aggregate 'Europe' rows from 1990 onwards,
# restricted to the year, country and gas columns
europe_rows = (data_co2['country'] == 'Europe') & (data_co2['year'] >= 1990)
emission_columns = ['year', 'country', 'co2', 'methane', 'nitrous_oxide', 'total_ghg']
europe_co2 = data_co2.loc[europe_rows, emission_columns].reset_index(drop=True)
europe_co2.tail()

In [None]:
# Checking for missing values: number of null entries in each column of europe_co2
europe_co2.isnull().sum()

In [None]:
# Filling missing values by the information from Our World in Data.
# Columns are located by name instead of hard-coded positions, so a change in
# the selected columns raises KeyError rather than writing into the wrong column.
# NOTE(review): the figures overwrite the last two rows unconditionally, and the
# CSV is downloaded fresh on every run — confirm those rows are still the years
# (and the gaps) these figures were collected for.
europe_fill_values = {
    'methane': (1153, 1136.34),
    'nitrous_oxide': (426.91, 420.48),
    'total_ghg': (6829.95, 6595.54),
}
for gas, (second_to_last, last) in europe_fill_values.items():
    gas_position = europe_co2.columns.get_loc(gas)
    europe_co2.iloc[-2, gas_position] = second_to_last
    europe_co2.iloc[-1, gas_position] = last

In [None]:
# Helper that appends the next year's temperature row (same definition as add_new_temps in the Turkey section)

def add_new_temps(df, temp):
    """Append one row for the year after the last row of ``df`` and return the result.

    The new row reuses the last row's 'Country', has 'Year' one greater than the
    last row's, and stores ``temp`` under 'AverageTemperature'. The input frame
    is left untouched and the returned frame is re-indexed from 0.
    """
    tail = df.index[-1]
    columns = ('Year', 'Country', 'AverageTemperature')
    values = (df.at[tail, 'Year'] + 1, df.at[tail, 'Country'], temp)
    appended = pd.DataFrame([dict(zip(columns, values))])
    return pd.concat([df, appended], ignore_index=True)

In [None]:
# Country list
# These names are compared with earth_data['Country'] (temperature data) and
# data_co2['country'] (emissions data) in the cells below, so the spelling has
# to match the datasets exactly.
# NOTE(review): presumably the spellings follow the temperature dataset
# (e.g. "Macedonia", "Czech Republic") — verify the emissions data uses the same names.

european_countries = [
    "Albania", "Andorra", "Austria", "Belarus", "Belgium", "Bulgaria",
    "Croatia", "Cyprus", "Czech Republic", "Denmark", "Estonia", "Finland",
    "France", "Germany", "Greece", "Hungary", "Iceland", "Ireland", "Italy",
    "Latvia", "Liechtenstein", "Lithuania", "Luxembourg", "Macedonia",
    "Malta", "Montenegro", "Netherlands", "Norway", "Poland", "Portugal", "Moldova",
    "Romania", "Russia", "Serbia", "Slovakia", "Slovenia", "Spain", "Sweden",
    "Switzerland", "Ukraine", "United Kingdom"
]

In [None]:
# Checking for missing values: the 2020 CO2 figure of every listed country in one table.
# The left merge keeps each listed country, so a country that is absent from the
# emissions data (or has no 2020 value) shows up as NaN instead of an empty printout.
co2_2020 = data_co2.loc[data_co2['year'] == 2020, ['country', 'co2']]
european_co2_2020 = pd.DataFrame({'country': european_countries}).merge(co2_2020, on='country', how='left')
european_co2_2020

In [None]:
# Creating Europe data: yearly temperatures of the listed countries from 1990 onwards
in_europe = earth_data['Country'].isin(european_countries)
from_1990 = earth_data['Year'] >= 1990
europe_data = earth_data[in_europe & from_1990]

In [None]:
# Missing values from Trading Economics website
# Each list holds nine yearly average temperatures for one country; the cell
# below assigns them, in order, to the nine consecutive years that follow the
# last year present in europe_data.
# NOTE(review): values are presumably in °C like the rest of the notebook — confirm.

europe_temps = {
    'Albania': [12.93, 12.87, 12.9, 12.75, 13.43, 13.33, 13.07, 12.89, 13.31],
    'Andorra': [9.02, 8.81, 8.66, 8.59, 9.03, 8.9, 9.04, 8.38, 9.73],
    'Austria': [8.37, 8.31, 7.76, 7.71, 8.55, 8.46, 8.25, 7.41, 8.65],
    'Belarus': [7.97, 8.67, 7.81, 7.75, 8.11, 8.85, 9.09, 7.33, 7.98],
    'Belgium': [11.72, 11.08, 10.76, 11.07, 11.65, 11.39, 11.89, 10.48, 11.98],
    'Bulgaria': [12, 11.85, 11.9, 11.75, 12.34, 12.81, 12.49, 11.97, 12.28],
    'Croatia': [12.78, 12.67, 12.26, 12.29, 13.08, 13.16, 12.62, 12.19, 13.16],
    'Cyprus': [19.53, 18.91, 19.61, 19.21, 20.12, 19.49, 19.78, 19.87, 19.33],
    'Czech Republic': [9.75, 9.64, 9.03, 8.96, 9.91, 9.81, 9.61, 8.5, 9.71],
    'Denmark': [10.21, 9.38, 9.14, 9.05, 9.69, 9.71, 10.08, 8.96, 9.77],
    'Estonia': [6.96, 7.39, 6.48, 6.29, 7.02, 7.2, 8.14, 6.3, 6.9],
    'Finland': [3.33, 3.6, 2.97, 2.4, 3.1, 2.67, 4.15, 2.34, 3.24],
    'France': [12.48, 12.17, 11.74, 11.96, 12.64, 12.38, 12.74, 11.58, 13.16],
    'Germany': [10.64, 10.32, 9.98, 9.91, 10.73, 10.54, 10.78, 9.49, 10.83],
    'Greece': [14.67, 14.35, 14.84, 14.51, 15.2, 15.16, 15.01, 15.09, 14.88],
    'Hungary': [12.65, 12.4, 11.79, 11.86, 12.72, 12.86, 12.24, 11.58, 12.62],
    'Iceland': [2.78, 1.44, 2.68, 2.47, 2.26, 2.31, 2.03, 2.43, 2.11],
    'Ireland': [10.09, 9.44, 9.75, 9.99, 9.69, 9.86, 9.78, 9.94, 10.35],
    'Italy': [13.84, 13.8, 13.54, 13.46, 14.1, 13.94, 13.83, 13.52, 14.38],
    'Latvia': [7.36, 7.94, 7.08, 6.92, 7.6, 7.98, 8.56, 6.82, 7.4],
    'Liechtenstein': [8.44, 8.48, 7.87, 7.92, 8.69, 8.39, 8.65, 7.44, 9.05],
    'Lithuania': [7.84, 8.43, 7.69, 7.5, 8.19, 8.64, 8.96, 7.28, 7.95],
    'Luxembourg': [11.06, 10.52, 10.09, 10.41, 11.13, 10.79, 11.29, 9.77, 11.43],
    'Macedonia': [11.31, 11.18, 11.3, 11.17, 11.8, 11.91, 11.61, 11.39, 11.66],
    'Malta': [20.12, 20.01, 20.19, 19.74, 20.02, 19.87, 19.99, 20.22, 20.26],
    'Montenegro': [10.62, 10.65, 10.34, 10.4, 11.1, 11.13, 10.63, 10.25, 11],
    'Moldova': [11.33, 11.94, 11.42, 11.4, 11.52, 12.39, 12.43, 10.95, 11.9],
    'Netherlands': [11.63, 10.89, 10.67, 10.91, 11.41, 11.26, 11.69, 10.41, 11.72],
    'Norway': [3.3, 2.93, 2.61, 2.28, 2.53, 2.27, 3.39, 2.11, 2.88],
    'Poland': [9.79, 9.79, 9.25, 9.08, 9.95, 10.19, 10.03, 8.75, 9.7],
    'Portugal': [16.2, 16.26, 16.12, 16.52, 15.82, 16.13, 16.42, 16.1, 16.9],
    'Romania': [11.05, 11.09, 10.62, 10.66, 11.21, 11.7, 11.3, 10.35, 11.2],
    'Russia': [-3.56, -2.88, -3.21, -2.89, -3.41, -2.85, -1.74, -3.65, -2.87],
    'Serbia': [12.34, 12.18, 11.72, 11.87, 12.62, 12.79, 12.21, 11.7, 12.56],
    'Slovakia': [10.08, 9.83, 9.21, 9.09, 10.06, 10.17, 9.63, 8.79, 9.81],
    'Slovenia': [10.71, 10.56, 10.13, 10.11, 10.96, 10.99, 10.54, 10, 11.04],
    'Spain': [14.63, 14.58, 14.42, 14.68, 14.29, 14.48, 14.72, 14.31, 15.3],
    'Sweden': [4.23, 3.97, 3.49, 3.21, 3.63, 3.4, 4.53, 3.08, 3.84],
    'Switzerland': [7.37, 7.37, 6.71, 6.85, 7.68, 7.31, 7.64, 6.39, 8.08],
    'Ukraine': [9.74, 10.36, 9.71, 9.84, 9.88, 10.69, 10.86, 9.37, 10.04],
    'United Kingdom': [9.93, 9.18, 9.35, 9.62, 9.49, 9.55, 9.68, 9.39, 10.11]
}

In [None]:
# Adding missing values

# The most recent year in europe_data; the new temperatures are numbered from the year after it.
# NOTE(review): this is the maximum over all countries, so every country is
# assumed to end in the same year — confirm. Re-running the cell shifts the years again.
last_year = europe_data['Year'].max()

# One record per (country, appended year), built in a single pass
new_data = [
    {'Year': last_year + offset, 'Country': country, 'AverageTemperature': temp}
    for country, temps in europe_temps.items()
    for offset, temp in enumerate(temps, start=1)
]

# Converting the records to a dataframe and appending it to the existing one
new_df = pd.DataFrame(new_data)
europe_data = pd.concat([europe_data, new_df], ignore_index=True)
europe_data.head(20)

In [None]:
# Checking for missing values: number of non-null yearly temperatures per country.
# A country with fewer entries than the others is missing some years.
country_temp_counts = europe_data.groupby('Country')['AverageTemperature'].count().reset_index()
country_temp_counts.columns = ['country', 'count']
# Display the table so the check is actually visible in the cell output
country_temp_counts

In [None]:
# Calculating the average temperature for Europe year by year
# (plain mean of the country rows present in each year)
europe_yearly_mean = europe_data.groupby('Year')['AverageTemperature'].mean().reset_index()

# Labelling every row as 'Europe' and ordering the columns as Year / Country / AverageTemperature
europe_data = europe_yearly_mean.assign(Country='Europe')[['Year', 'Country', 'AverageTemperature']]
europe_data

In [None]:
# Merging the data

# Capitalise the emissions column names so that 'year' becomes the 'Year' join key
europe_co2.columns = europe_co2.columns.map(str.capitalize)
europe_merged = europe_data.merge(europe_co2, on='Year', suffixes=('', '_co2'))
# Columns present in both frames come back from the emissions side with a
# '_co2' suffix; discard those duplicates
columns_to_drop = europe_merged.filter(like='_co2').columns
europe_data = europe_merged.drop(columns=columns_to_drop)
europe_data.tail()

In [None]:
# Renaming the columns to the short names used by the following cells
# NOTE(review): 'Nitrous_oxide' is mapped to 'NO2', but nitrous oxide is N2O
# (NO2 is nitrogen dioxide); the name is kept because later cells reference it.

column_mapping = {}
column_mapping['Co2'] = 'CO2'
column_mapping['Nitrous_oxide'] = 'NO2'
column_mapping['Total_ghg'] = 'GreenhouseGas'

europe_data = europe_data.rename(columns=column_mapping)
europe_data.tail()

In [None]:
# Plotting the data correlation between temperature and the gas columns

columns = ['Year', 'Country']  # identifier columns, left out of the correlation
europe_subset = europe_data.drop(columns=columns)
europe_correlations = europe_subset.corr()
sns.heatmap(data=europe_correlations, linewidths=0.5, fmt=".2f", cmap='coolwarm', annot=True)

In [None]:
# Machine learning

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Splitting data into training and testing sets
pollutants = ['CO2', 'Methane', 'NO2', 'GreenhouseGas']
X = europe_data[pollutants]
y = europe_data['AverageTemperature']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Training a linear regression model: temperature as a function of the four gas columns
model = LinearRegression()
model.fit(X_train, y_train)

# Evaluating the model on both splits
train_score = model.score(X_train, y_train)
test_score = model.score(X_test, y_test)
print("Training R^2 score:", train_score)
print("Testing R^2 score:", test_score)


# Training models to predict individual pollutants over time
def train_pollutant_model(feature):
    """Fit and return a LinearRegression of europe_data[feature] on europe_data['Year']."""
    return LinearRegression().fit(europe_data[['Year']], europe_data[feature])


models = {}
for pollutant in pollutants:
    models[pollutant] = train_pollutant_model(pollutant)

# Making predictions for future years (2023-2032): one extrapolated column per pollutant
future_years = range(2023, 2033)
future_year_frame = pd.DataFrame({'Year': future_years})
future_levels = {feature: models[feature].predict(future_year_frame) for feature in pollutants}
future_data = pd.DataFrame(future_levels, index=future_years)

# Making temperature predictions based on future pollutant levels
future_predictions = model.predict(future_data)

# Visualize the results: observed yearly means and the forecast on one chart
plt.figure(figsize=(10, 6))
plt.plot(europe_data['Year'], europe_data['AverageTemperature'], label='Actual Data', marker='o')
plt.plot(future_years, future_predictions, 'ro--', label='Predicted Data')
plt.xlabel('Year')
plt.ylabel('Average Temperature (°C)')
plt.title('Average Temperature Prediction for Europe')
plt.legend()
plt.grid(True)
plt.show()
