---
## Note from the Teachers:
We added a header with information about your project. 
This is useful since we want to keep a database of all the cool projects participants have made during the courses.
We will save the code in our database and host a page on GitHub that shows all the projects. It is therefore more practical to add some information to the header of each notebook.
Thanks a lot for the understanding and for taking care of it.

You can delete this cell if you want

---

# Climate Change
We simulate a climate scenario analysis using three IPCC climate scenarios (RCP3PD, RCP4.5, and RCP6) and predict the temperature for each scenario in the year 2100.

## Participants:
Vineet Shah,
Shilpika Sarvepalli

### Course and Semester
Deep Learning from Scratch,
SoSe2021

### License
If you are releasing the software under some certain license, you can mention it and also include the `LICENSE.md` file in the folder

---

# Climate Scenario Analysis
Predicting the temperature anomaly

In [None]:
#Setting Up

import os # import OS module to interact with Operating System
import datetime # import module to manipulate date and time

import IPython # import ipython command shell
import IPython.display # import display tools

import matplotlib as mpl # import matplot library for visualization
import matplotlib.pyplot as plt # import library to plot timeseries data

import numpy as np # import numpy library for mathematical operations
import pandas as pd # import pandas library to process timeseries data
import seaborn as sns # import seaborn library to process statistical graphics
import tensorflow as tf # import tensorflow 
from tensorflow.keras import layers

from tensorflow import keras # import Keras to build and summarize the model
import plotly.express as px # import plotly.express to create graphs

from sklearn.preprocessing import MinMaxScaler # to normalize training data

# Notebook-wide matplotlib defaults: 8x6 figures and no grid lines on the axes
mpl.rcParams.update({'figure.figsize': (8, 6), 'axes.grid': False})


In [None]:
# Importing data: upload Train.csv from the local machine through the Colab file picker

from google.colab import files
uploaded = files.upload()  # indexed by file name below

import io
# Wrap the uploaded content in an in-memory buffer so pandas can parse it as CSV
df = pd.read_csv(io.BytesIO(uploaded['Train.csv']))


In [None]:
# Check Training Data: preview the first rows to confirm the columns loaded as expected

df.head()


In [None]:
# Visualizing the data: one subplot per series, with the year on the x-axis

plot_cols = ['CO2EQ', 'CO2', 'CH4', 'N2O','Temp_Anomaly']
plot_features = df.set_index('Years')[plot_cols]
_ = plot_features.plot(subplots=True)

In [None]:
# Splitting the dataframe into training, validation and test dataframes.
# Rows are cut in their existing order: first 60% train, next 8% validation, rest test.

column_indices = dict(zip(df.columns, range(len(df.columns))))

n = len(df)
train_df, val_df, test_df = (
    df.iloc[:int(n*0.6)],
    df.iloc[int(n*0.6):int(n*0.68)],
    df.iloc[int(n*0.68):],
)

num_features = len(df.columns)

In [None]:
# check shape of the training, validation and test dataframes (one per line)

print(train_df.shape, val_df.shape, test_df.shape, sep='\n')

In [None]:
# Scaling the data between 0 and 1 using MinMaxScaler from scikit-learn
# LSTM works better with scaled data
# The scaler is fitted on the training split only and then reused for the
# validation and test splits, so their scaled values can fall outside [0, 1].

scaler = MinMaxScaler(feature_range=(0,1))
train = scaler.fit_transform(train_df)
val = scaler.transform(val_df)
test = scaler.transform(test_df)

In [None]:
# split data into input features and targets:
# the last column of each scaled array is the target, all other columns are inputs
# NOTE(review): assumes the quantity to predict is the last column of Train.csv - confirm

train_x, train_y = train[:,:-1], train[:,-1]
val_x, val_y = val[:,:-1], val[:,-1]
test_x, test_y = test[:,:-1], test[:,-1]

In [None]:
# Give every input array a length-1 middle axis: (samples, features) -> (samples, 1, features),
# which is the 3-D layout the LSTM input layer is built from

train_x = train_x[:, np.newaxis, :]
val_x = val_x[:, np.newaxis, :]
test_x = test_x[:, np.newaxis, :]

print(train_x.shape, train_y.shape, val_x.shape, val_y.shape, test_x.shape, test_y.shape)

In [None]:
# building the LSTM model using keras

def build_model(train_x):
    """Build and compile the stacked-LSTM regressor.

    The input layer takes its shape from axes 1 and 2 of `train_x`. Two
    LSTM(50) layers that return full sequences, each followed by 30% dropout,
    feed a final LSTM(50) and a single linear output unit. The model is
    compiled with the Adam optimizer and mean-squared-error loss.
    """
    timesteps, n_inputs = train_x.shape[1], train_x.shape[2]
    inputs = keras.layers.Input(shape=(timesteps, n_inputs))

    hidden = inputs
    for _ in range(2):
        hidden = keras.layers.LSTM(50, return_sequences=True)(hidden)
        hidden = keras.layers.Dropout(0.3)(hidden)
    hidden = keras.layers.LSTM(50)(hidden)

    outputs = keras.layers.Dense(1, activation='linear')(hidden)

    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam', loss="mse")
    return model

# Instantiate the network for the training input shape and print its layer summary
model = build_model(train_x)
model.summary()

In [None]:
# fitting the model with model.fit without shuffling:
# 50 epochs, batches of 72 samples, with (val_x, val_y) passed as validation data.
# `process` is what plot_process reads the 'loss' / 'val_loss' history from.

process = model.fit(train_x, train_y, epochs = 50, batch_size = 72, validation_data = (val_x, val_y), shuffle = False)

In [None]:
# plotting the loss and validation loss against epochs

def plot_process(process):
    """Plot the 'loss' and 'val_loss' series of `process.history` on one figure."""
    curves = (('loss', 'train'), ('val_loss', 'validation'))
    for history_key, legend_label in curves:
        plt.plot(process.history[history_key], label=legend_label)
    plt.grid()
    plt.legend()
    plt.show()

# Show the learning curves of the fit above
plot_process(process)

In [None]:
def prediction(model,test_x,train_x, df):
    """Predict the test targets and build the frames used for reporting and plotting.

    Besides its arguments, this function reads two notebook-level globals:
    `scaler` (the fitted scaler) and `test` (the scaled test array).

    Args:
        model: fitted model exposing `predict`.
        test_x: scaled test inputs with three axes, (samples, 1, features).
        train_x: unused; kept so existing calls keep working.
        df: unscaled dataframe containing 'Years' and 'Temp_Anomaly' columns.

    Returns:
        A tuple `(pred, original)`. `pred` covers the test rows only, with
        columns 'Years', 'Original' and 'Predicted'. `original` covers every
        year with columns 'Years', 'Original' and 'Predicted'; for the rows
        before the test period both value columns hold the recorded
        'Temp_Anomaly'.
    """
    n_test = test_x.shape[0]

    # Predict using the model
    predict = model.predict(test_x)

    # Drop the length-1 middle axis so the features can be stacked with the prediction
    test_features = test_x.reshape((n_test, test_x.shape[2]))

    # Put the prediction in the last column, i.e. where the target sits in `test`,
    # so the scaler can be inverted on an array of the width it was applied to
    predict_ = np.concatenate((test_features, predict), axis=1)

    # Inverse-scaling to get the real values, removing min max scaler
    predict_ = scaler.inverse_transform(predict_)
    original_ = scaler.inverse_transform(test)

    # Create dataframe to store the predicted and original values
    pred = pd.DataFrame()
    pred['Years'] = df['Years'][-n_test:]
    pred['Original'] = original_[:, -1]
    pred['Predicted'] = predict_[:, -1]

    # Create dataframe for visualization: rows before the test period carry the
    # recorded value in both columns, the test rows come from `pred`.
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    history = df[['Years', 'Temp_Anomaly']][:-n_test]
    original = pd.concat([
        history.rename(columns={'Temp_Anomaly': 'Original'}),
        pred[['Years', 'Original']],
    ])
    predicted = pd.concat([
        history.rename(columns={'Temp_Anomaly': 'Predicted'}),
        pred[['Years', 'Predicted']],
    ])
    original = original.merge(predicted, on='Years')
    return pred, original

# `pred` holds the test rows only; `original` holds the full series used for plotting
pred, original = prediction(model, test_x, train_x, df)

In [None]:


# Plotting the predicted values

def plot(df):
    """Show recorded and predicted temperature anomaly against 'Years' in one plotly figure."""
    fig = px.line(title='Temperature Prediction')
    traces = (
        ('Original', 'Recorded Temperature Anomaly', 0.7),
        ('Predicted', 'Predicted Temperature Anomaly', 0.5),
    )
    for column, trace_name, alpha in traces:
        fig.add_scatter(x=df['Years'], y=df[column], name=trace_name, opacity=alpha)
    fig.show()

# Plot the full series returned by prediction()
plot(original)

# Model Validation using Radiative Forcing
Here, we use the Radiative Forcing data set in the same model to validate how well the model is working.

In [None]:
#Setting Up

import os # import OS module to interact with Operating System
import datetime # import module to manipulate date and time

import IPython # import ipython command shell
import IPython.display # import display tools

import matplotlib as mpl # import matplot library for visualization
import matplotlib.pyplot as plt # import library to plot timeseries data

import numpy as np # import numpy library for mathematical operations
import pandas as pd # import pandas library to process timeseries data
import seaborn as sns # import seaborn library to process statistical graphics
import tensorflow as tf # import tensorflow 

from tensorflow import keras # import Keras to build and summarize the model
import plotly.express as px # import plotly.express to create graphs

from sklearn.preprocessing import MinMaxScaler # to normalize training data

# Notebook-wide matplotlib defaults: 8x6 figures and no grid lines on the axes
mpl.rcParams.update({'figure.figsize': (8, 6), 'axes.grid': False})


In [None]:
# Importing Data from computer through the Colab file picker
# NOTE(review): this section uses an 'RF' column while the first section uses
# 'Temp_Anomaly'; presumably a different file is uploaded here under the same
# name 'Train.csv' - confirm

from google.colab import files
uploaded = files.upload()  # indexed by file name below

import io
# Wrap the uploaded content in an in-memory buffer so pandas can parse it as CSV
df = pd.read_csv(io.BytesIO(uploaded['Train.csv']))


In [None]:
# Check Training Data: preview the first rows to confirm the columns loaded as expected

df.head()


In [None]:
# Visualizing the data: one subplot per series, with the year on the x-axis

plot_cols = ['CO2EQ', 'CO2', 'CH4', 'N2O','RF']
plot_features = df.set_index('Years')[plot_cols]
_ = plot_features.plot(subplots=True)

In [None]:
# Splitting the dataframe into training, validation and test dataframes.
# Rows are cut in their existing order: first 70% train, next 20% validation, last 10% test.

column_indices = dict(zip(df.columns, range(len(df.columns))))

n = len(df)
train_df, val_df, test_df = (
    df.iloc[:int(n*0.7)],
    df.iloc[int(n*0.7):int(n*0.9)],
    df.iloc[int(n*0.9):],
)
num_features = len(df.columns)

In [None]:
# check shape of the training, validation and test dataframes (one per line)

print(train_df.shape, val_df.shape, test_df.shape, sep='\n')

In [None]:
# Normalizing the data between 0 and 1 using MinMaxScaler from scikit-learn
# The scaler is fitted on the training split only and then reused for the
# validation and test splits, so their scaled values can fall outside [0, 1].

scaler = MinMaxScaler(feature_range=(0,1))
train = scaler.fit_transform(train_df)
val = scaler.transform(val_df)
test = scaler.transform(test_df)

In [None]:
# split data into input features and targets:
# the last column of each scaled array is the target, all other columns are inputs
# NOTE(review): assumes the quantity to predict is the last column of Train.csv - confirm

train_x, train_y = train[:,:-1], train[:,-1]
val_x, val_y = val[:,:-1], val[:,-1]
test_x, test_y = test[:,:-1], test[:,-1]

In [None]:
# Give every input array a length-1 middle axis: (samples, features) -> (samples, 1, features),
# which is the 3-D layout the LSTM input layer is built from

train_x = train_x[:, np.newaxis, :]
val_x = val_x[:, np.newaxis, :]
test_x = test_x[:, np.newaxis, :]
print(train_x.shape, train_y.shape, val_x.shape, val_y.shape, test_x.shape, test_y.shape)

In [None]:
# building the LSTM model using keras

def build_model(train_x):
    """Build and compile the stacked-LSTM regressor.

    The input layer takes its shape from axes 1 and 2 of `train_x`. Two
    LSTM(50) layers that return full sequences, each followed by 30% dropout,
    feed a final LSTM(50) and a single linear output unit. The model is
    compiled with the Adam optimizer and mean-absolute-error loss.
    """
    timesteps, n_inputs = train_x.shape[1], train_x.shape[2]
    inputs = keras.layers.Input(shape=(timesteps, n_inputs))

    hidden = inputs
    for _ in range(2):
        hidden = keras.layers.LSTM(50, return_sequences=True)(hidden)
        hidden = keras.layers.Dropout(0.3)(hidden)
    hidden = keras.layers.LSTM(50)(hidden)

    outputs = keras.layers.Dense(1, activation='linear')(hidden)

    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam', loss="mae")
    return model

# Instantiate the network for the training input shape and print its layer summary
model = build_model(train_x)
model.summary()

In [None]:
# fitting the model with model.fit without shuffling:
# 50 epochs, batches of 72 samples, with (val_x, val_y) passed as validation data.
# `process` is what plot_process reads the 'loss' / 'val_loss' history from.

process = model.fit(train_x, train_y, epochs = 50, batch_size = 72, validation_data = (val_x, val_y), shuffle = False)

In [None]:
# plotting the loss and validation loss against epochs

def plot_process(process):
    """Plot the 'loss' and 'val_loss' series of `process.history` on one figure."""
    curves = (('loss', 'train'), ('val_loss', 'validation'))
    for history_key, legend_label in curves:
        plt.plot(process.history[history_key], label=legend_label)
    plt.grid()
    plt.legend()
    plt.show()

# Show the learning curves of the fit above
plot_process(process)

In [None]:
def prediction(model,test_x,train_x, df):
    """Predict the test targets and build the frames used for reporting and plotting.

    Besides its arguments, this function reads two notebook-level globals:
    `scaler` (the fitted scaler) and `test` (the scaled test array).

    Args:
        model: fitted model exposing `predict`.
        test_x: scaled test inputs with three axes, (samples, 1, features).
        train_x: unused; kept so existing calls keep working.
        df: unscaled dataframe containing 'Years' and 'RF' columns.

    Returns:
        A tuple `(pred, original)`. `pred` covers the test rows only, with
        columns 'Years', 'Original', 'Predicted' and 'Error'
        (Original - Predicted). `original` covers every year with columns
        'Years', 'Original' and 'Predicted'; for the rows before the test
        period both value columns hold the recorded 'RF'.
    """
    n_test = test_x.shape[0]

    # Predict using the model
    predict = model.predict(test_x)

    # Drop the length-1 middle axis so the features can be stacked with the prediction
    test_features = test_x.reshape((n_test, test_x.shape[2]))

    # Put the prediction in the last column, i.e. where the target sits in `test`,
    # so the scaler can be inverted on an array of the width it was applied to
    predict_ = np.concatenate((test_features, predict), axis=1)

    # Inverse-scaling to get the real values
    predict_ = scaler.inverse_transform(predict_)
    original_ = scaler.inverse_transform(test)

    # Create dataframe to store the predicted and original values
    pred = pd.DataFrame()
    pred['Years'] = df['Years'][-n_test:]
    pred['Original'] = original_[:, -1]
    pred['Predicted'] = predict_[:, -1]

    # Calculate the error
    pred['Error'] = pred['Original'] - pred['Predicted']

    # Create dataframe for visualization: rows before the test period carry the
    # recorded value in both columns, the test rows come from `pred`.
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    history = df[['Years', 'RF']][:-n_test]
    original = pd.concat([
        history.rename(columns={'RF': 'Original'}),
        pred[['Years', 'Original']],
    ])
    predicted = pd.concat([
        history.rename(columns={'RF': 'Predicted'}),
        pred[['Years', 'Predicted']],
    ])
    original = original.merge(predicted, on='Years')
    return pred, original

# `pred` holds the test rows only (with the error column); `original` holds the full series for plotting
pred, original = prediction(model, test_x, train_x, df)

In [None]:
def plot(df):
    """Show the 'Original' and 'Predicted' columns against 'Years' in one plotly figure."""
    # Plotting the Current and Predicted values
    fig = px.line(title='Prediction vs. Actual')
    traces = (
        ('Original', 'Original', 0.7),
        ('Predicted', 'Predicted', 0.5),
    )
    for column, trace_name, alpha in traces:
        fig.add_scatter(x=df['Years'], y=df[column], name=trace_name, opacity=alpha)
    fig.show()
    
# Plot the full series returned by prediction()
plot(original)