<a href="https://colab.research.google.com/github/livinNector/climate-change-hackathon/blob/main/climate_change_hackathon_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!git clone https://oauth2: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@github.com/Ananthzeke/climate-change-hackathon.git

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
from datetime import datetime

## AQI Data

In [None]:
# Load the AQI workbook and reshape it to long form: every non-location column
# becomes a (date, aqi) row for its location.
df = (
    pd.read_excel("/content/climate-change-hackathon/aqi/AQI all_processed.xlsx")
    .rename(columns={"Location": "location"})
    .melt(id_vars=["location"], var_name="date", value_name="aqi")
    .sort_values(["location", "date"])
)

# Non-null entry counts per location.
locations_count = df.groupby(["location"], as_index=False).count()

# Keep only the locations that have readings for more than 24 months.
has_enough_readings = locations_count["aqi"] > 24
eligible_locations = locations_count.loc[has_enough_readings, "location"].to_list()

aqi_filtered = df.loc[df["location"].isin(eligible_locations)].reset_index(drop=True)
aqi_filtered

In [None]:
# Display the filtered long-form AQI table (columns: location, date, aqi).
aqi_filtered

In [None]:
# Set of (location, date) keys present in the AQI table, used for coverage checks.
aqi_index = aqi_filtered.set_index(["location", "date"]).index
aqi_vals = set(aqi_index)

## AQI AAQ Map

In [None]:
# Load the AAQ readings, order them by station and date, and align the date
# column name with the AQI table.
df = (
    pd.read_csv(
        "/content/climate-change-hackathon/Dataset/updated_aaq_2016_to_2022.csv",
        parse_dates=True,
    )
    .sort_values(["location_aaq", "Date"])
    .reset_index(drop=True)
    .rename(columns={"Date": "date"})
)
df

In [None]:
import json

# Mapping iterated below as {AQI location: [AAQ station names]}.
with open("aaq_aqi_map.json") as map_file:
  aaq_aqi_map = json.loads(map_file.read())

In [None]:
from functools import reduce

# For every AQI location, combine the readings of all AAQ stations mapped to it.
joint_dfs = []
for aqi_loc, aaq_locs in aaq_aqi_map.items():
  # One date-indexed frame per mapped station, without the station-name column.
  station_frames = [
      df.loc[df["location_aaq"] == station].drop(columns="location_aaq").set_index("date")
      for station in aaq_locs
  ]
  # Element-wise sum aligned on date; a value missing on one side counts as 0.
  # NOTE(review): readings of overlapping stations are summed, not averaged — confirm this is intended.
  combined = reduce(lambda acc, frame: acc.add(frame, fill_value=0), station_frames)
  combined = combined.reset_index()
  combined["location"] = aqi_loc
  joint_dfs.append(combined)

aaq_merged = pd.concat(joint_dfs)

In [None]:
aaq_merged = aaq_merged.sort_values(["location","date"])
# Vectorised parse of the "YYYY-MM-DD" strings. Unlike a per-row
# datetime.strptime, pd.to_datetime also accepts values that are already
# datetimes, so this cell can safely be re-run.
aaq_merged["date"] = pd.to_datetime(aaq_merged["date"], format="%Y-%m-%d")

In [None]:
# Set of (location, date) keys present in the merged AAQ table.
aaq_index = aaq_merged.set_index(["location", "date"]).index
aaq_vals = set(aaq_index)

In [None]:
# (location, date) keys that have an AQI reading but no AAQ reading.
sorted(aqi_vals.difference(aaq_vals))

## AAQ AQI merge

In [None]:
merged_column_names = [
    "location", "date", "aqi",
    "nox", "pm2.5", "toluene", "benzene", "so2", "xylene", "o3", "pm10", "nh3", "co",
]

# Left join keeps every AQI row even when no AAQ reading exists for that key.
aqi_aaq_df = aqi_filtered.merge(aaq_merged, how="left", on=["location", "date"])
# NOTE(review): columns are renamed by position, which assumes the AAQ columns
# arrive in exactly this order — confirm against the CSV header.
aqi_aaq_df.columns = merged_column_names

In [None]:
# Heatmap of missing values after the merge (rows: location/date, columns: variables).
missing_mask = aqi_aaq_df.set_index(["location", "date"]).isna()
fig, ax = plt.subplots(figsize=(10, 30))
sns.heatmap(missing_mask, ax=ax)

In [None]:
# Interpolate linearly within each location separately, so values are never
# carried over from one location to another.
grouped_dict = {
    location: group.set_index(["location", "date"]).interpolate("linear", limit_area=None)
    for location, group in aqi_aaq_df.groupby("location")
}
aqi_aaq_filled = pd.concat(grouped_dict.values())

In [None]:
# Heatmap of the values that are still missing after interpolation.
remaining_missing = aqi_aaq_filled.isna()
fig, ax = plt.subplots(figsize=(10, 30))
sns.heatmap(remaining_missing, ax=ax)

In [None]:
# Whatever interpolation could not fill is replaced with 0.
aqi_aaq_filled = aqi_aaq_filled.fillna(0)

## AQI AAQ loc map

In [None]:
# Display the gap-filled table, indexed by (location, date).
aqi_aaq_filled

In [None]:
# Per-location coordinates; the file's own headers are replaced by position.
# NOTE(review): assumes the CSV has exactly four columns in this order — confirm.
aqi_locations_path = "/content/climate-change-hackathon/Dataset/aqi_locations.csv"
aqi_locations = pd.read_csv(aqi_locations_path)
aqi_locations.columns = ["location", "lat", "long", "alt"]

In [None]:
# Turn the (location, date) index back into columns, then attach lat/long/alt
# to every row of the matching location.
aqi_aaq_flat = aqi_aaq_filled.reset_index()
aqi_aaq_geo = aqi_aaq_flat.merge(aqi_locations, on=["location"], how="left")

In [None]:
# Persist the merged table in the working directory. The DataFrame index is
# written as the first CSV column (to_csv default), which the later read_csv
# call consumes via index_col=0.
aqi_aaq_geo.to_csv("aqi_aaq_geo.csv")

## Processing aaq-aqi dataset

In [None]:
!pip install cond-rnn

In [None]:
import tensorflow as tf
import tensorflow.keras.layers as tfl
from cond_rnn import ConditionalRecurrent

In [None]:
# Reload the merged table. `parse_dates=True` was dropped: it only targets the
# index (here the saved row-number column), so it never parsed "date".
df = pd.read_csv("aqi_aaq_geo.csv", index_col=0)

# Vectorised date parsing and calendar features (replaces per-row apply calls).
df["date"] = pd.to_datetime(df["date"], format="%Y-%m-%d")
# int64 keeps the dtype the previous per-row implementation produced.
df["year"] = df["date"].dt.year.astype("int64")

# Encode the month as a point on the unit circle so December and January are adjacent.
month_angle = (df["date"].dt.month - 1) / 12 * 2 * np.pi
df["month_sin"] = np.sin(month_angle)
df["month_cos"] = np.cos(month_angle)

# These pollutants are not used as model features.
df = df.drop(columns=["toluene", "xylene", "benzene"])

In [None]:
# Feature groups, by role.
aqi_features = [
    "aqi", "so2", "nox", "pm10", "pm2.5", "co", "o3", "nh3",
]
meteorological_features = [
    "rainfall", "humid_min", "humid_max", "temp_min", "temp_max", "wind_speed",
]
temporal_features = ["month_sin", "month_cos", "year"]
geo_spatial_features = ["lat", "long", "alt"]

# Combined views: everything that varies over time, then everything.
time_series_features = [*aqi_features, *meteorological_features, *temporal_features]
all_features = [*time_series_features, *geo_spatial_features]

# Names of the prediction targets.
out_feature_names = ["aqi_out", "temp_max_out", "humid_max_out"]

In [None]:
# One frame per location (in order of first appearance), keeping only the
# numeric feature columns.
location_dfs = []
for location in df["location"].unique():
  location_rows = df.loc[df["location"] == location]
  location_dfs.append(location_rows.drop(columns=["location", "date"]))

In [None]:
# One tf.data.Dataset per location; each element is a dict {column name: value} for one row.
location_datasets = []
for l_df in location_dfs:
  location_datasets.append(tf.data.Dataset.from_tensor_slices(dict(l_df)))

In [None]:
def process_dataset(ds, n_in=24, n_out=12):
  """Split one batched window into model inputs and the AQI target.

  Args:
    ds: mapping of column name to a 1-D tensor holding one window of
      consecutive rows (at least ``n_in + n_out`` long).
    n_in: number of leading steps used as model input.
    n_out: number of steps following the input that form the target.

  Returns:
    A tuple ``(x, y)``. ``x`` maps every column to its first ``n_in`` steps;
    the AQI and temporal features are shaped ``(n_in, 1)`` and the geospatial
    features ``(1,)``. ``y`` maps ``"aqi_out"`` to the ``n_out`` AQI values
    that follow the input, shaped ``(n_out, 1)``.
  """
  x = {k: v[:n_in] for k, v in ds.items()}

  # Geospatial features are the same throughout the window, so keep only the first value.
  for feature in geo_spatial_features:
    x[feature] = x[feature][0:1]
    x[feature].set_shape([1])

  # Pin the static length and add a trailing channel axis for the (steps, 1) model inputs.
  for feature in aqi_features + temporal_features:
    x[feature].set_shape([n_in])
    x[feature] = tf.expand_dims(x[feature], axis=-1)

  # Only the AQI target is produced.
  y = {"aqi_out": ds["aqi"][n_in:n_in + n_out]}
  for feature in ["aqi_out"]:
    y[feature].set_shape([n_out])
    y[feature] = tf.expand_dims(y[feature], axis=-1)

  return x, y


# Build sliding windows of 36 consecutive rows (stride 1) for every location and
# split each window into inputs/targets with process_dataset.
location_windowed = []
for l_ds in location_datasets:
  windows = l_ds.window(36, shift=1, drop_remainder=True)
  # Each window is a dict of per-column datasets; batch(36) turns every column
  # into a single tensor holding the whole window.
  window_tensors = windows.flat_map(
      lambda x: tf.data.Dataset.zip({k: v.batch(36) for (k, v) in x.items()})
  )
  location_windowed.append(window_tensors.map(process_dataset))

# Chain the per-location datasets one after another, then batch and cache.
location_all_windowed = (
    tf.data.Dataset.from_tensor_slices(location_windowed)
    .flat_map(lambda x: x)
    .batch(32)
    .cache()
)

In [None]:
# Show the dataset repr to check the element structure of the batched (x, y) pairs.
location_all_windowed

## AQI AAQ Analysis

In [None]:
# AQI over time, one line per location.
fig, ax = plt.subplots(figsize=(30, 15))
sns.lineplot(data=df, x="date", y="aqi", hue="location", ax=ax)
plt.show()

# AQI over time, averaged over all locations for each date.
mean_by_date = df.set_index("location").groupby("date").mean()
fig, ax = plt.subplots(figsize=(10, 5))
sns.lineplot(data=mean_by_date, x="date", y="aqi", ax=ax)

## Mobility

In [None]:
!wget https://www.gstatic.com/covid19/mobility/Region_Mobility_Report_CSVs.zip
!unzip Region_Mobility_Report_CSVs.zip -d region_mobility_report

In [None]:
# Stack the yearly mobility reports for India into a single frame.
mobility_report_paths = [
    "region_mobility_report/2020_IN_Region_Mobility_Report.csv",
    "region_mobility_report/2021_IN_Region_Mobility_Report.csv",
    "region_mobility_report/2022_IN_Region_Mobility_Report.csv",
]
df = pd.concat([pd.read_csv(report_path) for report_path in mobility_report_paths])
df

In [None]:
# Keep the Telangana rows and drop the identifier columns that are not needed further on.
dropped_columns = [
    "country_region_code",
    "country_region",
    "sub_region_1",
    "iso_3166_2_code",
    "census_fips_code",
    "metro_area",
    "place_id",
]
is_telangana = df["sub_region_1"] == "Telangana"
telangana_df = df.loc[is_telangana].drop(columns=dropped_columns)
telangana_df

In [None]:
# Values of the mobility report's "sub_region_2" column to keep.
locations = ["Adilabad","Nizamabad","Warangal","Karimnagar","Khammam"]

In [None]:
# Restrict the Telangana mobility data to the selected sub-regions. The result
# gets its own name so the state-level frame stays available and the cell can be re-run.
telangana_selected_df = telangana_df[telangana_df["sub_region_2"].isin(locations)]
# Backward-compatible alias: this result used to be bound to the misspelled
# name `telengana_df`, which was easy to confuse with `telangana_df`.
telengana_df = telangana_selected_df

## Model

In [None]:
class OneToManyRNN(tf.keras.layers.Layer):
  """Unrolls an RNN cell for a fixed number of steps from a single input.

  The first step receives the layer input; every following step receives the
  previous step's output, with the cell state threaded through all steps.

  Args:
    rnn_cell: cell called as ``cell(input, state)`` and returning
      ``(output, new_state)``.
    n_outputs: number of steps to generate.
  """

  def __init__(self,rnn_cell,n_outputs,**kwargs):
    super().__init__(**kwargs)
    self.cell = rnn_cell
    self.n_outputs = n_outputs

  def get_config(self):
    """Return a config whose keys match the ``__init__`` parameters.

    The cell is stored in serialized form under ``"rnn_cell"``; a live layer
    object under a key ``__init__`` does not accept cannot be used to rebuild
    the layer.
    """
    config = super().get_config()
    config.update({
        "rnn_cell":tf.keras.layers.serialize(self.cell),
        "n_outputs":self.n_outputs
    })
    return config

  @classmethod
  def from_config(cls,config):
    """Rebuild the layer, restoring the cell from its serialized config."""
    config = dict(config)  # do not mutate the caller's dict
    config["rnn_cell"] = tf.keras.layers.deserialize(config["rnn_cell"])
    return cls(**config)

  def call(self,input,state):
    """Generate ``n_outputs`` steps.

    Args:
      input: input of the first step.
      state: initial cell state.

    Returns:
      The step outputs stacked along axis 1, i.e. ``(batch, n_outputs, units)``
      assuming the cell emits ``(batch, units)`` outputs.
    """
    prediction = input
    predictions = []
    for _ in range(self.n_outputs):
      # Feed the previous output back in as the next input.
      prediction,state = self.cell(prediction,state)
      predictions.append(prediction)

    return tf.stack(predictions,axis=1)

In [None]:
def get_normalization_layer(feature,ds):
  """Create a Normalization layer adapted to one input feature.

  Args:
    feature: key of the feature inside the ``x`` dict of each ``(x, y)`` element.
    ds: dataset yielding ``(x, y)`` pairs.

  Returns:
    The adapted ``Normalization`` layer.
  """
  feature_values = ds.map(lambda x,y: x[feature])
  normalizer = tfl.Normalization()
  normalizer.adapt(feature_values)
  return normalizer

In [None]:
# --- Time-series branch: one (24, 1) input per AQI/temporal feature. ---------
# Per-feature Normalization (get_normalization_layer) is currently not applied.
inputs = [tf.keras.Input(shape=(24,1),name=name) for name in aqi_features+temporal_features]
inputs_concat = tf.keras.layers.concatenate(inputs,name="time_series_inputs")

inputs_dense = tfl.Dense(16,activation="relu",kernel_regularizer="l1")(inputs_concat)
inputs_dense = tfl.BatchNormalization()(inputs_dense)

# --- Conditioning branch: one scalar input per geospatial feature. -----------
# shape must be a tuple; `(1)` is just the integer 1.
cond_inputs = [tf.keras.Input(shape=(1,),name=name) for name in geo_spatial_features]

cond_concat = tf.keras.layers.concatenate(cond_inputs,name="conditional_inputs")
cond_dense = tfl.Dense(4,activation="relu")(cond_concat)
# Normalise the Dense output. Previously the raw concatenation was normalised
# here, which silently discarded the Dense(4) layer above.
cond_dense = tfl.BatchNormalization()(cond_dense)

# --- Encoder: LSTM over the time series, conditioned on the geospatial branch.
encoder_output,*encoder_state = ConditionalRecurrent(tfl.LSTM(16,activation="relu",return_state=True),name="conditional_encoder")([inputs_dense,cond_dense])

# --- Decoder: unroll 12 steps starting from the encoder output and state. ----
x  = OneToManyRNN(tfl.LSTMCell(16,activation="relu"),12,name="decoder")(encoder_output,encoder_state)

# Only the AQI head is built; no temp_max_out / humid_max_out heads are enabled.
aqi_out = tfl.Dense(1,activation = "relu",name="aqi_out")(x)

model = tf.keras.Model(inputs = inputs+cond_inputs,outputs = [aqi_out])


In [None]:
# Render the model graph left-to-right with tensor shapes on every edge.
tf.keras.utils.plot_model(model,rankdir="LR",show_shapes=True)

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# Set the Keras backend fuzz factor (epsilon) to 1.
# NOTE(review): presumably done so the "mape" metric stays finite when the
# target AQI is 0 (missing values are filled with 0 upstream) — confirm.
# The setting is global: it affects every Keras computation that reads
# backend.epsilon(), not only that metric.
tf.keras.backend.set_epsilon(1)

In [None]:
# Adam with MSE loss; MAE and MAPE are tracked as metrics. Training steps run eagerly.
adam_optimizer = tf.keras.optimizers.Adam(learning_rate=.01)
model.compile(
    optimizer=adam_optimizer,
    loss="mse",
    metrics=["mae","mape"],
    run_eagerly=True,
)

In [None]:
# The model has a single output, so hand Keras the bare "aqi_out" tensor as target.
aqi_training_data = location_all_windowed.map(lambda x,y:(x,y["aqi_out"]))

training_callbacks = [
    # Multiply the learning rate by 0.3 when the training loss stops improving by at least 10.
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor="loss",
        factor=.3,
        patience=2,
        cooldown=3,
        min_delta=10,
        min_lr=.0001,
    ),
    tf.keras.callbacks.TensorBoard(log_dir="aqi_model"),
    # ModelCheckpoint is intentionally not enabled.
]

model.fit(aqi_training_data,epochs=50,callbacks=training_callbacks)

In [None]:
# Persist the trained model under "aqi_model".
# NOTE(review): this is the same path that is passed as the TensorBoard log_dir
# during training — confirm that mixing logs and the saved model is intended.
model.save("aqi_model")