<a href="https://colab.research.google.com/github/livinNector/climate-change-hackathon/blob/main/climate_change_hackathon_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!git clone https://oauth2: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@github.com/Ananthzeke/climate-change-hackathon.git

In [None]:
# Download the Kaggle dataset `rohanrao/air-quality-data-in-india` with the Kaggle CLI.
# NOTE(review): no Kaggle credentials are configured anywhere in this notebook, and no
# later cell visibly reads the downloaded archive — confirm this cell is still needed.
!kaggle datasets download -d rohanrao/air-quality-data-in-india

In [None]:
!pip install cond-rnn

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.keras.layers as tfl

In [None]:
from cond_rnn import ConditionalRecurrent

In [None]:
from datetime import datetime

In [None]:
!wget https://www.gstatic.com/covid19/mobility/Region_Mobility_Report_CSVs.zip
!unzip Region_Mobility_Report_CSVs.zip -d region_mobility_report

In [None]:
# Load the 2020 India region mobility report extracted by the previous cell.
# (A second read of the truncated path "region_m" used to follow; it could only
# raise FileNotFoundError and has been removed.)
df = pd.read_csv("region_mobility_report/2020_IN_Region_Mobility_Report.csv")
df.head()

In [None]:
# Keep only the Telangana rows and discard the identifier columns that are
# not needed once the state has been selected.
telangana_df = (
  df.loc[df["sub_region_1"].eq("Telangana")]
  .drop(
    columns=[
      "country_region_code",
      "country_region",
      "sub_region_1",
      "iso_3166_2_code",
      "census_fips_code",
      "metro_area",
      "place_id",
    ]
  )
)
telangana_df

In [None]:
# Place names of interest within the state.
locations = [
  "Adilabad",
  "Nizamabad",
  "Warangal",
  "Karimnagar",
  "Khammam",
]

In [None]:
# Show the mobility rows whose sub_region_2 is one of the selected places.
telangana_df.loc[telangana_df["sub_region_2"].isin(locations)]

## Processing aaq-aqi dataset

In [None]:
# Load the combined AQI / AAQ table. `parse_dates=True` was dropped: without an
# index column it parses nothing, and "Date" is parsed explicitly below.
df = pd.read_csv("/content/climate-change-hackathon/Dataset/updated_aqi_and_aaq.csv")

# Vectorised, strict parse: any value that is not YYYY-MM-DD raises.
df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d")

# Temporal features: years elapsed since 2016 (kept as int64) and the month
# mapped onto the unit circle so that December and January end up adjacent.
df["year"] = (df["Date"].dt.year - 2016).astype("int64")
df["month_sin"] = np.sin((df["Date"].dt.month - 1) / 12 * 2 * np.pi)
df["month_cos"] = np.cos((df["Date"].dt.month - 1) / 12 * 2 * np.pi)

# Drop the unused pollutant columns and the raw date, then replace missing values with 0.
df = df.drop(columns=["Toluene", "Xylene", "Benzene", "Date"]).fillna(0)

# NOTE(review): this rename is positional, so it relies on the CSV column order
# staying fixed; pandas raises if the number of columns differs from 15.
df.columns = ["location","lat","long","co","pm2.5","nh3","pm10","nox","o3","so2","aqi","alt","year","month_sin","month_cos"]

In [None]:
# Feature groups, by the role each column plays in the model.
aqi_features = [
  "aqi", "so2", "nox", "pm10",
  "pm2.5", "co", "o3", "nh3",
]
meteorological_features = [
  "rainfall",
  "humid_min", "humid_max",
  "temp_min", "temp_max",
  "wind_speed",
]
temporal_features = ["month_sin", "month_cos", "year"]
geo_spatial_features = ["lat", "long", "alt"]

# Everything that varies along the time axis, then the full feature list.
time_series_features = [*aqi_features, *meteorological_features, *temporal_features]
all_features = [*time_series_features, *geo_spatial_features]

# Names of the prediction targets.
out_feature_names = ["aqi_out", "temp_max_out", "humid_max_out"]

In [None]:
# Every expected feature that the table lacks is added as an all-zero column,
# so later cells can index any name in `all_features`.
for column in all_features:
  if column in df.columns:
    continue
  df[column] = 0

df

In [None]:
# One row per distinct (lat, long) pair; the "location" values of all rows
# sharing that pair are joined with "-". The length is the number of distinct pairs.
aqi_locations = (
  df[["location", "lat", "long"]]
  .groupby(["lat", "long"])
  .agg(lambda names: "-".join(names))
)
len(aqi_locations)

In [None]:
from geopy.geocoders import GeoNames

# NOTE(review): the GeoNames account name is hard-coded here; consider reading it from configuration.
geolocator = GeoNames(user_agent="hai", username="livinnector2001")

# One geocoding request per entry in `locations`, stored under the place name.
locations_geocode = {}
for place in locations:
  locations_geocode[place] = geolocator.geocode(place)



In [None]:
# One frame per distinct location, in order of first appearance, without the "location" column.
location_dfs = [
  df.loc[df["location"].eq(name)].drop(columns=["location"])
  for name in df["location"].unique()
]

In [None]:
# Turn each per-location frame into a tf.data.Dataset whose elements are
# dicts mapping column name -> value for a single row.
location_datasets = [
  tf.data.Dataset.from_tensor_slices({column: frame[column] for column in frame.keys()})
  for frame in location_dfs
]

In [None]:
def process_dataset(ds, n_inputs=24, n_outputs=12):
  """Split one batched window into model inputs and forecast targets.

  Args:
    ds: dict mapping feature name -> 1-D tensor holding one window of that
      feature. The window must contain at least `n_inputs + n_outputs` steps.
    n_inputs: number of leading steps used as model input (default 24).
    n_outputs: number of steps right after the input span used as targets
      (default 12).

  Returns:
    A tuple `(x, y)`:
      x: dict with every key of `ds`. Entries named in `time_series_features`
        have shape (n_inputs, 1); entries named in `geo_spatial_features`
        have shape (1,).
      y: dict with keys "aqi_out", "temp_max_out" and "humid_max_out"; entries
        named in `out_feature_names` have shape (n_outputs, 1).
  """
  x = {name: values[:n_inputs] for name, values in ds.items()}

  # Geospatial features are constant throughout a window, so keep only the first value.
  for feature in geo_spatial_features:
    x[feature] = x[feature][0:1]
    x[feature].set_shape([1])

  # Pin the static length (batching leaves it unknown), then add a trailing channel axis.
  for feature in time_series_features:
    x[feature].set_shape([n_inputs])
    x[feature] = tf.expand_dims(x[feature], axis=-1)

  # Target name -> source column it is cut from.
  target_sources = {
    "aqi_out": "aqi",
    "temp_max_out": "temp_max",
    "humid_max_out": "humid_max",
  }
  y = {
    target: ds[source][n_inputs:n_inputs + n_outputs]
    for target, source in target_sources.items()
  }

  for feature in out_feature_names:
    y[feature].set_shape([n_outputs])
    y[feature] = tf.expand_dims(y[feature], axis=-1)

  return x, y


# Per location: slide a 36-step window forward one step at a time, batch each
# window's columns into 36-long tensors, and split them into (inputs, targets).
location_windowed = [
  (
    location_ds
    .window(36, shift=1, drop_remainder=True)
    .flat_map(
      lambda window: tf.data.Dataset.zip(
        {name: column.batch(36) for (name, column) in window.items()}
      )
    )
    .map(process_dataset)
  )
  for location_ds in location_datasets
]

# Concatenate the per-location window streams, group them into batches of 32 and cache.
location_all_windowed = (
  tf.data.Dataset.from_tensor_slices(location_windowed)
  .flat_map(lambda windows: windows)
  .batch(32)
  .cache()
)

In [None]:
# Display the batched, cached dataset object to inspect its element structure.
location_all_windowed

## Model

In [None]:
class OneToManyRNN(tf.keras.layers.Layer):
  """Runs one RNN cell for a fixed number of steps, starting from a single input.

  At every step the cell's output is fed back as the next step's input, and
  the state returned by the cell is carried forward.

  Args:
    rnn_cell: cell called as `rnn_cell(inputs, state)` and returning
      `(output, new_state)`.
    n_outputs: number of steps to unroll, i.e. the length of the output sequence.
    **kwargs: forwarded to `tf.keras.layers.Layer`.
  """

  def __init__(self,rnn_cell,n_outputs,**kwargs):
    super().__init__(**kwargs)
    self.n_outputs = n_outputs
    self.cell = rnn_cell

  def call(self,input,state):
    """Return the per-step outputs stacked along axis 1: (batch, n_outputs, units)."""
    step_outputs = []
    current = input
    for _ in range(self.n_outputs):
      current, state = self.cell(current, state)
      step_outputs.append(current)

    # Stacking the (batch, units) step outputs on axis 1 puts the batch dimension first.
    return tf.stack(step_outputs, axis=1)

In [None]:
def get_normalization_layer(feature,ds):
  """Create a Normalization layer adapted to one input feature of `ds`.

  Args:
    feature: key of the feature inside the input dict of each dataset element.
    ds: dataset yielding `(inputs, targets)` pairs, where `inputs` is a dict.

  Returns:
    A `tfl.Normalization` layer whose statistics were computed from `feature`.
  """
  def select_feature(x, y):
    # Only the chosen input column is needed; the targets are ignored.
    return x[feature]

  layer = tfl.Normalization()
  layer.adapt(ds.map(select_feature))
  return layer

In [None]:
# --- Time-series branch -----------------------------------------------------
# One (24, 1) input per AQI and temporal feature; the meteorological features
# are not used as model inputs here. Each input is normalised with statistics
# adapted on the training dataset, then all are concatenated on the last axis.
inputs = [tf.keras.Input(shape=(24,1),name=name) for name in aqi_features+temporal_features]
input_norms = [
  get_normalization_layer(name,location_all_windowed)(tensor)
  for name,tensor in zip(aqi_features+temporal_features,inputs)
]
inputs_concat = tf.keras.layers.concatenate(input_norms,name="time_series_inputs")

input_dense = tfl.Dense(8,activation="relu",kernel_regularizer="l1")(inputs_concat)
input_norm = tfl.BatchNormalization()(input_dense)

# --- Conditioning branch ----------------------------------------------------
# One scalar input per geospatial feature. The shape must be the tuple (1,):
# the previous `(1)` was just the integer 1.
cond_inputs = [tf.keras.Input(shape=(1,),name=name) for name in geo_spatial_features]
cond_norms = [
  get_normalization_layer(name,location_all_windowed)(tensor)
  for name,tensor in zip(geo_spatial_features,cond_inputs)
]
cond_concat = tf.keras.layers.concatenate(cond_norms,name="conditional_inputs")
cond_concat = tfl.Dense(2,activation="relu",kernel_regularizer="l1")(cond_concat)
cond_norm = tfl.BatchNormalization()(cond_concat)

# --- Encoder / decoder ------------------------------------------------------
# The encoder LSTM is conditioned on the geospatial vector; its final output and
# state seed the decoder, which unrolls 12 steps.
encoder_output,*encoder_state = ConditionalRecurrent(tfl.LSTM(8,activation="relu",return_state=True),name="conditional_encoder")([input_norm,cond_norm])

x  = OneToManyRNN(tfl.LSTMCell(8,activation="relu"),12,name="decoder")(encoder_output,encoder_state)
# x = tfl.LSTM(8,activation="relu",return_sequences=True)(x)

# --- Output heads -----------------------------------------------------------
# Only the AQI head is active; the other two heads are kept for reference.
aqi_out = tfl.Dense(1,activation = "relu",name="aqi_out")(x)
# temp_high_out = tfl.Dense(1,activation = "relu",name="temp_max_out")(x)
# humid_high_out = tfl.Dense(1,activation = "relu",name="humid_max_out")(x)

model = tf.keras.Model(inputs = inputs+cond_inputs,outputs = [aqi_out])
# model = tf.keras.Model(inputs = inputs+cond_inputs,outputs = [aqi_out,temp_high_out,humid_high_out])


In [None]:
# Draw the model graph left-to-right with tensor shapes on every edge.
tf.keras.utils.plot_model(
  model,
  show_shapes=True,
  rankdir="LR",
)

In [None]:
# Print the model's layer-by-layer summary.
model.summary()

In [None]:
# Set Keras' global fuzz factor (the value returned by backend.epsilon()) to 1.
# NOTE(review): presumably this keeps the "mape" metric finite when a target is 0
# (missing values were filled with 0) — confirm. This is a process-wide setting, so
# anything else that reads backend.epsilon() is affected as well.
tf.keras.backend.set_epsilon(1)

In [None]:
# Adam on mean-squared error; MAE and MAPE are tracked as metrics.
# NOTE(review): run_eagerly=True disables graph compilation — confirm it is still required.
model.compile(
  loss="mse",
  optimizer="adam",
  metrics=["mae","mape"],
  run_eagerly=True,
)

In [None]:
# Train for 20 epochs, keeping only the "aqi_out" target of each batch.
model.fit(
  location_all_windowed.map(lambda features,targets: (features,targets["aqi_out"])),
  epochs=20,
)

```
data{
  lat : val,
  long : val,
  features: {
    f1:[vals],
    f2:[vals],
    f3:[vals],
  }
}
```

In [None]:
def get_input_feature(name):
  
