In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import folium
city_day_data = pd.read_csv('../data/comparison_city_day.csv').sort_values(by=['City', 'Date'])

In [2]:
# Cleaning the data like seasonality
city_df = city_day_data.copy()
city_df = city_df[['City', 'Date', 'AQI']]
city_df['Date'] = pd.to_datetime(city_df['Date'])
city_df = city_df[city_df['AQI'].notna()].reset_index()
city_df

Unnamed: 0,index,City,Date,AQI
0,28,Ahmedabad,2015-01-29,209.0
1,29,Ahmedabad,2015-01-30,328.0
2,30,Ahmedabad,2015-01-31,514.0
3,31,Ahmedabad,2015-02-01,782.0
4,32,Ahmedabad,2015-02-02,914.0
...,...,...,...,...
33825,39042,Visakhapatnam,2021-06-27,93.0
33826,39043,Visakhapatnam,2021-06-28,48.0
33827,39044,Visakhapatnam,2021-06-29,83.0
33828,39045,Visakhapatnam,2021-06-30,95.0


In [3]:
# Finding the latitude and longitude of all cities
# Ref: https://simplemaps.com/data/in-cities
print(city_df['City'].unique())
city_loc = {
    'Ahmedabad': (23.0300, 72.5800),
    'Aizawl': (23.7307, 92.7173),
    'Amaravati': (16.5131, 80.5165),
    'Amritsar': (31.6167, 74.8500),
    'Bengaluru': (12.9699, 77.5980),
    'Bhopal': (23.2500, 77.4167),
    'Brajrajnagar': (21.8285, 83.9176),
    'Chandigarh': (30.7333, 76.7794),
    'Chennai': (13.0827, 80.2707),
    'Coimbatore': (11.0168, 76.9558),
    'Delhi': (28.7041, 77.1025),
    'Ernakulam': (9.9816, 76.2999),
    'Gurugram': (28.4595, 77.0266),
    'Guwahati': (26.1445, 91.7362),
    'Hyderabad': (17.3850, 78.4867),
    'Jaipur': (26.9124, 75.7873),
    'Jorapokhar': (23.7041, 86.4137),
    'Kochi': (9.9312, 76.2673),
    'Kolkata': (22.5726, 88.3639),
    'Lucknow': (26.8467, 80.9462),
    'Mumbai': (19.0760, 72.8777),
    'Patna': (25.5941, 85.1376),
    'Shillong': (25.5788, 91.8933),
    'Talcher': (20.9501, 85.2168),
    'Thiruvananthapuram': (8.5241, 76.9366),
    'Visakhapatnam': (17.6868, 83.2185)
}
city_df['Latitude'] = city_df['City'].apply(lambda x: city_loc[x][0])
city_df['Longitude'] = city_df['City'].apply(lambda x: city_loc[x][1])
city_df.info()

['Ahmedabad' 'Aizawl' 'Amaravati' 'Amritsar' 'Bengaluru' 'Bhopal'
 'Brajrajnagar' 'Chandigarh' 'Chennai' 'Coimbatore' 'Delhi' 'Ernakulam'
 'Gurugram' 'Guwahati' 'Hyderabad' 'Jaipur' 'Jorapokhar' 'Kochi' 'Kolkata'
 'Lucknow' 'Mumbai' 'Patna' 'Shillong' 'Talcher' 'Thiruvananthapuram'
 'Visakhapatnam']
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 33830 entries, 0 to 33829
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   index      33830 non-null  int64         
 1   City       33830 non-null  object        
 2   Date       33830 non-null  datetime64[ns]
 3   AQI        33830 non-null  float64       
 4   Latitude   33830 non-null  float64       
 5   Longitude  33830 non-null  float64       
dtypes: datetime64[ns](1), float64(3), int64(1), object(1)
memory usage: 1.5+ MB


In [4]:
# Convert date to string
city_df['Date'] = city_df['Date'].dt.strftime("%d-%m-%Y")
city_df

Unnamed: 0,index,City,Date,AQI,Latitude,Longitude
0,28,Ahmedabad,29-01-2015,209.0,23.0300,72.5800
1,29,Ahmedabad,30-01-2015,328.0,23.0300,72.5800
2,30,Ahmedabad,31-01-2015,514.0,23.0300,72.5800
3,31,Ahmedabad,01-02-2015,782.0,23.0300,72.5800
4,32,Ahmedabad,02-02-2015,914.0,23.0300,72.5800
...,...,...,...,...,...,...
33825,39042,Visakhapatnam,27-06-2021,93.0,17.6868,83.2185
33826,39043,Visakhapatnam,28-06-2021,48.0,17.6868,83.2185
33827,39044,Visakhapatnam,29-06-2021,83.0,17.6868,83.2185
33828,39045,Visakhapatnam,30-06-2021,95.0,17.6868,83.2185


In [5]:
# We can use any two dates to compare the AQIs
# Lets take the last date and first date as follows:
last_date = '31-12-2019'
prev_date = '01-07-2018'

# Note diving into 2020 because of pandemic effect

# Note going
city_df = city_df[city_df['Date'].isin([prev_date, last_date])].reset_index()

In [6]:
# Add a colour name based on the AQI
city_df['marker_colour'] = pd.cut(city_df['AQI'], bins=6, labels=['#006600', '#00e600', '#ffff00', '#ff9933', '#ff0000', '#800000'])
city_df

Unnamed: 0,level_0,index,City,Date,AQI,Latitude,Longitude,marker_colour
0,625,1277,Ahmedabad,01-07-2018,361.0,23.03,72.58,#800000
1,1157,1825,Ahmedabad,31-12-2019,385.0,23.03,72.58,#800000
2,2330,3073,Amaravati,01-07-2018,68.0,16.5131,80.5165,#006600
3,2789,3621,Amaravati,31-12-2019,87.0,16.5131,80.5165,#006600
4,3761,4660,Amritsar,01-07-2018,94.0,31.6167,74.85,#006600
5,4290,5208,Amritsar,31-12-2019,111.0,31.6167,74.85,#00e600
6,6006,7035,Bengaluru,01-07-2018,72.0,12.9699,77.598,#006600
7,6554,7583,Bengaluru,31-12-2019,97.0,12.9699,77.598,#006600
8,7208,8238,Bhopal,31-12-2019,187.0,23.25,77.4167,#ffff00
9,7902,8994,Brajrajnagar,01-07-2018,111.0,21.8285,83.9176,#00e600


In [7]:
# Plot for the prev date
india_map_1 = folium.Map(location = [21, 78],zoom_start = 4.25,max_zoom = 15,min_zoom = 4)
city_df_prev = city_df[city_df['Date'] == prev_date].reset_index(drop=True)

for index, item in city_df_prev.iterrows():
    folium.CircleMarker(location=[item['Latitude'], item['Longitude']],
                  tooltip="<h5 style = 'text-align:center; font-weight:bold'>" + item['City']+ "</h5>",
                  color=item['marker_colour']).add_to(india_map_1)
india_map_1

In [8]:
# Plot for the last date
india_map_2 = folium.Map(location = [21, 78],zoom_start = 4.45,max_zoom = 15,min_zoom = 4)
city_df_last = city_df[city_df['Date'] == last_date].reset_index(drop=True)

for index, item in city_df_last.iterrows():
    folium.CircleMarker(location=[item['Latitude'], item['Longitude']],
                  tooltip="<h5 style = 'text-align:center; font-weight:bold'>" + item['City']+ "</h5>",
                  color=item['marker_colour']).add_to(india_map_2)
india_map_2