In [None]:
## Imports
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### Find the detailed article [here](https://www.notion.so/Effect-on-Air-Quality-COVID-19-1aa99168d0e842129978ced0cbb22c83)

In [None]:
## Load the five CSV files of the air-quality dataset
path = '../input/air-quality-data-in-india/'

# Every file lives directly under `path`; build the five locations in one pass.
city_day_path, city_hour_path, station_day_path, station_hour_path, stations_path = (
    path + file_name
    for file_name in (
        'city_day.csv',
        'city_hour.csv',
        'station_day.csv',
        'station_hour.csv',
        'stations.csv',
    )
)

# city_day is the only file read with low_memory=False; the rest use the reader defaults.
city_day = pd.read_csv(city_day_path, low_memory=False)
city_hour, station_day, station_hour, stations = map(
    pd.read_csv,
    (city_hour_path, station_day_path, station_hour_path, stations_path),
)

### Let's review the files one by one

In [None]:
## city_day: report its dimensions and column names, then preview the first rows
n_rows, n_cols = city_day.shape
print("City Day Data", (n_rows, n_cols))
print(list(city_day.columns))
city_day.head()

In [None]:
## city_hour: report its dimensions and column names, then preview the first rows
n_rows, n_cols = city_hour.shape
print("City Hour Data", (n_rows, n_cols))
print(list(city_hour.columns))
city_hour.head()

In [None]:
## station_day: report its dimensions and column names, then preview the first rows
n_rows, n_cols = station_day.shape
print("Station Day Data", (n_rows, n_cols))
print(list(station_day.columns))
station_day.head()

In [None]:
## station_hour: report its dimensions and column names, then preview the first rows
n_rows, n_cols = station_hour.shape
print("Station Hour Data", (n_rows, n_cols))
print(list(station_hour.columns))
station_hour.head()

In [None]:
## stations: report its dimensions and column names, then preview the first rows
n_rows, n_cols = stations.shape
print("Stations Data", (n_rows, n_cols))
print(list(stations.columns))
stations.head()

In [None]:
## Parse the Date column and derive calendar-part columns from it
city_day['Date'] = pd.to_datetime(city_day['Date'])

# (new column name, attribute of the .dt accessor that fills it)
for column, part in (('Year', 'year'), ('Month', 'month'), ('day', 'day')):
    city_day[column] = getattr(city_day['Date'].dt, part)

city_day.head()

In [None]:
# Summary statistics of city_day, using the default settings of describe()
city_day.describe()

## Cities

### How many cities do we have here?

In [None]:
# Compute the distinct values of the City column once and reuse them for
# both the count and the displayed list.
city_names = city_day['City'].unique()
print("Total cities", len(city_names))
city_names.tolist()

Covers all of it, doesn't it?

### Let's have a look at the pollutants and how they are distributed

Observations:

- PM2.5, O3 are usually high throughout the year
- CO has been found consistently low over the years

In [None]:
# One scatter-style subplot per pollutant column, with Date on the x axis.
pollutants = [
    'PM2.5', 'PM10',
    'NO', 'NOx', 'NO2',
    'CO', 'NH3', 'O3', 'SO2',
]
df = city_day.set_index('Date')

# Markers only (no connecting line), semi-transparent so dense regions stand out.
axes = df[pollutants].plot(
    subplots=True,
    figsize=(16, 20),
    linestyle='None',
    marker='.',
    alpha=0.5,
)
for ax in axes:
    ax.set(xlabel='Years', ylabel='ug/m3')

### How are the AQI Buckets distributed?

In [None]:
# Count of rows per AQI bucket, split by year; the title is set on the Axes that
# seaborn returns.
sns.countplot(data=city_day, x='AQI_Bucket', hue='Year').set_title(
    'Distribution of AQI Buckets 2015-2020'
);

- Overall we see we have a large number of Moderate and Satisfactory bucket counts which is a good thing and low number of Poor and Severe buckets over 2015-2020.
- If we observe carefully, 2019 has the highest number of Moderate and Satisfactory buckets. This cannot be attributed to the lockdown, which was only declared in March 2020, so another explanation is needed for the 2019 counts.
- And as compared to the previous years 2018 & 2019, for all the buckets we see a steep decline in 2020, and in case of "Good" bucket - higher number than previous years which can be seen as a positive sign.

### Let's study the AQI levels themselves

In [None]:
## AQI
print(f"Minimum and maximum range of AQI: {city_day.AQI.min()} - {city_day.AQI.max()}")
plt.title('AQI levels over 2015 to 2020')
sns.lineplot(x='Year',y='AQI', data=city_day);

We observe a general decline in the AQI levels over the years, which indicates an improvement in air quality.

### Let's observe the AQI levels of some of the metro cities over the years.

In [None]:
# Keep only the rows whose City is one of the selected metro cities.
metro_cities = ['Mumbai', 'Delhi', 'Bengaluru', 'Ahmedabad', 'Chennai']
data = city_day[city_day['City'].isin(metro_cities)]

# One line per city, AQI against Year.
sns.lineplot(data=data, x='Year', y='AQI', hue='City').set_title(
    'AQI Levels of some metro cities 2015-2020'
);

### In 2020 how did the AQI levels vary?

In [None]:
# Restrict to the rows of 2020 and plot AQI against Month.
is_2020 = city_day['Year'] == 2020
data = city_day.loc[is_2020]
sns.lineplot(data=data, x='Month', y='AQI').set_title('AQI Levels in the year 2020');

Even before the nationwide lockdown was declared by the PM (24th March 2020), individual states were already in lockdown, and hence we see the steep decline in the AQI from Feb.

### Let's compare Delhi & Mumbai in 2020, both hotspots for pollutants.

In [None]:
# Select the 2020 rows for Delhi and Mumbai with ONE combined boolean mask.
# The previous form filtered by year first and then indexed the result with a mask
# built on the full frame, which makes pandas warn that the boolean key is being
# reindexed and relies on implicit index alignment.
in_2020 = city_day['Year'] == 2020
is_delhi_or_mumbai = city_day['City'].isin(['Mumbai', 'Delhi'])
data = city_day[in_2020 & is_delhi_or_mumbai]

# One line per city, AQI against Month.
plt.title('AQI Levels of Delhi & Mumbai in 2020')
sns.lineplot(x='Month',y='AQI', data=data, hue='City');

### ***Analyzing the stock market during the pandemic***

### Let's study Cipla's stocks

In [None]:
## Read data
cipla = pd.read_csv('../input/nifty50-stock-market-data/CIPLA.csv')
cipla['Date'] = pd.to_datetime(cipla['Date'])
cipla_2020 = cipla[cipla['Date']>='2020-03-28']
cipla_2020.head()

In [None]:
# Summary statistics of cipla_2020, using the default settings of describe()
cipla_2020.describe()

In [None]:
# List the column names of cipla_2020
cipla_2020.columns.tolist()

### Let's compare the present price with the price in March 2020, i.e. when the pandemic started

In [None]:
# Change of each closing price relative to the first row of cipla_2020.
# A single vectorised subtraction replaces the row-wise apply(axis=1), which looked up
# the baseline again for every row. assign() returns a new frame, so nothing is
# written into a slice of `cipla` (no SettingWithCopyWarning).
baseline_close = cipla_2020['Close'].iloc[0]
cipla_2020 = cipla_2020.assign(value_change=cipla_2020['Close'] - baseline_close)
cipla_2020.plot(x='Date', y='value_change', title='Stock price change of Cipla from March 2020 to May 2021');

As expected, the price is increasing.

### Let's also plot the percentage change

In [None]:
# Percentage change of each closing price relative to the first row of cipla_2020:
# (Close / baseline - 1) * 100.
# Computed as one vectorised expression instead of a row-wise apply(axis=1), and
# attached with assign(), which returns a new frame rather than writing into a slice
# of `cipla` (no SettingWithCopyWarning).
baseline_close = cipla_2020['Close'].iloc[0]
cipla_2020 = cipla_2020.assign(
    percent_change=(cipla_2020['Close'] / baseline_close - 1) * 100
)
cipla_2020.plot(x='Date', y='percent_change', title='Percentage change of Cipla from Mar 2020 to May 2021');