In [1]:
import os
from glob import glob

import pandas as pd

In [2]:
# Collect every cleaned per-city air-quality CSV. glob() yields paths in an
# arbitrary, filesystem-dependent order, so sort them to keep the processing
# order (and the list displayed below) reproducible across runs and machines.
files = sorted(glob("../data/cleaned/*_aq.csv"))

files

['../data/cleaned/aligarh_aq.csv',
 '../data/cleaned/bengaluru_aq.csv',
 '../data/cleaned/gurgaon_aq.csv',
 '../data/cleaned/mohali_aq.csv']

## COVID-19 lockdown in India

According to [Wikipedia](https://en.wikipedia.org/wiki/COVID-19_lockdown_in_India), the lockdown came into effect on **25 March 2020**.

We will filter the hourly air quality data collected from [Berkeley Earth](https://berkeleyearth.org/) to the period from **01 Jan 2020** to **30 Sep 2021** and try to analyze the impact of the lockdown on air quality.

In [3]:
# Make sure the output folder for the filtered CSVs is present.
# exist_ok=True keeps this cell safe to re-run when the folder already exists.
os.makedirs(
    "../data/filtered/",
    exist_ok=True,
)

In [4]:
# Inclusive bounds of the analysis window; they are compared against the
# parsed `timestamp_utc` column of each file.
start_date = "2020-01-01 00:00:00"
end_date = "2021-09-30 23:59:59"

for file in files:
    # Derive the city name from the file name, e.g. ".../aligarh_aq.csv" -> "aligarh".
    # - basename() handles whichever path separator glob returned.
    # - splitext() removes only the ".csv" extension. str.strip(".csv") would
    #   instead strip any of the characters '.', 'c', 's', 'v' from BOTH ends,
    #   turning e.g. "chennai_aq.csv" into "hennai_aq".
    # - rsplit("_", 1) drops only the trailing "_aq" marker, so city names that
    #   contain underscores are kept whole.
    file_stem = os.path.splitext(os.path.basename(file))[0]
    city = file_stem.rsplit("_", 1)[0]

    aq_df = pd.read_csv(file, parse_dates=["timestamp_utc"])

    # Keep only the rows whose timestamp lies inside the window
    # (between() is inclusive on both ends by default).
    aq_filtered = aq_df[aq_df.timestamp_utc.between(start_date, end_date)]

    # Dumping the data to CSV
    aq_filtered.to_csv(f"../data/filtered/{city}.csv", index=False)

    print(f"Data filtered and dumped for {city.capitalize()}")
    print("-" * 40)

Data filtered and dumped for Aligarh
----------------------------------------
Data filtered and dumped for Bengaluru
----------------------------------------
Data filtered and dumped for Gurgaon
----------------------------------------
Data filtered and dumped for Mohali
----------------------------------------
