In [2]:
import numpy as np
import pandas as pd
from datetime import datetime

# Overview

In this notebook, we will explore two datasets:
1) Covid-19 related data
2) Air Quality Index data

The idea is to compare data from both datasets to understand whether COVID-19 had any impact on air quality. According to IQAir (https://www.iqair.com/malta), Malta's most polluted city in 2021 was L-Imsida; we will therefore analyse whether its air quality improved, worsened, or stayed the same during the COVID-19 period 2020 - 2022.

In [3]:
# Locations of the two raw CSV inputs, relative to the notebook.
csvCovid_path = './Data/COVID19.csv'
csvAQI_path = './Data/msida-AQI.csv'

# Parse both files with pandas' default CSV settings (COVID first, then AQI).
covid_df, aqi_df = (pd.read_csv(path) for path in (csvCovid_path, csvAQI_path))

# Raw text handles on the same two files. AQIfile is iterated line by line
# further down; nothing in this cell closes either handle.
Covidfile = open(csvCovid_path, 'r')
AQIfile = open(csvAQI_path, 'r')


# Data Preparation

The first step is to understand the data. Both datasets are imported into dataframes so that they are easier to inspect. Since the COVID-19 records start on 6 March 2020, we will remove earlier data from the AQI dataset.


In [4]:
# Column dtypes as inferred by read_csv: Date comes back as object (not
# datetime), the count columns as int64.
covid_df.dtypes

Date            object
New Cases        int64
Total Cases      int64
Recovered        int64
Deaths           int64
Active Cases     int64
dtype: object

In [5]:

# Every column is object and the labels are data values ('2022/12/1', ' 64', ...),
# which indicates read_csv took the file's first row as the header.
aqi_df.dtypes

2022/12/1    object
 64          object
 34          object
 17          object
 30          object
             object
 .1          object
dtype: object

In [6]:
def chr_int(a):
    """Convert a text field to an int, mapping anything non-numeric to 0.

    Parameters
    ----------
    a : str
        Raw field text. Surrounding whitespace (including a trailing
        newline) is ignored.

    Returns
    -------
    int
        The parsed non-negative integer, or 0 when the stripped text is
        empty or is not made up solely of decimal digits. Note that a
        missing value and a genuine reading of 0 are indistinguishable
        in the result.
    """
    text = a.strip()
    # isdecimal() (unlike isdigit()) only accepts characters int() can parse,
    # so input such as a superscript digit falls through to 0 instead of raising.
    if text.isdecimal():
        return int(text)
    return 0
                
# Parse the raw AQI file into rows of [date, six integer readings].
# The file is opened here, inside a context manager, instead of iterating a
# handle created in an earlier cell: a shared handle is exhausted after one
# pass, so re-running this cell would silently produce an empty list.
data = []
with open(csvAQI_path, 'r') as aqi_lines:
    for line in aqi_lines:
        # Strip each field so a leading space, or the trailing newline on the
        # last column, does not make a valid number look non-numeric.
        fields = [field.strip() for field in line.split(',')]
        if len(fields) != 7:
            continue  # skip lines that are not a date plus six readings
        readings = [chr_int(value) for value in fields[1:]]
        data.append([pd.to_datetime(fields[0], format="%Y/%m/%d")] + readings)

In [7]:
# Replace the read_csv version of aqi_df with a frame built from the parsed rows.
aqi_df = pd.DataFrame(data) 

In [8]:
# Check the rebuilt frame's dtypes: column 0 is datetime64[ns], the rest int64.
aqi_df.dtypes

0    datetime64[ns]
1             int64
2             int64
3             int64
4             int64
5             int64
6             int64
dtype: object

In [9]:
# Label the positional columns: the date first, then the six readings in the
# order they were parsed.
aqi_df.columns = ["date", "pm25", "pm10", "o3", "no2", "so2", "co"]




In [10]:
# Keep only AQI rows from 2020-03-06 onwards (the first date in the COVID
# data), then order them chronologically. The original row labels are kept.
in_covid_period = aqi_df["date"] >= '2020-03-06'
aqi_df = aqi_df.loc[in_covid_period].sort_values(by='date')




In [11]:
# Preview the first rows; the index starts at 1011 because the filtered frame
# keeps its original row labels.
aqi_df.head()

Unnamed: 0,date,pm25,pm10,o3,no2,so2,co
1011,2020-03-06,29,16,0,12,0,0
1012,2020-03-07,11,19,0,11,0,0
1013,2020-03-08,22,24,0,15,0,0
1014,2020-03-09,24,18,0,13,0,0
1015,2020-03-10,22,26,0,16,0,0


In [12]:
# (rows, columns) of the filtered AQI frame.
aqi_df.shape

(1027, 7)

In [13]:
# Display the COVID frame (the middle rows are elided in the rendered output);
# Date is still a DD/MM/YYYY string at this point.
covid_df

Unnamed: 0,Date,New Cases,Total Cases,Recovered,Deaths,Active Cases
0,06/03/2020,1,1,0,0,1
1,07/03/2020,2,3,0,0,3
2,08/03/2020,0,3,0,0,3
3,09/03/2020,1,4,0,0,4
4,10/03/2020,1,5,0,0,5
...,...,...,...,...,...,...
1025,26/12/2022,6,116253,114773,813,271
1026,27/12/2022,27,116280,114787,813,284
1027,28/12/2022,23,116303,114814,813,280
1028,29/12/2022,17,116320,114844,813,267


In [14]:
# (rows, columns) of the COVID frame.
covid_df.shape

(1030, 6)