In [30]:
#Dependencies
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
from scipy.stats import linregress
from pprint import pprint

In [47]:
#Create dataframe for Air Quality data from csv files.
aq2020_df = pd.read_csv("Resources/aq2020.csv")
aq2019Q1_df = pd.read_csv("Resources/aq2019Q1.csv")
aq2019Q2_df = pd.read_csv("Resources/aq2019Q2.csv")
aq2019Q3_df = pd.read_csv("Resources/aq2019Q3.csv")

# create a df to hold all of 2019 data
aq2019_df=pd.concat([aq2019Q1_df,aq2019Q2_df,aq2019Q3_df])

#Format dataframe dates
aq2020_df['Date']= pd.to_datetime(aq2020_df['Date'])

aq2019_df['Date']= pd.to_datetime(aq2019_df['Date'])

aq2019_df

Unnamed: 0,Date,Country,City,Specie,count,min,max,median,variance
0,2019-01-16,AE,Abu Dhabi,pm10,24.0,86.0,99.0,97.0,179.40
1,2019-01-22,AE,Abu Dhabi,pm10,24.0,51.0,57.0,55.0,23.75
2,2019-01-26,AE,Abu Dhabi,pm10,24.0,136.0,173.0,160.0,941.96
3,2019-01-07,AE,Abu Dhabi,pm10,24.0,60.0,91.0,72.0,1006.88
4,2019-01-10,AE,Abu Dhabi,pm10,24.0,82.0,93.0,87.0,57.97
...,...,...,...,...,...,...,...,...,...
582605,2019-07-30,HU,Budapest,wind-gust,305.0,0.3,18.0,5.0,165.48
582606,NaT,,,,,,,,
582607,NaT,,,,,,,,
582608,NaT,,,,,,,,


In [52]:
# Clean the data


#drop null values and reset index 2020
clean_aq2020_df = aq2020_df.dropna(how='any')
clean_aq2020_df.head(10)
clean_aq2020_df.reset_index(inplace=True)
del clean_aq2020_df['index']
clean_aq2020_df


#drop null values and reset index 2020
clean_aq2019_df = aq2019_df.dropna(how='any')
clean_aq2019_df.reset_index(inplace=True)
del clean_aq2019_df['index']
clean_aq2019_df


clean_aq2019_df['Country'].unique()

array(['AE', 'CZ', 'SG', 'AT', 'BE', 'KR', 'MN', 'NO', 'NP', 'PE', 'BA',
       'BR', 'IL', 'IN', 'IR', 'NZ', 'UA', 'US', 'FR', 'KW', 'RE', 'TR',
       'XK', 'UZ', 'FI', 'JP', 'KZ', 'RO', 'RU', 'SK', 'ES', 'HK', 'HR',
       'MY', 'TH', 'LK', 'AR', 'CA', 'CH', 'CL', 'ET', 'KG', 'CW', 'DE',
       'MX', 'SA', 'UG', 'BD', 'HU', 'IS', 'PH', 'ID', 'MO', 'SE', 'SV',
       'VN', 'AU', 'BH', 'GB', 'IE', 'LA', 'LT', 'PR', 'PT', 'TW', 'CN',
       'PL', 'BG', 'CY', 'IQ', 'MK', 'BO', 'DK', 'EC', 'ZA', 'CO', 'EE',
       'IT', 'JO', 'NL', 'RS', 'PK', 'TJ', 'MM', 'DZ', 'GT', 'TM'],
      dtype=object)

In [93]:
#Covid API URL
base_url = "https://api.covid19api.com/dayone/country/"
end_url = "/status/confirmed"

Country_list=['Germany','Italy','New Zealand','Spain','Brazil']
Covid_date=[]
Covid_country=[]
Covid_country_code=[]
Covid_cases=[]
n=0

# Loop through the list of cities and perform a request for data on each
print("Beginning Data Retrieval")
print("--------------------------")
for country in Country_list:
    response = requests.get(base_url+country+end_url).json()
    
    for i in range(len(response)):
        #exception handling  
        try:  
            #store required data into respective variables
            Covid_date.append(response[i]['Date'])
            Covid_country.append(response[i]['Country'])
            Covid_country_code.append(response[i]['CountryCode'])
            Covid_cases.append(response[i]['Cases'])
        except KeyError:
            print("Coutry not found. Skipping...")
print("-----------------------------")
print("Data Retrieval Complete")      
print("-----------------------------")

#print(Covid_country)

Beginning Data Retrieval
--------------------------
-----------------------------
Data Retrieval Complete
-----------------------------


In [108]:
# create a data frame 

covid_dict = {
    "Date": Covid_date,
    "Covid_country": Covid_country,
    "Covid_country_code": Covid_country_code,
    "Covid_cases": Covid_cases
}
Covid_data = pd.DataFrame(covid_dict)


#Format covid dataframe dates (now mergable with air quality dataframes)
Covid_data['Date'] = Covid_data['Date'].str.strip('T00:00:00Z')
Covid_data['Date'] = pd.to_datetime(Covid_data['Date'])


sorted_covid_data=Covid_data.sort_values(by='Date', ascending=False)
sorted_covid_data
group_covid_data= sorted_covid_data.groupby(['Date','Covid_country']).sum()
group_covid_data

Unnamed: 0_level_0,Unnamed: 1_level_0,Covid_cases
Date,Covid_country,Unnamed: 2_level_1
2020-01-03,Germany,4
2020-01-27,Germany,1
2020-01-28,Germany,4
2020-01-29,Germany,4
2020-01-31,Germany,5
...,...,...
2020-07-18,Brazil,2074860
2020-07-18,Germany,202426
2020-07-18,Italy,244216
2020-07-18,New Zealand,1553


In [109]:
Covid_data

Unnamed: 0,Date,Covid_country,Covid_country_code,Covid_cases
0,2020-01-27,Germany,DE,1
1,2020-01-28,Germany,DE,4
2,2020-01-29,Germany,DE,4
3,2020-01-03,Germany,DE,4
4,2020-01-31,Germany,DE,5
...,...,...,...,...
794,2020-07-14,Brazil,BR,1926824
795,2020-07-15,Brazil,BR,1966748
796,2020-07-16,Brazil,BR,2012151
797,2020-07-17,Brazil,BR,2046328


In [100]:
#select the coutries and store in variables
Country_code=['DE','IT','NZ','ES','BR']


#for ccode in Country_code:
query_aq2020_df=clean_aq2020_df.loc[(clean_aq2020_df['Country'] =='DE')|
                                    (clean_aq2020_df['Country'] =='IT')|
                                    (clean_aq2020_df['Country'] =='NZ')|
                                    (clean_aq2020_df['Country'] =='ES')|
                                    (clean_aq2020_df['Country'] =='BR'), :]
query_aq2020_df
    
sorted_aq2020_df=query_aq2020_df.sort_values(by='Date', ascending=False)
sorted_aq2020_df

Unnamed: 0,Date,Country,City,Specie,count,min,max,median,variance
193493,2020-07-17,NZ,Hamilton,wind-speed,16.0,9.6,13.2,10.8,11.15
188487,2020-07-17,NZ,Christchurch,temperature,23.0,1.4,4.9,3.7,11.44
186891,2020-07-17,NZ,Auckland,wind-gust,42.0,0.5,9.1,1.2,40.42
190577,2020-07-17,NZ,Wellington,pm10,16.0,1.0,10.0,4.0,54.29
191666,2020-07-17,NZ,Wellington,temperature,24.0,8.2,10.0,8.3,4.46
...,...,...,...,...,...,...,...,...,...
149943,2019-12-30,IT,Bologna,o3,48.0,7.0,7.8,7.0,1.63
57488,2019-12-30,ES,Huelva,pm10,231.0,6.0,113.0,23.0,2958.15
232460,2019-12-30,DE,Kassel,so2,3.0,0.7,3.1,2.6,16.03
239073,2019-12-30,DE,Munich,pm10,96.0,7.0,46.0,16.0,368.63


In [105]:
#Combine covid data and AQ data by date
combined_data_2020=pd.merge(sorted_covid_data,sorted_aq2020_df,how='inner', on ='Date' )
combined_data_2020
# group by date
group_data_2020= combined_data_2020.groupby(['Date']).sum()
group_data_2020

Unnamed: 0_level_0,Covid_cases,count,min,max,median,variance
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-03,2780,68872.0,66754.4,79134.4,72313.3,351780.56
2020-01-27,621,75047.0,67580.7,80401.4,72620.0,469999.33
2020-01-28,2480,75778.0,66968.4,79141.4,71707.4,393239.22
2020-01-29,2488,76149.0,66454.4,81029.5,71243.9,505067.44
2020-01-31,4347,152782.0,134899.4,161696.6,143776.0,2049811.60
...,...,...,...,...,...,...
2020-07-13,1556696750,353240.0,330055.5,402529.0,357120.5,4853426.30
2020-07-14,1585160370,347640.0,330140.0,410095.0,358292.0,3796525.95
2020-07-15,1564728996,334880.0,332285.5,405832.5,356212.5,4331725.50
2020-07-16,1478451104,264555.0,327027.0,377725.0,347070.5,1450133.50
