In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Import csv Data

In [3]:
df_raw = pd.read_csv('../Resources/covid_raw/time_series_covid19_confirmed_global.csv')
df_raw_aus = df_raw[df_raw['Country/Region'] == "Australia" ]

In [4]:
dates = df_raw_aus.columns[4:]

In [5]:
months = [date.split('-')[1] for date in dates]
years = [date.split('-')[2] for date in dates]

## Transpose Data

In [6]:
df_aus = df_raw_aus.T

In [7]:
df_aus

Unnamed: 0,8,9,10,11,12,13,14,15
Province/State,Australian Capital Territory,New South Wales,Northern Territory,Queensland,South Australia,Tasmania,Victoria,Western Australia
Country/Region,Australia,Australia,Australia,Australia,Australia,Australia,Australia,Australia
Lat,-35.4735,-33.8688,-12.4634,-27.4698,-34.9285,-42.8821,-37.8136,-31.9505
Long,149.012,151.209,130.846,153.025,138.601,147.327,144.963,115.861
1-22-20,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...
4-17-21,123,5376,119,1518,682,234,20494,973
4-18-21,123,5384,119,1518,682,234,20498,975
4-19-21,123,5387,129,1518,688,234,20499,978
4-20-21,123,5395,132,1519,691,234,20502,980


## Locate the State Columns

In [8]:
df = df_aus.iloc[4:]
df = df.rename(columns={8: 'ACT', 9: 'NSW', 10:'NT', 11: 'QLD', 12: 'SA', 13:'TAS', 14: 'VIC', 15: 'WA' })
df

Unnamed: 0,ACT,NSW,NT,QLD,SA,TAS,VIC,WA
1-22-20,0,0,0,0,0,0,0,0
1-23-20,0,0,0,0,0,0,0,0
1-24-20,0,0,0,0,0,0,0,0
1-25-20,0,0,0,0,0,0,0,0
1-26-20,0,3,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...
4-17-21,123,5376,119,1518,682,234,20494,973
4-18-21,123,5384,119,1518,682,234,20498,975
4-19-21,123,5387,129,1518,688,234,20499,978
4-20-21,123,5395,132,1519,691,234,20502,980


## Re-calculate the cases per day so its not cumulative

In [9]:
# act=[]
# nsw=[]
cases = {}
states = df.columns

for i in range(len(states)):
    count = 0
    cases[f'{states[i]}']=[]
    for j in range(1, len(df)):
        if count == 0:
            cases[f'{states[i]}'].append(0)

        next_val = df.iloc[j,i]
        prev_val = df.iloc[j-1,i]
        diff = next_val - prev_val
        cases[f'{states[i]}'].append(diff)

        if j == len(df):
            count = 0
        else:
            count+=1


df_cases = pd.DataFrame(cases)
df_cases.head()

# df_cases[df_cases['ACT'] == -1].index
# states

Unnamed: 0,ACT,NSW,NT,QLD,SA,TAS,VIC,WA
0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0
4,0,3,0,0,0,0,1,0


In [10]:
df_cases['Month'] = months
df_cases['Year'] = years
df_cases.head()

Unnamed: 0,ACT,NSW,NT,QLD,SA,TAS,VIC,WA,Month,Year
0,0,0,0,0,0,0,0,0,22,20
1,0,0,0,0,0,0,0,0,23,20
2,0,0,0,0,0,0,0,0,24,20
3,0,0,0,0,0,0,0,0,25,20
4,0,3,0,0,0,0,1,0,26,20


## Create Monthly & Quarterly Cases per year

In [11]:
# Cases Monthly
df_cases_20 = df_cases[df_cases['Year'] == '20']
df_cases_21 = df_cases[df_cases['Year'] == '21']

In [12]:
Year_20 = df_cases_20.groupby('Month').sum()
Year_20
mos_20 = Year_20.index
Year_21 = df_cases_21.groupby('Month').sum()
Year_21
mos_21 = Year_21.index

In [13]:
# Check result
df_cases_20[df_cases_20['Month'] == '12'].tail()

Unnamed: 0,ACT,NSW,NT,QLD,SA,TAS,VIC,WA,Month,Year
203,0,12,0,0,0,0,217,2,12,20
234,0,9,0,0,0,0,35,0,12,20
264,0,11,0,0,0,0,12,2,12,20
295,0,0,0,1,0,0,0,0,12,20
325,0,3,0,0,0,0,1,1,12,20


### Monthly cases 2020

In [14]:
Year_20 = Year_20.reset_index(drop = True)
Year_20['AU'] = Year_20.sum(axis = 1)
Year_20['Month'] = mos_20 + '_2020'
Year_20

Unnamed: 0,ACT,NSW,NT,QLD,SA,TAS,VIC,WA,AU,Month
0,4,215,2,50,33,0,858,7,1169,1_2020
1,3,109,3,20,8,11,530,26,710,10_2020
2,0,82,1,27,6,13,732,15,876,11_2020
3,0,59,0,13,0,0,439,5,516,12_2020
4,0,90,5,23,10,13,722,16,879,13_2020
5,1,99,3,28,15,21,604,24,795,14_2020
6,0,91,0,19,24,2,619,23,778,15_2020
7,1,90,2,13,13,5,746,21,891,16_2020
8,1,122,0,22,10,11,494,29,689,17_2020
9,1,141,1,27,10,11,640,14,845,18_2020


In [16]:
states

Index(['ACT', 'NSW', 'NT', 'QLD', 'SA', 'TAS', 'VIC', 'WA'], dtype='object')

Unnamed: 0,ACT,NSW,NT,QLD,SA,TAS,VIC,WA,AU,Month
26,3,151,1,14,1,5,667,35,877,5_2020
27,1,92,0,22,12,5,643,-13,762,6_2020
28,3,102,2,23,4,4,634,10,782,7_2020
29,4,115,0,20,5,13,637,22,816,8_2020
30,2,93,2,17,8,14,703,21,860,9_2020


### Quarterly Cases for Y 2020

In [15]:
Q20={}
counter = 1
calc = 0
for i in range (len(states)):
    Q20[f'{states[i]}']=[]
    # print(states[i])
    for j in range(len(Year_20)):
        calc = Year_20.iloc[j,i] + calc
        # print(Year_20.iloc[j,i],calc, counter, Q20[f'{states[i]}'] )
        
        if counter == 3:
            # Append every 3 months
            Q20[f'{states[i]}'].append(calc)
            calc = 0
            counter = 0
        
        counter+=1
Q20_cases = pd.DataFrame(Q20)
# Adding sum AU
Q20_cases['AU'] = Q20_cases.sum(axis = 1)
Q20_cases

ValueError: arrays must all be same length

In [None]:
quar20 = ['Q1', 'Q2', 'Q3', 'Q4']
Q20_cases['Year'] = '2020'
quart=[]
for i in range(len(Q20_cases)):
    quart.append(f'{quar20[i]}_{Q20_cases.iloc[i,9]}')
Q20_cases['Quartal'] = quart
Q20_cases

### Monthly Cases for Y 2021

In [None]:
Year_21 = Year_21.reset_index(drop = True)
# Year_21 = Year_21[:-1]
# Year_21['AU'] = Year_21.sum(axis = 1)

Year_21['AU'] = Year_21.sum(axis = 1)
Year_21['Month'] = mos_21 + '_2021'
Year_21

### Quarterly Cases for Y 2021

In [None]:
Q21={}
counter = 1
calc = 0
for i in range (len(states)):
    Q21[f'{states[i]}']=[]
    # print(states[i])
    for j in range(len(Year_21)):
        calc = Year_21.iloc[j,i] + calc
        # print(Year_21.iloc[j,i],calc, counter, Q21[f'{states[i-1]}'] )
        
        if counter == 3:
            # Append every 3 months
            Q21[f'{states[i]}'].append(calc)
            calc = 0
            counter = 0
        if j == len(Year_21)-1:
            Q21[f'{states[i]}'].append(calc)
            counter = 0
            calc = 0
        
        counter+=1
Q21
Q21_cases = pd.DataFrame(Q21)
Q21_cases['AU'] = Q21_cases.sum(axis = 1)
Q21_cases


In [None]:
Q21_cases['Year'] = '2021'
quar21 = ['Q1', 'Q2']
quart21=[]
for i in range(len(Q21_cases)):
    quart21.append(f'{quar21[i]}_{Q21_cases.iloc[i,9]}')
Q21_cases['Quartal'] = quart21
Q21_cases

# Create Data for 2018 and 2019
Assuming that there were no cases prior Jan 2020

### Monthly 2018 and 2019

In [None]:
mos_1819 = []
for i in range(2):
    for j in range(len(mos_20)):
         mos_1819.append(mos_20[j])
year1819 = ['2018', '2019']
yr_1819=[]
for i in range(len(year1819)):
    for j in range(len(mos_20)):
         yr_1819.append(year1819[i])

mo_yr = []
for i in range(len(yr_1819)):
    mo_yr.append(f'{mos_1819[i]}_{yr_1819[i]}')
mo_yr


Month_1819 = pd.DataFrame({'ACT':0,
'NSW':0,
'NT':0, 
'QLD':0,
'SA':0,
'TAS':0,
'VIC':0,
'WA':0,
'AU': 0, 
'Month' : mo_yr})
Month_1819

In [None]:
Y_2021 = Year_20.append(Year_21, ignore_index=True)
Y_1821 = Month_1819.append(Y_2021, ignore_index= True)
Y_1821.to_csv('../Resources/M_C19_AU_2018_2021.csv')

### Quarterly 2018 and 2019

In [None]:
# Create data for 2018 and 2019
Year18 = [2018 for i in quar20]
Year19 = [2019 for i in quar20]
# year1819 = ['2018', '2018', '2018', '2018', '2019']
# Q18 = pd.DataFrame({'Year': 2018})
Q18 = pd.DataFrame({'ACT':0,'NSW':0,'NT':0, 'QLD':0,'SA':0,'TAS':0,'VIC':0,'WA':0,'AU': 0, 'Year' : Year18})
Q19 = pd.DataFrame({'ACT':0,'NSW':0,'NT':0, 'QLD':0,'SA':0,'TAS':0,'VIC':0,'WA':0,'AU':0,'Year' : Year19})
# Q19 = pd.)
Q1819 = Q18.append(Q19, ignore_index=True)

quar1819 = ['Q1', 'Q2','Q3','Q4', 'Q1', 'Q2','Q3','Q4']
quart1819=[]
for i in range(len(Q1819)):
    quart1819.append(f'{quar1819[i]}_{Q1819.iloc[i,9]}')
Q1819['Quartal'] = quart1819
Q1819

In [None]:
Q2021 = Q20_cases.append(Q21_cases, ignore_index=True)
Q2021

### Total Quarterly Covid19 Cases in Australia 

In [None]:
Q_C19_AU= Q1819.append(Q2021, ignore_index = True)
Q_C19_AU.to_csv('../Resources/C19_AU_2018_2021.csv')
Q_C19_AU