In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Import csv Data

In [2]:
# Load the global confirmed-case table, then keep only the rows whose
# 'Country/Region' is Australia.
df_raw = pd.read_csv('../Resources/industry/global_c19_confirmed_data.csv')
is_australia = df_raw['Country/Region'].eq("Australia")
df_raw_aus = df_raw.loc[is_australia]

In [3]:
# The first four columns are metadata (Province/State, Country/Region, Lat, Long);
# every column after them is a date label.
N_META_COLUMNS = 4
dates = df_raw_aus.columns[N_META_COLUMNS:]

In [4]:
# Date labels are '-'-separated (e.g. '22-01-20'): the second field is the month,
# the third is the two-digit year. Split each label once and pick the fields.
date_fields = [date.split('-') for date in dates]
months = [fields[1] for fields in date_fields]
years = [fields[2] for fields in date_fields]

## Transpose Data

In [5]:
# Flip rows and columns: each state/territory becomes a column and each
# metadata field / date becomes a row.
df_aus = df_raw_aus.transpose()

In [6]:
# Show the transposed frame to check its layout before slicing it further.
df_aus

Unnamed: 0,8,9,10,11,12,13,14,15
Province/State,Australian Capital Territory,New South Wales,Northern Territory,Queensland,South Australia,Tasmania,Victoria,Western Australia
Country/Region,Australia,Australia,Australia,Australia,Australia,Australia,Australia,Australia
Lat,-35.4735,-33.8688,-12.4634,-27.4698,-34.9285,-42.8821,-37.8136,-31.9505
Long,149.012,151.209,130.846,153.025,138.601,147.327,144.963,115.861
22-01-20,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...
17-04-21,123,5376,119,1518,682,234,20494,973
18-04-21,123,5384,119,1518,682,234,20498,975
19-04-21,123,5387,129,1518,688,234,20499,978
20-04-21,123,5395,132,1519,691,234,20502,980


## Locate the State Columns

In [7]:
# Column labels of the transposed frame are the original row numbers; map them
# to state/territory abbreviations.
state_abbreviations = {
    8: 'ACT', 9: 'NSW', 10: 'NT', 11: 'QLD',
    12: 'SA', 13: 'TAS', 14: 'VIC', 15: 'WA',
}
# Skip the four metadata rows so only the per-date rows remain.
df = df_aus.iloc[4:].rename(columns=state_abbreviations)

## Re-calculate the cases per day so it's not cumulative

In [8]:
# Daily new cases per state: the row-to-row difference of the cumulative counts.
# The first date has no previous row to compare with, so its daily value is 0.
states = df.columns

# Cast explicitly in case the transpose left the counts as generic objects
# (assumes every count is a whole number, as in the displayed data).
daily = df.astype('int64').diff().fillna(0).astype('int64')

# Build the frame from plain lists so it gets a fresh 0..n-1 index instead of
# the date labels.
cases = {state: daily[state].tolist() for state in states}
df_cases = pd.DataFrame(cases)
df_cases.head()

Unnamed: 0,ACT,NSW,NT,QLD,SA,TAS,VIC,WA
0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0
4,0,3,0,0,0,0,1,0


In [9]:
# Tag every daily row with the month and two-digit year parsed from its date label.
df_cases = df_cases.assign(Month=months, Year=years)
df_cases.head()

Unnamed: 0,ACT,NSW,NT,QLD,SA,TAS,VIC,WA,Month,Year
0,0,0,0,0,0,0,0,0,1,20
1,0,0,0,0,0,0,0,0,1,20
2,0,0,0,0,0,0,0,0,1,20
3,0,0,0,0,0,0,0,0,1,20
4,0,3,0,0,0,0,1,0,1,20


## Create Monthly & Quarterly Cases per year

In [10]:
# Split the daily cases by (two-digit) year
in_2020 = df_cases['Year'].eq('20')
in_2021 = df_cases['Year'].eq('21')
df_cases_20 = df_cases.loc[in_2020]
df_cases_21 = df_cases.loc[in_2021]

In [11]:
# Monthly totals per state for each year, indexed by the month code.
# 'Year' is a text column, so it is dropped before summing: older pandas
# discarded it silently, while newer versions would concatenate the strings.
Year_20 = df_cases_20.drop(columns='Year').groupby('Month').sum()
mos_20 = Year_20.index
Year_21 = df_cases_21.drop(columns='Year').groupby('Month').sum()
mos_21 = Year_21.index

In [12]:
# Sanity check: the last daily rows of 2020 whose month code is '12'
is_december = df_cases_20['Month'] == '12'
df_cases_20.loc[is_december].tail()

Unnamed: 0,ACT,NSW,NT,QLD,SA,TAS,VIC,WA,Month,Year
340,0,14,0,5,3,0,2,5,12,20
341,0,9,1,2,0,0,0,0,12,20
342,0,25,0,2,3,0,1,1,12,20
343,0,17,0,3,1,0,3,0,12,20
344,0,5,1,0,4,0,8,2,12,20


### Monthly cases 2020

In [13]:
# Monthly 2020 table: flat 0..n-1 index, a national total and an 'MM_2020' label.
Year_20 = Year_20.reset_index(drop = True)
# Total the state columns by name, so re-running this cell does not fold the
# existing 'AU' (or the text 'Month') column back into the sum.
Year_20['AU'] = Year_20[list(states)].sum(axis = 1)
Year_20['Month'] = mos_20 + '_2020'
Year_20

Unnamed: 0,ACT,NSW,NT,QLD,SA,TAS,VIC,WA,AU,Month
0,0,4,0,2,0,0,3,0,9,01_2020
1,0,0,0,7,3,0,4,2,16,02_2020
2,80,2028,17,734,334,69,910,362,4534,03_2020
3,26,993,11,290,101,152,447,187,2207,04_2020
4,1,73,1,25,2,7,289,38,436,05_2020
5,1,105,0,9,3,0,578,22,718,06_2020
6,5,570,4,17,8,1,8700,55,9360,07_2020
7,0,290,0,40,12,1,8207,-11,8539,08_2020
8,0,164,0,33,5,0,1045,30,1277,09_2020
9,1,198,5,15,33,0,163,84,499,10_2020


### Quarterly Cases for Y 2020

In [14]:
# Quarterly totals per state for 2020. Monthly rows are grouped in consecutive
# blocks of three (rows 0-2 -> first quarter, rows 3-5 -> second, ...).
# Each state column is grouped on its own, so no partial sum carries over from
# one state to the next; a trailing block of fewer than three months is
# reported as its own (partial) quarter.
quarter_of_month = np.arange(len(Year_20)) // 3
Q20 = {
    state: Year_20[state].groupby(quarter_of_month).sum().tolist()
    for state in states
}
Q20_cases = pd.DataFrame(Q20)
# Adding sum AU
Q20_cases['AU'] = Q20_cases.sum(axis = 1)
Q20_cases

Unnamed: 0,ACT,NSW,NT,QLD,SA,TAS,VIC,WA,AU
0,80,2032,17,743,337,69,917,364,4559
1,28,1171,12,324,106,159,1314,247,3361
2,5,1024,4,90,25,2,17952,74,19176
3,5,701,42,96,112,4,193,176,1329


In [15]:
# Label each 2020 quarter row as '<quarter>_<year>', e.g. 'Q1_2020'.
quar20 = ['Q1', 'Q2', 'Q3', 'Q4']
Q20_cases['Year'] = '2020'
quart = [
    f'{quar20[row]}_{year}'
    for row, year in enumerate(Q20_cases['Year'])
]
Q20_cases['Quartal'] = quart
Q20_cases

Unnamed: 0,ACT,NSW,NT,QLD,SA,TAS,VIC,WA,AU,Year,Quartal
0,80,2032,17,743,337,69,917,364,4559,2020,Q1_2020
1,28,1171,12,324,106,159,1314,247,3361,2020,Q2_2020
2,5,1024,4,90,25,2,17952,74,19176,2020,Q3_2020
3,5,701,42,96,112,4,193,176,1329,2020,Q4_2020


### Monthly Cases for Y 2021

In [16]:
# Monthly 2021 table: flat 0..n-1 index, a national total and an 'MM_2021' label.
Year_21 = Year_21.reset_index(drop = True)
# Total the state columns by name, so re-running this cell does not fold the
# existing 'AU' (or the text 'Month') column back into the sum.
Year_21['AU'] = Year_21[list(states)].sum(axis = 1)
Year_21['Month'] = mos_21 + '_2021'
Year_21

Unnamed: 0,ACT,NSW,NT,QLD,SA,TAS,VIC,WA,AU,Month
0,0,182,23,57,16,0,73,42,393,01_2021
1,0,70,7,21,20,0,32,10,160,02_2021
2,5,116,3,146,40,0,3,31,344,03_2021
3,0,106,28,43,37,0,20,38,272,04_2021


### Quarterly Cases for Y 2021

In [17]:
# Quarterly totals per state for 2021. Monthly rows are grouped in consecutive
# blocks of three (rows 0-2 -> first quarter, rows 3-5 -> second, ...); a
# trailing block of fewer than three months is reported as a partial quarter.
# Every block is summed exactly once, so a month count that is an exact
# multiple of three does not produce an extra all-zero row.
quarter_of_month = np.arange(len(Year_21)) // 3
Q21 = {
    state: Year_21[state].groupby(quarter_of_month).sum().tolist()
    for state in states
}
Q21_cases = pd.DataFrame(Q21)
# Adding sum AU
Q21_cases['AU'] = Q21_cases.sum(axis = 1)
Q21_cases


Unnamed: 0,ACT,NSW,NT,QLD,SA,TAS,VIC,WA,AU
0,5,368,33,224,76,0,108,83,897
1,0,106,28,43,37,0,20,38,272


In [18]:
# Label each 2021 quarter row as '<quarter>_<year>', e.g. 'Q1_2021'.
Q21_cases['Year'] = '2021'
# Take as many quarter names as there are rows, so the labels keep working
# when the data grows beyond the second quarter.
quar21 = ['Q1', 'Q2', 'Q3', 'Q4'][:len(Q21_cases)]
# Read the year by column name rather than by column position.
quart21 = [f'{label}_{year}' for label, year in zip(quar21, Q21_cases['Year'])]
Q21_cases['Quartal'] = quart21
Q21_cases

Unnamed: 0,ACT,NSW,NT,QLD,SA,TAS,VIC,WA,AU,Year,Quartal
0,5,368,33,224,76,0,108,83,897,2021,Q1_2021
1,0,106,28,43,37,0,20,38,272,2021,Q2_2021


# Create Data for 2018 and 2019
Assuming that there were no cases prior to January 2020, every count for 2018 and 2019 is set to zero.

### Monthly 2018 and 2019

In [19]:
# Placeholder monthly rows for 2018 and 2019 (every count is zero), labelled
# '<month>_<year>' with the same month codes as the 2020 table.
year1819 = ['2018', '2019']
# Month codes repeated once per placeholder year ...
mos_1819 = list(mos_20) * len(year1819)
# ... and each year repeated once per month code, in matching order.
yr_1819 = [year for year in year1819 for _ in mos_20]
mo_yr = [f'{month}_{year}' for month, year in zip(mos_1819, yr_1819)]

zero_counts = dict.fromkeys(
    ['ACT', 'NSW', 'NT', 'QLD', 'SA', 'TAS', 'VIC', 'WA', 'AU'], 0
)
Month_1819 = pd.DataFrame({**zero_counts, 'Month': mo_yr})
Month_1819

Unnamed: 0,ACT,NSW,NT,QLD,SA,TAS,VIC,WA,AU,Month
0,0,0,0,0,0,0,0,0,0,01_2018
1,0,0,0,0,0,0,0,0,0,02_2018
2,0,0,0,0,0,0,0,0,0,03_2018
3,0,0,0,0,0,0,0,0,0,04_2018
4,0,0,0,0,0,0,0,0,0,05_2018
5,0,0,0,0,0,0,0,0,0,06_2018
6,0,0,0,0,0,0,0,0,0,07_2018
7,0,0,0,0,0,0,0,0,0,08_2018
8,0,0,0,0,0,0,0,0,0,09_2018
9,0,0,0,0,0,0,0,0,0,10_2018


In [20]:
# Stack the monthly tables (2018-2019 placeholders, then 2020, then 2021) and
# write the result to CSV.
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
Y_2021 = pd.concat([Year_20, Year_21], ignore_index=True)
Y_1821 = pd.concat([Month_1819, Y_2021], ignore_index=True)
Y_1821.to_csv('../Resources/M_C19_AU_2018_2021.csv')

### Quarterly 2018 and 2019

In [21]:
# Create data for 2018 and 2019: placeholder quarterly rows, every count zero.
# NOTE(review): 'Year' holds integers here but the text '2020' / '2021' in the
# 2020 and 2021 quarterly tables; the labels built below read the same either
# way, but the combined 'Year' column ends up with mixed types - confirm intent.
Year18 = [2018] * len(quar20)
Year19 = [2019] * len(quar20)
Q18 = pd.DataFrame({'ACT':0,'NSW':0,'NT':0, 'QLD':0,'SA':0,'TAS':0,'VIC':0,'WA':0,'AU': 0, 'Year' : Year18})
Q19 = pd.DataFrame({'ACT':0,'NSW':0,'NT':0, 'QLD':0,'SA':0,'TAS':0,'VIC':0,'WA':0,'AU':0,'Year' : Year19})
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
Q1819 = pd.concat([Q18, Q19], ignore_index=True)

quar1819 = ['Q1', 'Q2','Q3','Q4', 'Q1', 'Q2','Q3','Q4']
# Read the year by column name rather than by column position.
quart1819 = [f'{label}_{year}' for label, year in zip(quar1819, Q1819['Year'])]
Q1819['Quartal'] = quart1819
Q1819

Unnamed: 0,ACT,NSW,NT,QLD,SA,TAS,VIC,WA,AU,Year,Quartal
0,0,0,0,0,0,0,0,0,0,2018,Q1_2018
1,0,0,0,0,0,0,0,0,0,2018,Q2_2018
2,0,0,0,0,0,0,0,0,0,2018,Q3_2018
3,0,0,0,0,0,0,0,0,0,2018,Q4_2018
4,0,0,0,0,0,0,0,0,0,2019,Q1_2019
5,0,0,0,0,0,0,0,0,0,2019,Q2_2019
6,0,0,0,0,0,0,0,0,0,2019,Q3_2019
7,0,0,0,0,0,0,0,0,0,2019,Q4_2019


In [22]:
# Stack the 2020 and 2021 quarterly tables under a fresh 0..n-1 index.
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
Q2021 = pd.concat([Q20_cases, Q21_cases], ignore_index=True)
Q2021

Unnamed: 0,ACT,NSW,NT,QLD,SA,TAS,VIC,WA,AU,Year,Quartal
0,80,2032,17,743,337,69,917,364,4559,2020,Q1_2020
1,28,1171,12,324,106,159,1314,247,3361,2020,Q2_2020
2,5,1024,4,90,25,2,17952,74,19176,2020,Q3_2020
3,5,701,42,96,112,4,193,176,1329,2020,Q4_2020
4,5,368,33,224,76,0,108,83,897,2021,Q1_2021
5,0,106,28,43,37,0,20,38,272,2021,Q2_2021


### Total Quarterly COVID-19 Cases in Australia

In [23]:
# Put the 2018-2019 placeholder quarters in front of the 2020-2021 quarters
# and write the combined table to CSV.
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
Q_C19_AU = pd.concat([Q1819, Q2021], ignore_index=True)
Q_C19_AU.to_csv('../Resources/C19_AU_2018_2021.csv')
Q_C19_AU

Unnamed: 0,ACT,NSW,NT,QLD,SA,TAS,VIC,WA,AU,Year,Quartal
0,0,0,0,0,0,0,0,0,0,2018,Q1_2018
1,0,0,0,0,0,0,0,0,0,2018,Q2_2018
2,0,0,0,0,0,0,0,0,0,2018,Q3_2018
3,0,0,0,0,0,0,0,0,0,2018,Q4_2018
4,0,0,0,0,0,0,0,0,0,2019,Q1_2019
5,0,0,0,0,0,0,0,0,0,2019,Q2_2019
6,0,0,0,0,0,0,0,0,0,2019,Q3_2019
7,0,0,0,0,0,0,0,0,0,2019,Q4_2019
8,80,2032,17,743,337,69,917,364,4559,2020,Q1_2020
9,28,1171,12,324,106,159,1314,247,3361,2020,Q2_2020
