In [2]:
# Import the relevant libraries
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import seaborn as sns
import matplotlib
import numpy as np
import matplotlib.dates as mdates
import matplotlib.ticker as mtick

%matplotlib inline

In [4]:
# Read the five 'Case Information' batch csv files into Data Frames.
# The metadata sheet ('DOH COVID Data Drop_ 20221022 - 03 Metadata - Fields.csv') is not loaded.
CASE_INFO_COLUMNS = [1,2,3,6,7,10,11,12,13,17,19,20]
CASE_INFO_PATH = 'Case Information 2023-04-15/DOH COVID Data Drop_ 20230415 - 04 Case Information_batch_{}.csv'
covid_df_0, covid_df_1, covid_df_2, covid_df_3, covid_df_4 = [
    pd.read_csv(CASE_INFO_PATH.format(batch_number), usecols = CASE_INFO_COLUMNS)
    for batch_number in range(5)
]

In [5]:
# Stack the five batch Data Frames row-wise into one frame with a fresh 0..n-1 index
batch_frames = [covid_df_0, covid_df_1, covid_df_2, covid_df_3, covid_df_4]
covid_df = pd.concat(batch_frames, axis = 0, ignore_index = True)

In [6]:
# Parse the date columns used by the analysis into datetime values.
# DateSpecimen, DateResultRelease and DateRecover are intentionally left unconverted.
for date_column in ('DateRepConf', 'DateDied', 'DateOnset'):
    covid_df[date_column] = pd.to_datetime(covid_df[date_column])

# Relabel three age groups: zero-pad the single-digit ranges and give '80+' an
# explicit upper bound (presumably so the labels sort in age order as strings).
covid_df['AgeGroup'] = covid_df['AgeGroup'].replace({
    '0 to 4': '00 to 04',
    '5 to 9': '05 to 09',
    '80+': '80 to 110',
})

In [None]:
# Filters the data frame by replacing dates that don't lie within the given range with NaT
def FilterByDate(original_df, date_start ='2020-01-01', date_end = '2024-01-01'):
    """Blank out case dates that fall outside [date_start, date_end].

    Rows are never dropped. Values in the 'DateRepConf', 'DateDied' and
    'DateOnset' columns that are outside the inclusive range are replaced
    with NaT so that later group-by-date counts skip them.

    Parameters
    ----------
    original_df : DataFrame with datetime columns 'DateRepConf', 'DateDied'
        and 'DateOnset'. It is not modified.
    date_start, date_end : anything accepted by pd.to_datetime; both bounds
        are inclusive.

    Returns
    -------
    A new DataFrame with the same rows and out-of-range dates set to NaT.
    """
    # Convert dates to datetime data type
    date_start = pd.to_datetime(date_start)
    date_end = pd.to_datetime(date_end)

    # Work on a copy: the input may be the caller's frame or a filtered slice,
    # and writing into it in place would silently alter (or warn about) it.
    df = original_df.copy()

    for date_column in ('DateRepConf', 'DateDied', 'DateOnset'):
        # between() is False for NaT, so already-missing dates stay missing
        out_of_range = ~df[date_column].between(date_start, date_end)
        df.loc[out_of_range, date_column] = pd.NaT
    return df

# Filters a data frame by either: a value in a column, or
#                                 a range of values in a column
def FilterBy(df,column,value1 ='',value2 = ''):
    """Return the rows of df selected by one column.

    - value1 == ''           : no filtering, df is returned unchanged
    - only value1 given      : rows where df[column] == value1
    - value1 and value2 given: rows where df[column] lies between value1 and
                               value2 (both inclusive)
    """
    if value1 == '':
        return df
    if value2 == '':
        return df[df[column] == value1]
    in_range = df[column].between(value1, value2)
    return df[in_range]

# Filters a data frame by: region, province, city, admit,
#                          health status, sex, age, and date
def FilterByEverything(temp_df, 
                       region ='', 
                       province ='',
                       city='',
                       admit ='',
                       health_status ='',
                       sex ='',
                       age_start = 0,
                       age_end = 200,
                       date_start = '2020-01-01',
                       date_end = '2024-01-01'):
    """Apply every supported filter to a copy of temp_df and return the result.

    The age range is applied first, then the exact-match filters (an empty
    string means "do not filter on this field"), and finally FilterByDate
    blanks out the dates outside [date_start, date_end].

    NOTE(review): the age filter uses between(), so rows with a missing Age
    are removed even with the default 0-200 range.
    """
    filtered = FilterBy(temp_df.copy(), 'Age', age_start, age_end)

    # (column, requested value) pairs, applied in order
    exact_match_filters = (
        ('RegionRes', region),
        ('ProvRes', province),
        ('CityMunRes', city),
        ('Admitted', admit),
        ('HealthStatus', health_status),
        ('Sex', sex),
    )
    for column_name, wanted_value in exact_match_filters:
        filtered = FilterBy(filtered, column_name, wanted_value)

    return FilterByDate(filtered, date_start, date_end)

# Groups a data frame by a specified date-related column
def GroupColumnByDate(df, date_column, data_column):
    """Count the rows of df per distinct value of date_column.

    Returns a two-column DataFrame: 'Date' (the distinct values of
    date_column) and data_column (the number of rows with that date).
    """
    rows_per_date = df.groupby([date_column], as_index = False).size()
    return rows_per_date.rename(columns = {date_column: 'Date', 'size': data_column})

# converts a data frame indexed by individual cases to one by dates, with a
# column for cases, deaths, and symptom onsets
def IndexByCaseToDate(df):
    """Turn a one-row-per-case frame into a one-row-per-date frame.

    Returns a DataFrame with columns 'Date', 'new_cases' (rows per
    DateRepConf), 'new_deaths' (rows per DateDied) and 'new_symptoms' (rows
    per DateOnset). The three counts are outer-merged on 'Date', so a date
    that is missing from one of them has NaN in that column.
    """
    # (source date column, name of the resulting count column)
    count_specs = (
        ('DateRepConf', 'new_cases'),
        ('DateDied', 'new_deaths'),
        ('DateOnset', 'new_symptoms'),
    )
    daily_counts = [
        GroupColumnByDate(df, source_column, count_name)
        for source_column, count_name in count_specs
    ]

    # merging the per-date counts one after another on the date column
    merged_df = daily_counts[0]
    for counts_df in daily_counts[1:]:
        merged_df = merged_df.merge(counts_df, on='Date', how='outer')
    return merged_df

# Inclusive (start, end) date range of each wave, keyed by wave number.
# Single source of truth shared by WaveToDate and DateToWave.
_WAVE_DATE_RANGES = {
    1: ('2020-01-01', '2020-05-22'),
    2: ('2020-05-23', '2021-01-10'),
    3: ('2021-01-11', '2021-07-27'),
    4: ('2021-07-28', '2021-12-27'),
    5: ('2021-12-28', '2022-06-06'),
    6: ('2022-06-07', '2024-01-01'),
}
# Range returned for any wave number that is not in the table
_ALL_WAVES_DATE_RANGE = ('2020-01-01', '2024-01-01')
# Same boundaries as Timestamps, built once instead of on every DateToWave call
_WAVE_TIMESTAMP_BOUNDS = [
    (wave_number, pd.Timestamp(range_start), pd.Timestamp(range_end))
    for wave_number, (range_start, range_end) in _WAVE_DATE_RANGES.items()
]

# outputs the date range if a wave number is given
def WaveToDate(wave):
    """Return (date_start, date_end) as 'YYYY-MM-DD' strings for a wave number.

    Waves 1-6 map to their own range; any other value returns the full
    2020-01-01 to 2024-01-01 range.
    """
    for wave_number, date_range in _WAVE_DATE_RANGES.items():
        if wave == wave_number:
            return date_range
    return _ALL_WAVES_DATE_RANGE

# outputs the wave number if a date is given
def DateToWave(date):
    """Return the wave number (1-6) whose date range contains `date`.

    `date` may be anything pd.to_datetime accepts. Returns None for a
    missing date (None/NaT) or a date outside every wave.
    """
    date = pd.to_datetime(date)
    if pd.isna(date):
        return None
    # Wave boundaries are whole days; drop the time of day so that e.g.
    # 2020-05-22 12:00 still counts as the last day of wave 1.
    day = date.normalize()
    for wave_number, range_start, range_end in _WAVE_TIMESTAMP_BOUNDS:
        if range_start <= day <= range_end:
            return wave_number
    return None

In [None]:
# Helper for single_plot: upper y-axis limit with 10% headroom
def _padded_axis_limit(*peaks):
    """Return 110% of the largest positive, non-missing peak (1 if there is none)."""
    usable_peaks = [peak for peak in peaks if pd.notna(peak) and peak > 0]
    return max(usable_peaks) * 1.1 if usable_peaks else 1


# plots either Cases, Deaths, Death Rate, or an overlap of cases and deaths
# depending on date range, age range, and place of residence
def single_plot(original_df, 
            plot_type = 'cases_and_deaths',
            date_start = '2020-01-01',
            date_end = '2024-01-01',
            age_start = 0,
            age_end = 200,
            region = '',
            province = '',
            city = '',
            health_status ='',
            admit = '',
            sex = '',
            wave = 0
           ):
    """Plot one time series for the cases that match the given filters.

    Parameters
    ----------
    original_df : one-row-per-case DataFrame; it is not modified.
    plot_type : 'cases_and_deaths' (two y-axes, cases scaled 100:1 against
        deaths), 'new_cases', 'new_deaths' or 'death_rate' (weekly deaths
        divided by weekly cases). Any other value draws nothing.
    date_start, date_end : inclusive date range; ignored when wave > 0.
    age_start, age_end, region, province, city, health_status, admit, sex :
        passed through to FilterByEverything ('' means no filter).
    wave : when > 0, the date range is taken from WaveToDate(wave).

    Returns
    -------
    A summary string with the plot type, the first and last date that has
    data, and the overall fatality rate (total deaths / total cases).
    """
    # If a wave is specified, a date range will be provided
    if wave > 0:
        date_start, date_end = WaveToDate(wave)

    # Filters a data frame by: region, province, city, admit,
    #                          health status, sex, age, and date.
    # FilterByEverything works on its own copy, so no extra copy is made here.
    df = FilterByEverything(
        original_df,
        region = region, 
        province = province, 
        city = city,
        health_status = health_status,
        admit = admit,
        sex = sex,
        age_start = age_start,
        age_end = age_end,
        date_start = date_start,
        date_end = date_end
    )

    # Converts the indexing from cases to dates
    df = IndexByCaseToDate(df)

    # Nothing matched the filters: there is no date range and nothing to draw
    if df.empty:
        return '{} plot has no data for the selected filters'.format(plot_type)

    # Date range of the daily data. Taken here, before the death_rate branch
    # resamples to weeks, so the summary always reports real data dates.
    first_date = df.Date.min()
    last_date = df.Date.max()

    # total cases and deaths over the date range, and the death rate during this period
    total_cases = df.new_cases.sum()
    total_deaths = df.new_deaths.sum()
    if total_cases > 0:
        death_rate = "{:.2%}".format(total_deaths/total_cases)
    else:
        death_rate = 'n/a'

    if plot_type =='cases_and_deaths':

        # making the plots
        fig, ax1 = plt.subplots() 

        # deaths axis limit; the cases axis is drawn at 100x this limit
        ylimit = _padded_axis_limit(df.new_cases.max()/100, df.new_deaths.max())
        ax1.set_ylim([0,ylimit*100])       

        # making the x-axis label, and formatting its values
        ax1.set_xlabel('Date')
        ax1.locator_params(axis ='x', nbins=6)
        ax1.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m"))
        plt.setp(ax1.get_xticklabels(), rotation=30, horizontalalignment='right')      

        # adding LHS y-axis label, plotting the graph
        ax1.set_ylabel('New Cases')
        l1, = ax1.plot(df.Date,df.new_cases, color = 'tab:blue', alpha = 0.5, label ='New Cases')

        # adding RHS y-axis label, plotting the graph
        ax2 = ax1.twinx() 
        ax2.set_ylim([0,ylimit])
        ax2.set_ylabel('New Deaths')
        l2, = ax2.plot(df.Date,df.new_deaths, color = 'tab:orange', alpha = 0.5, label ='New Deaths')

        # creates plot title, legends, and shows the plot
        plt.title('New deaths and cases over time')
        plt.legend([l1,l2],['New Cases', 'New Deaths'], loc ='upper left')
        plt.grid(True)

        plt.show()

    elif plot_type =='new_cases':

        # making the plots with axis-labels
        fig, ax = plt.subplots()
        ax.plot(df.Date, df.new_cases)
        ax.set_xlabel('Date')
        ax.set_ylabel('New Cases')

        # format the ticks (matplotlib's default date formatter is kept here)
        plt.setp(ax.get_xticklabels(), rotation=30, horizontalalignment='right')
        ax.set_ylim([0,_padded_axis_limit(df.new_cases.max())])
        plt.title('New cases over time')
        plt.show()

    elif plot_type == 'new_deaths':

        # making the plots with axis-labels
        fig, ax = plt.subplots()
        ax.plot(df.Date, df.new_deaths)

        # sets the y limit to be 110% of the max
        ax.set_ylim([0,_padded_axis_limit(df.new_deaths.max())])

        ax.set_xlabel('Date')
        ax.set_ylabel('New Deaths')

        # format the ticks
        ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m"))
        plt.setp(ax.get_xticklabels(), rotation=30, horizontalalignment='right')

        plt.title('New deaths over time')
        plt.show()

    elif plot_type == 'death_rate':

        # grouping and summing the data by week
        weekly_df = df.groupby(pd.Grouper(key='Date', freq='W')).sum().reset_index()
        weekly_df['death_rate'] = weekly_df.new_deaths/weekly_df.new_cases

        # making the plots with axis-labels
        fig, ax = plt.subplots()
        ax.plot(weekly_df.Date,weekly_df.death_rate)
        ax.set_xlabel('Date')
        ax.set_ylabel('Weekly Death Rates')

        # formatting y-axis as percentages
        ax.yaxis.set_major_formatter(mtick.PercentFormatter(xmax=1.0))

        # format the ticks
        ax.xaxis.set_major_formatter(mdates.DateFormatter("%y-%m"))

        plt.title('Death rate over time')
        plt.show()

    return '{} plot from {} to {} with fatality rate of {}'.format(plot_type, str(first_date.date()), str(last_date.date()),death_rate)

In [None]:
# Helper for grouping_plot: division that tolerates an empty denominator
def _safe_ratio(numerator, denominator):
    """Return numerator/denominator, or NaN when the denominator is zero."""
    return numerator/denominator if denominator else np.nan


def grouping_plot(
                    original_df,
                    plot_type = 'new_cases',
                    age_start = 0,
                    age_end = 200,
                    region = '',
                    province = '',
                    city = '',
                    health_status = '',
                    admit = '',
                    sex = '',
                    specified_grouping = [''],
                    date_start = '2020-01-01',
                    date_end = '2024-01-01',
                    group_by = 'age_group',
                    plot_style ='line',
                    sort_by = 'date',
                    wave = 0
                                                ):
    """Plot cases, deaths or rates broken down per age group or per city.

    Parameters
    ----------
    original_df : one-row-per-case DataFrame; it is not modified.
    plot_type : 'new_cases' or 'new_deaths' (line, pie and bar styles),
        'fatality' or 'admit_rate' (bar style only), or 'death_rate', which
        draws nothing and returns the overall death rate as a sentence.
    age_start, age_end, region, province, city, health_status, admit, sex,
    date_start, date_end : passed through to FilterByEverything.
    specified_grouping : group labels to keep in line/pie plots; the default
        [''] keeps every group. The list is never modified.
    group_by : 'age_group' (column AgeGroup) or 'city' (column CityMunRes).
    plot_style : 'line', 'pie' or 'bar'. Any other value draws nothing.
    sort_by : 'date' for one point per date, 'wave' for one point per wave.
    wave : when > 0, the date range is taken from WaveToDate(wave).

    Returns
    -------
    A short description string of what was plotted.

    Raises
    ------
    ValueError for an unknown group_by or sort_by, or for a line plot of a
    plot_type other than 'new_cases' / 'new_deaths'.
    """
    group_columns = {'age_group': 'AgeGroup', 'city': 'CityMunRes'}
    if group_by not in group_columns:
        raise ValueError("group_by must be 'age_group' or 'city', got {!r}".format(group_by))
    if sort_by not in ('date', 'wave'):
        raise ValueError("sort_by must be 'date' or 'wave', got {!r}".format(sort_by))
    column = group_columns[group_by]
    # name of the index of the per-group tables built below
    key = 'Date' if sort_by == 'date' else 'wave'

    if wave > 0:
        date_start, date_end = WaveToDate(wave)

    # Filters by age range, Region, Province, City, ..., and date.
    # FilterByEverything works on its own copy of original_df.
    df = FilterByEverything(
        original_df,
        region = region,
        province = province,
        city = city,
        health_status = health_status,
        admit = admit,
        sex = sex,
        age_start = age_start,
        age_end = age_end,
        date_start = date_start,
        date_end = date_end
    )

    cases_by_group = {}    # group label -> Series of new cases indexed by `key`
    deaths_by_group = {}   # group label -> Series of new deaths indexed by `key`
    totals = {}            # group label -> summary fields used by the bar plots

    for grouping in df[column].dropna().sort_values().unique():
        # cases that belong to this group
        group_df = df[df[column] == grouping]
        # convert the data frame into one that has dates as indices vs individuals
        counts = IndexByCaseToDate(group_df)

        total_cases_grouping = counts['new_cases'].sum()
        total_deaths_grouping = counts['new_deaths'].sum()
        if plot_type == 'admit_rate':
            # admitted cases that have a report date inside the range
            admitted_cases = ((group_df['Admitted'] == 'YES') & group_df['DateRepConf'].notna()).sum()
            admit_rate_grouping = _safe_ratio(admitted_cases, total_cases_grouping)
        else:
            admit_rate_grouping = np.nan
        totals[grouping] = {
            'total_cases': total_cases_grouping,
            'total_deaths': total_deaths_grouping,
            'death_rate': _safe_ratio(total_deaths_grouping, total_cases_grouping),
            'admit_rate': admit_rate_grouping,
            'wave': wave,
        }

        if sort_by == 'wave':
            # collapse the daily counts into one row per wave
            counts = counts.assign(wave = counts['Date'].apply(DateToWave))
            counts = counts.groupby('wave', as_index = False)[['new_cases', 'new_deaths']].sum()
        counts = counts.set_index(key)
        cases_by_group[grouping] = counts['new_cases']
        deaths_by_group[grouping] = counts['new_deaths']

    if not totals:
        return 'no data for a {} plot from {} to {}'.format(plot_type, date_start, date_end)

    # One column per group, one row per date/wave; NaN where a group has no data.
    # The date/wave lives in the index, so column sums never touch datetimes.
    cases_wide = pd.DataFrame(cases_by_group).sort_index()
    deaths_wide = pd.DataFrame(deaths_by_group).sort_index()

    if plot_type =='death_rate':
        total_cases = cases_wide.sum().sum()
        total_deaths = deaths_wide.sum().sum()
        return 'The death rate  is {}%, with a total of {} reported deaths and {} reported cases'.format(
            round(_safe_ratio(total_deaths, total_cases)*100,2), int(total_deaths), int(total_cases))

    # table used by the line and pie styles (None for rate-type plots)
    wide_df = {'new_cases': cases_wide, 'new_deaths': deaths_wide}.get(plot_type)
    wanted_groups = [label for label in specified_grouping if label not in ('', key)]
    if wide_df is not None and wanted_groups:
        wide_df = wide_df[wanted_groups]

    if plot_style == 'bar':
        # one row per group, highest label first
        totals_df = pd.DataFrame.from_dict(totals, orient = 'index').sort_index(ascending = False)
        totals_df.index.name = group_by
        bar_fields = {
            'fatality': 'death_rate',
            'new_cases': 'total_cases',
            'new_deaths': 'total_deaths',
            'admit_rate': 'admit_rate',
        }
        fig,ax = plt.subplots()
        if plot_type in bar_fields:
            sns.barplot(ax = ax, data=totals_df,y=totals_df.index, x=bar_fields[plot_type], orient = 'h')
            if plot_type in ('fatality', 'admit_rate'):
                # rates are fractions; show them as percentages
                ax.xaxis.set_major_formatter(mtick.FuncFormatter(lambda x, pos: "{:.2%}".format(x))) 
        plt.show()

    elif plot_style == 'pie':
        palette_color = sns.color_palette('Pastel1')      
        if wide_df is not None:
            plt.pie(wide_df.sum(),colors=palette_color, labels = wide_df.columns)
        plt.show()    

    elif plot_style == 'line':
        if wide_df is None:
            raise ValueError("line plots support plot_type 'new_cases' or 'new_deaths', got {!r}".format(plot_type))
        fig, ax = plt.subplots()  
        for grouping in wide_df.columns:
            ax.plot(wide_df.index,wide_df[grouping],label = grouping) 

        if sort_by == 'date':
            ax.set_xlabel('Date')
            ax.xaxis.set_major_formatter(mdates.DateFormatter("%y-%m"))
        else:
            ax.set_xlabel('Wave')
        ax.set_ylabel(plot_type)
        plt.legend()
        plt.show()

    return 'here is your {} plot from {} to {}'.format(plot_type,date_start,date_end)

In [None]:
# Daily new cases overlaid with daily new deaths, from 2020-01-01 onward
single_plot(covid_df, date_start = '2020-01-01', plot_type = 'cases_and_deaths')

In [None]:
# Daily new cases overlaid with daily new deaths, restricted to wave 6
single_plot(covid_df, wave = 6, plot_type = 'cases_and_deaths')

In [None]:
# Pie chart: share of wave 6 new cases per age group
grouping_plot(
    covid_df,
    wave = 6,
    group_by = 'age_group',
    plot_type = 'new_cases',
    plot_style = 'pie',
)

In [None]:
# Pie chart: share of wave 6 new deaths per age group
grouping_plot(
    covid_df,
    wave = 6,
    group_by = 'age_group',
    plot_type = 'new_deaths',
    plot_style = 'pie',
)

In [None]:
# Bar chart: death rate per age group, wave 6
grouping_plot(
    covid_df,
    wave = 6,
    group_by = 'age_group',
    plot_type = 'fatality',
    plot_style = 'bar',
)

In [None]:
# Bar chart: death rate per age group, wave 5
grouping_plot(
    covid_df,
    wave = 5,
    group_by = 'age_group',
    plot_type = 'fatality',
    plot_style = 'bar',
)

In [None]:
# Bar chart: death rate per age group, wave 4
grouping_plot(
    covid_df,
    wave = 4,
    group_by = 'age_group',
    plot_type = 'fatality',
    plot_style = 'bar',
)

In [None]:
# Bar chart: death rate per age group, wave 3
grouping_plot(
    covid_df,
    wave = 3,
    group_by = 'age_group',
    plot_type = 'fatality',
    plot_style = 'bar',
)

In [None]:
# Pie chart: share of wave 6 new cases per age group, admitted cases only
grouping_plot(
    covid_df,
    wave = 6,
    admit = 'YES',
    group_by = 'age_group',
    plot_type = 'new_cases',
    plot_style = 'pie',
)

In [None]:
# Bar chart: total cases per age group, wave 6
grouping_plot(
    covid_df,
    wave = 6,
    group_by = 'age_group',
    plot_type = 'new_cases',
    plot_style = 'bar',
)

In [None]:
# Bar chart: total cases per age group, wave 6, admitted cases only
grouping_plot(
    covid_df,
    wave = 6,
    admit = 'YES',
    group_by = 'age_group',
    plot_type = 'new_cases',
    plot_style = 'bar',
)

In [None]:
# Bar chart: admission rate per age group, wave 6
grouping_plot(
    covid_df,
    wave = 6,
    group_by = 'age_group',
    plot_type = 'admit_rate',
    plot_style = 'bar',
)

In [None]:
# Bar chart: admission rate per age group, wave 5
grouping_plot(
    covid_df,
    wave = 5,
    group_by = 'age_group',
    plot_type = 'admit_rate',
    plot_style = 'bar',
)

In [None]:
# Bar chart: admission rate per age group, wave 4
grouping_plot(
    covid_df,
    wave = 4,
    group_by = 'age_group',
    plot_type = 'admit_rate',
    plot_style = 'bar',
)

In [None]:
# Bar chart: admission rate per city of residence in region NCR, wave 6
grouping_plot(
    covid_df,
    wave = 6,
    region = 'NCR',
    group_by = 'city',
    plot_type = 'admit_rate',
    plot_style = 'bar',
)

In [None]:
# Bar chart: admission rate per city of residence in region NCR, wave 6
# NOTE(review): same arguments as the previous cell - confirm whether a
# different plot_type or wave was intended here.
grouping_plot(
    covid_df,
    wave = 6,
    region = 'NCR',
    group_by = 'city',
    plot_type = 'admit_rate',
    plot_style = 'bar',
)

In [None]:
# Bar chart: total cases per city of residence in region NCR, wave 6
grouping_plot(
    covid_df,
    wave = 6,
    region = 'NCR',
    group_by = 'city',
    plot_type = 'new_cases',
    plot_style = 'bar',
)

Variables
1. Age
2. Age Group
3. Sex
4. Date Reported
5. Date Died
6. Admitted
7. Region
8. Province
9. City
10. Health Status
11. Date Onset
12. Pregnanttab


Check conventions for Python variable and function naming and creation
Start on another project that can show range for portfolio, but I can continue this as a passion project
Different kind of range of data set (maybe ocean temperatures and bleaching)
Do not reinvent the wheel lmao

Input a region and get a graph of either cases or deaths for all the cities in the region, with all of them overlapping on one plot

In [None]:
# Reference (heat maps drawn on literal maps with Python):
# https://towardsdatascience.com/making-heat-maps-with-literal-maps-how-to-use-python-to-construct-a-chloropleth-6b65e4e33905

In [None]:
# Daily counts plus a 30-row simple moving average of new cases, indexed by date.
# The original cell called ConvertIndexingFromCaseCodesToDates, which is not
# defined anywhere in this notebook; IndexByCaseToDate is the per-date converter.
df = IndexByCaseToDate(covid_df)
# rolling(30) averages 30 consecutive rows - this equals 30 days only if every
# calendar day has a row (TODO confirm)
df['SMA30'] = df['new_cases'].rolling(30).mean()
# NOTE(review): dropna() removes every row with a NaN in ANY column (including
# new_deaths / new_symptoms), not just the first 29 rows without an average
df = df.dropna().set_index('Date')

In [None]:
# Line plot of daily new cases together with their 30-row moving average;
# relies on `df` as built in the previous cell (Date index, SMA30 column)
df[['new_cases','SMA30']].plot(label='Cases')

In [None]:
# Display the per-date table (Date, new_cases, new_deaths, new_symptoms) for the full data set
IndexByCaseToDate(covid_df)

In [None]:
# Scratch copy of the case-level frame, so experiments below don't touch covid_df
df = covid_df.copy()

In [None]:
# Fresh scratch copy of the case-level frame with its row index labelled 'case'
df = covid_df.copy().rename_axis('case')
df

In [None]:
# Index the scratch frame by report date.
# Not re-runnable on its own: a second run fails because 'DateRepConf' is no
# longer a column - re-run the cell that recreates `df` first.
df = df.set_index('DateRepConf')
df

In [None]:
# Move the current index back into a regular column and restore a 0..n-1 index
df = df.reset_index()
df

In [None]:
# Scratch version of the per-group aggregation done inside grouping_plot,
# run on the whole data set and grouped by age group
df = covid_df.copy()
column = 'AgeGroup'
wave = 0

# setup the data frames that hold the general date range (one 'Date' column each)
dates_cases_df = pd.DataFrame({'Date': df[df['DateRepConf'].notnull()].DateRepConf.unique()})
dates_deaths_df = pd.DataFrame({'Date': df[df['DateDied'].notnull()].DateDied.unique()})

# one row per summary field; one column per group is added in the loop below
total_per_grouping = pd.DataFrame({
    'field':['total_cases','total_deaths','death_rate','admit_rate','wave']
})

for grouping in df[df[column].notnull()][column].sort_values().unique():

    # cases of this group, and the subset of them that were admitted
    temp_df = df[df[column] == grouping]
    temp_admit_df = temp_df[temp_df['Admitted']=='YES']

    # convert the data frames into ones that have dates as indices vs individuals
    daily_temp_df = IndexByCaseToDate(temp_df)
    daily_temp_admit_df = IndexByCaseToDate(temp_admit_df)

    # add this group's daily new cases as a column named after the group
    grouping_df = daily_temp_df[['Date','new_cases']].rename(columns = {'new_cases' : grouping})
    dates_cases_df = dates_cases_df.merge(grouping_df, on='Date', how='outer')

    # add this group's daily new deaths as a column named after the group
    grouping_df = daily_temp_df[['Date','new_deaths']].rename(columns = {'new_deaths' : grouping})
    dates_deaths_df = dates_deaths_df.merge(grouping_df, on='Date', how='outer')

    # totals and rates of this group
    total_cases_grouping = daily_temp_df['new_cases'].sum()
    total_deaths_grouping = daily_temp_df['new_deaths'].sum()
    fatality_grouping = total_deaths_grouping/total_cases_grouping
    total_admit_grouping = daily_temp_admit_df.new_cases.sum()
    admit_rate_grouping = total_admit_grouping/total_cases_grouping

    total_per_grouping[grouping] = pd.DataFrame({
        grouping :[total_cases_grouping,total_deaths_grouping,fatality_grouping,admit_rate_grouping,wave]
    })

In [None]:
# Reshape the per-group totals to one row per group and one column per field
temp = total_per_grouping.copy().set_index('field').T
temp.index.name = 'age_group'

# text rendering with thousands separators for counts and percentages for rates
column_formats = {
    'total_cases': '{:,.0f}'.format,
    'total_deaths': '{:,.0f}'.format,
    'death_rate': '{:.2%}'.format,
    'admit_rate': '{:.2%}'.format
}
output = temp.to_string(formatters = column_formats)
#print(output)
temp.columns

In [None]:
# Display the per-group totals table built in the previous cell
temp

In [None]:
# Daily counts per age group for cases aged 40 to 150, tagged with wave and age group
temp_df = covid_df[covid_df.Age.between(40,150)]
column = 'AgeGroup'

# Collect one daily table per age group and concatenate once at the end,
# instead of growing final_df with a concat on every iteration
daily_frames = []
for grouping in temp_df[column].dropna().sort_values().unique():
    grouping_df = temp_df[temp_df[column] == grouping]
    daily_temp_df = IndexByCaseToDate(grouping_df)
    daily_temp_df['wave'] = daily_temp_df.Date.apply(DateToWave)
    daily_temp_df['AgeGroup'] = grouping
    daily_frames.append(daily_temp_df)

if daily_frames:
    final_df = pd.concat(daily_frames, axis = 0, ignore_index = True)
else:
    # no age group matched: keep the expected columns so later cells still run
    final_df = pd.DataFrame(columns = ['Date','new_cases','new_deaths','new_symptoms','wave','AgeGroup'])

In [None]:
# Death rate per age group and wave, as grouped bars.
# Only the count columns are summed: summing the datetime 'Date' column is not
# a supported operation in recent pandas versions.
new_df = (
    final_df
    .groupby(['AgeGroup','wave'], as_index = False)[['new_cases','new_deaths','new_symptoms']]
    .sum()
)
new_df['death_rate'] = new_df.new_deaths/new_df.new_cases
new_df = new_df.sort_values('AgeGroup',ascending = False)

fig, ax = plt.subplots()
sns.barplot(ax = ax, data=new_df,x='AgeGroup', hue = 'wave', y='death_rate')
plt.setp(ax.get_xticklabels(), rotation=30, horizontalalignment='right')
# death_rate is a fraction; show it as a percentage
ax.yaxis.set_major_formatter(mtick.FuncFormatter(lambda y, pos: "{:.0%}".format(y))) 
ax.set_xlabel('Age group')
ax.set_ylabel('Death rate')
ax.set_title('Death rate by age group and wave')
plt.show()