In [1]:
# Dependencies
import pandas as pd
import numpy as np

In [2]:
# Path of csv file, originally converted using tabula
covid_excess_deaths_path = "Resources/Excess_Deaths_Associated_with_COVID-19.csv"

# Read the csv file
covid_excess_deaths_df = pd.read_csv(covid_excess_deaths_path, low_memory=False)

In [43]:
# Display the data table for preview
print()
print(f"Excess Deaths Probably Associated with Covid-19 Dataset")
covid_excess_deaths_df.head()


Excess Deaths Probably Associated with Covid-19 Dataset


Unnamed: 0,Week Ending Date,State,Observed Number,Upper Bound Threshold,Exceeds Threshold,Average Expected Count,Excess Lower Estimate,Excess Higher Estimate,Year,Total Excess Lower Estimate in 2020,Total Excess Higher Estimate in 2020,Percent Excess Lower Estimate,Percent Excess Higher Estimate,Type,Outcome,Suppress,Note
0,2017-01-14,Alabama,1130.0,1188.0,False,1029.0,0.0,101.0,2017,3743,5884,0.0,0.1,Predicted (weighted),All causes,,
1,2017-01-21,Alabama,1048.0,1201.0,False,1042.0,0.0,6.0,2017,3743,5884,0.0,0.0,Predicted (weighted),All causes,,
2,2017-01-28,Alabama,1026.0,1216.0,False,1057.0,0.0,0.0,2017,3743,5884,0.0,0.0,Predicted (weighted),All causes,,
3,2017-02-04,Alabama,1036.0,1216.0,False,1057.0,0.0,0.0,2017,3743,5884,0.0,0.0,Predicted (weighted),All causes,,
4,2017-02-11,Alabama,1058.0,1207.0,False,1053.0,0.0,5.0,2017,3743,5884,0.0,0.0,Predicted (weighted),All causes,,


In [60]:
# For convenience
df = covid_excess_deaths_df

In [62]:
# Create list of US states (unique)
states_list = df.State.unique().tolist()

# Remove non-state list entitites
states_list.remove('United States')
states_list.remove('Puerto Rico')
states_list.remove('New York City')
states_list.remove('District of Columbia')

print(states_list)
print()

# Count and print number of US states (unique) as a check
num_states = len(states_list)
print(f"Number of States and Territories = {num_states}")

['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado', 'Connecticut', 'Delaware', 'Florida', 'Georgia', 'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Maine', 'Maryland', 'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey', 'New Mexico', 'New York', 'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania', 'Rhode Island', 'South Carolina', 'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming']

Number of States and Territories = 50


In [41]:
# Create list of dates (unique weeks)
date_list = df["Week Ending Date"].unique().tolist()

# Count number of dates (unique weeks)
num_weeks = len(date_list)
num_years = (num_weeks / 52)
print(f"Reported Period = {round(num_years, 1)} Years")

# Report earliest and latest dates
earliest_date = df["Week Ending Date"].min()
latest_date = df["Week Ending Date"].max()
print(f"Earliest date is {earliest_date}")
print(f"Latest date is {latest_date}")

Reported Period = 3.8 Years
Earliest date is 2017-01-14
Latest date is 2020-10-17


In [5]:
# Sort by State and then Week Ending Date
df = df.sort_values(["State", "Week Ending Date"], ascending = (False, True))

In [6]:
# DEMONSTRATION OF EXTRACTION AND PROCESSING METHODOLOGY FOR A SINGLE STATE, ALABAMA

al_df = df.loc[df["State"] == "Alabama"]
al_df.head()

Unnamed: 0,Week Ending Date,State,Observed Number,Upper Bound Threshold,Exceeds Threshold,Average Expected Count,Excess Lower Estimate,Excess Higher Estimate,Year,Total Excess Lower Estimate in 2020,Total Excess Higher Estimate in 2020,Percent Excess Lower Estimate,Percent Excess Higher Estimate,Type,Outcome,Suppress,Note
0,2017-01-14,Alabama,1130.0,1188.0,False,1029.0,0.0,101.0,2017,3743,5884,0.0,0.1,Predicted (weighted),All causes,,
10638,2017-01-14,Alabama,1130.0,1188.0,False,1029.0,0.0,101.0,2017,499,2080,0.0,0.1,Predicted (weighted),"All causes, excluding COVID-19",,
21276,2017-01-14,Alabama,1130.0,1188.0,False,1029.0,0.0,101.0,2017,3187,5131,0.0,0.1,Unweighted,All causes,,
1,2017-01-21,Alabama,1048.0,1201.0,False,1042.0,0.0,6.0,2017,3743,5884,0.0,0.0,Predicted (weighted),All causes,,
10639,2017-01-21,Alabama,1048.0,1201.0,False,1042.0,0.0,6.0,2017,499,2080,0.0,0.0,Predicted (weighted),"All causes, excluding COVID-19",,


In [7]:
# Determine how many weeks NCHS believes data were incomplete at time of initial reporting

al_df2 = al_df.dropna(subset=["Note"])
al_df3 = al_df2.loc[al_df2["Type"] == "Unweighted"]
note_list = al_df3["Note"].unique()
print(f"Instances of Non-Null Note Values = {len(al_df3)}\n")
print(f"Number of Unique Note Values = {len(note_list)}\n")
print(note_list)

# "Data in recent weeks are incomplete. Only 60% of death records are submitted to NCHS within 10 days of 
#     the date of death, and completeness varies by jurisdiction. Data are not weighted and counts are likely
#     underreported."

Instances of Non-Null Note Values = 38

Number of Unique Note Values = 1

['Data in recent weeks are incomplete. Only 60% of death records are submitted to NCHS within 10 days of the date of death, and completeness varies by jurisdiction. Data are not weighted and counts are likely underreported.']


In [8]:
al_wt_all_df = al_df.loc[(al_df["Type"] == "Predicted (weighted)") & (al_df["Outcome"] == "All causes")]
al_wt_all_df.tail()

Unnamed: 0,Week Ending Date,State,Observed Number,Upper Bound Threshold,Exceeds Threshold,Average Expected Count,Excess Lower Estimate,Excess Higher Estimate,Year,Total Excess Lower Estimate in 2020,Total Excess Higher Estimate in 2020,Percent Excess Lower Estimate,Percent Excess Higher Estimate,Type,Outcome,Suppress,Note
192,2020-09-19,Alabama,1183.0,1057.0,True,980.0,126.0,203.0,2020,3743,5884,0.1,0.2,Predicted (weighted),All causes,,Data in recent weeks are incomplete. Only 60% ...
193,2020-09-26,Alabama,1157.0,1050.0,True,976.0,107.0,181.0,2020,3743,5884,0.1,0.2,Predicted (weighted),All causes,,Data in recent weeks are incomplete. Only 60% ...
194,2020-10-03,Alabama,1082.0,1056.0,True,984.0,26.0,98.0,2020,3743,5884,0.0,0.1,Predicted (weighted),All causes,,Data in recent weeks are incomplete. Only 60% ...
195,2020-10-10,Alabama,1136.0,1056.0,True,985.0,80.0,151.0,2020,3743,5884,0.1,0.2,Predicted (weighted),All causes,,Data in recent weeks are incomplete. Only 60% ...
196,2020-10-17,Alabama,1031.0,1046.0,False,976.0,0.0,55.0,2020,3743,5884,0.0,0.1,Predicted (weighted),All causes,,Data in recent weeks are incomplete. Only 60% ...


In [9]:
al_wt_xcov_df = al_df.loc[(al_df["Type"] == "Predicted (weighted)") & (al_df["Outcome"] == "All causes, excluding COVID-19")]
al_wt_xcov_df.tail()

Unnamed: 0,Week Ending Date,State,Observed Number,Upper Bound Threshold,Exceeds Threshold,Average Expected Count,Excess Lower Estimate,Excess Higher Estimate,Year,Total Excess Lower Estimate in 2020,Total Excess Higher Estimate in 2020,Percent Excess Lower Estimate,Percent Excess Higher Estimate,Type,Outcome,Suppress,Note
10830,2020-09-19,Alabama,1045.0,1057.0,False,980.0,0.0,65.0,2020,499,2080,0.0,0.1,Predicted (weighted),"All causes, excluding COVID-19",,Data in recent weeks are incomplete. Only 60% ...
10831,2020-09-26,Alabama,1054.0,1050.0,True,976.0,4.0,78.0,2020,499,2080,0.0,0.1,Predicted (weighted),"All causes, excluding COVID-19",,Data in recent weeks are incomplete. Only 60% ...
10832,2020-10-03,Alabama,971.0,1056.0,False,984.0,0.0,0.0,2020,499,2080,0.0,0.0,Predicted (weighted),"All causes, excluding COVID-19",,Data in recent weeks are incomplete. Only 60% ...
10833,2020-10-10,Alabama,1012.0,1056.0,False,985.0,0.0,27.0,2020,499,2080,0.0,0.0,Predicted (weighted),"All causes, excluding COVID-19",,Data in recent weeks are incomplete. Only 60% ...
10834,2020-10-17,Alabama,935.0,1046.0,False,976.0,0.0,0.0,2020,499,2080,0.0,0.0,Predicted (weighted),"All causes, excluding COVID-19",,Data in recent weeks are incomplete. Only 60% ...


In [10]:
# Extract into a new DataFrame the following columns of Data:
#     State, Week Ending Date, Total Deaths, and Average Expected Deaths
#     Rename columns as appropriate
al_sum_df = al_wt_all_df[["State","Week Ending Date","Observed Number","Average Expected Count"]]
al_sum_df = al_sum_df.rename(columns={"Observed Number": "Total Deaths", "Average Expected Count": "Avg Expect Deaths"})
al_sum_df.head()

Unnamed: 0,State,Week Ending Date,Total Deaths,Avg Expect Deaths
0,Alabama,2017-01-14,1130.0,1029.0
1,Alabama,2017-01-21,1048.0,1042.0
2,Alabama,2017-01-28,1026.0,1057.0
3,Alabama,2017-02-04,1036.0,1057.0
4,Alabama,2017-02-11,1058.0,1053.0


In [11]:
# Extract Total Deaths Excluding Covid-19 into a list
deaths_xcov = al_wt_xcov_df["Observed Number"].tolist()
deaths_xcov

[1130.0,
 1048.0,
 1026.0,
 1036.0,
 1058.0,
 1060.0,
 1099.0,
 1081.0,
 1011.0,
 1067.0,
 1068.0,
 991.0,
 1037.0,
 1017.0,
 966.0,
 996.0,
 907.0,
 1064.0,
 998.0,
 988.0,
 925.0,
 984.0,
 918.0,
 961.0,
 925.0,
 954.0,
 968.0,
 964.0,
 888.0,
 949.0,
 936.0,
 933.0,
 917.0,
 953.0,
 960.0,
 968.0,
 995.0,
 1019.0,
 948.0,
 986.0,
 966.0,
 941.0,
 994.0,
 994.0,
 982.0,
 1004.0,
 1067.0,
 1074.0,
 1120.0,
 1098.0,
 1080.0,
 1248.0,
 1301.0,
 1275.0,
 1286.0,
 1147.0,
 1181.0,
 1096.0,
 1099.0,
 985.0,
 1059.0,
 1086.0,
 993.0,
 995.0,
 1012.0,
 1026.0,
 1031.0,
 971.0,
 1039.0,
 950.0,
 1005.0,
 977.0,
 973.0,
 1011.0,
 940.0,
 974.0,
 927.0,
 972.0,
 994.0,
 959.0,
 930.0,
 962.0,
 969.0,
 938.0,
 943.0,
 936.0,
 944.0,
 912.0,
 998.0,
 972.0,
 945.0,
 943.0,
 971.0,
 993.0,
 927.0,
 963.0,
 1055.0,
 1009.0,
 1050.0,
 1044.0,
 1053.0,
 1105.0,
 1084.0,
 1077.0,
 1090.0,
 1114.0,
 1063.0,
 1095.0,
 1092.0,
 1103.0,
 1064.0,
 1050.0,
 1062.0,
 1090.0,
 1012.0,
 1079.0,
 1044.0,
 1031.

In [12]:
# Append Total Deaths Excluding Covid-19 into a new column
al_sum_df["Ttl Deaths x-Cov"] = al_wt_xcov_df["Observed Number"].tolist()
al_sum_df.head()

Unnamed: 0,State,Week Ending Date,Total Deaths,Avg Expect Deaths,Ttl Deaths x-Cov
0,Alabama,2017-01-14,1130.0,1029.0,1130.0
1,Alabama,2017-01-21,1048.0,1042.0,1048.0
2,Alabama,2017-01-28,1026.0,1057.0,1026.0
3,Alabama,2017-02-04,1036.0,1057.0,1036.0
4,Alabama,2017-02-11,1058.0,1053.0,1058.0


In [13]:
# Calculate Covid-19 Deaths and Append Column
al_sum_df["Cov Deaths"] = al_sum_df["Total Deaths"] - al_sum_df["Ttl Deaths x-Cov"]
al_sum_df.head()

Unnamed: 0,State,Week Ending Date,Total Deaths,Avg Expect Deaths,Ttl Deaths x-Cov,Cov Deaths
0,Alabama,2017-01-14,1130.0,1029.0,1130.0,0.0
1,Alabama,2017-01-21,1048.0,1042.0,1048.0,0.0
2,Alabama,2017-01-28,1026.0,1057.0,1026.0,0.0
3,Alabama,2017-02-04,1036.0,1057.0,1036.0,0.0
4,Alabama,2017-02-11,1058.0,1053.0,1058.0,0.0


In [14]:
# Calculate Excess Deaths and Append Column
al_sum_df["Excess Deaths"] = al_sum_df["Ttl Deaths x-Cov"] - al_sum_df["Avg Expect Deaths"]
al_sum_df.tail()

Unnamed: 0,State,Week Ending Date,Total Deaths,Avg Expect Deaths,Ttl Deaths x-Cov,Cov Deaths,Excess Deaths
192,Alabama,2020-09-19,1183.0,980.0,1045.0,138.0,65.0
193,Alabama,2020-09-26,1157.0,976.0,1054.0,103.0,78.0
194,Alabama,2020-10-03,1082.0,984.0,971.0,111.0,-13.0
195,Alabama,2020-10-10,1136.0,985.0,1012.0,124.0,27.0
196,Alabama,2020-10-17,1031.0,976.0,935.0,96.0,-41.0


In [15]:
# Calculate total negative values for Excess Deaths for entire period
excess_neg_df = al_sum_df.loc[al_sum_df["Excess Deaths"] < 0]
excess_neg_df["Excess Deaths"].sum()

-4920.0

In [16]:
# Calculate total positive values for Excess Deaths for entire period
excess_pos_df = al_sum_df.loc[al_sum_df["Excess Deaths"] >= 0]
excess_pos_df["Excess Deaths"].sum()

4032.0

In [17]:
# Remove negative values in Excess Deaths column
al_sum_df["Excess Deaths"] = np.where(al_sum_df["Excess Deaths"] < 0, 0, al_sum_df["Excess Deaths"])
al_sum_df.tail()

Unnamed: 0,State,Week Ending Date,Total Deaths,Avg Expect Deaths,Ttl Deaths x-Cov,Cov Deaths,Excess Deaths
192,Alabama,2020-09-19,1183.0,980.0,1045.0,138.0,65.0
193,Alabama,2020-09-26,1157.0,976.0,1054.0,103.0,78.0
194,Alabama,2020-10-03,1082.0,984.0,971.0,111.0,0.0
195,Alabama,2020-10-10,1136.0,985.0,1012.0,124.0,27.0
196,Alabama,2020-10-17,1031.0,976.0,935.0,96.0,0.0


In [18]:
al_sum_df["Prob Ttl Cov Deaths"] = al_sum_df["Cov Deaths"] + al_sum_df["Excess Deaths"]
al_sum_df.tail()

Unnamed: 0,State,Week Ending Date,Total Deaths,Avg Expect Deaths,Ttl Deaths x-Cov,Cov Deaths,Excess Deaths,Prob Ttl Cov Deaths
192,Alabama,2020-09-19,1183.0,980.0,1045.0,138.0,65.0,203.0
193,Alabama,2020-09-26,1157.0,976.0,1054.0,103.0,78.0,181.0
194,Alabama,2020-10-03,1082.0,984.0,971.0,111.0,0.0,111.0
195,Alabama,2020-10-10,1136.0,985.0,1012.0,124.0,27.0,151.0
196,Alabama,2020-10-17,1031.0,976.0,935.0,96.0,0.0,96.0


In [23]:
al_sum_df["Prob Under Report %"] = (al_sum_df["Excess Deaths"] / al_sum_df["Prob Ttl Cov Deaths"]) * 100
al_sum_df.tail(33)

Unnamed: 0,State,Week Ending Date,Total Deaths,Avg Expect Deaths,Ttl Deaths x-Cov,Cov Deaths,Excess Deaths,Prob Ttl Cov Deaths,Prob Under Report %
164,Alabama,2020-03-07,1053.0,1074.0,1053.0,0.0,0.0,0.0,
165,Alabama,2020-03-14,1088.0,1067.0,1088.0,0.0,21.0,21.0,100.0
166,Alabama,2020-03-21,1057.0,1062.0,1054.0,3.0,0.0,3.0,0.0
167,Alabama,2020-03-28,1071.0,1056.0,1048.0,23.0,0.0,23.0,0.0
168,Alabama,2020-04-04,1153.0,1041.0,1104.0,49.0,63.0,112.0,56.25
169,Alabama,2020-04-11,1132.0,1036.0,1051.0,81.0,15.0,96.0,15.625
170,Alabama,2020-04-18,1139.0,1031.0,1050.0,89.0,19.0,108.0,17.592593
171,Alabama,2020-04-25,1136.0,1023.0,1053.0,83.0,30.0,113.0,26.548673
172,Alabama,2020-05-02,1146.0,1018.0,1065.0,81.0,47.0,128.0,36.71875
173,Alabama,2020-05-09,1174.0,1013.0,1058.0,116.0,45.0,161.0,27.950311


In [97]:
# Extract first state (Alabama) results to master DataFrame
covid_deaths_df = al_sum_df[["State","Week Ending Date","Cov Deaths","Prob Ttl Cov Deaths", "Prob Under Report %"]].tail(33)

# Replace NaN value (due to divide by zero) with zero value in a single column
covid_deaths_df["Prob Under Report %"] = covid_deaths_df["Prob Under Report %"].fillna(0)

covid_deaths_df = covid_deaths_df.reset_index(drop=True)

covid_deaths_df

Unnamed: 0,State,Week Ending Date,Cov Deaths,Prob Ttl Cov Deaths,Prob Under Report %
0,Alabama,2020-03-07,0.0,0.0,0.0
1,Alabama,2020-03-14,0.0,21.0,100.0
2,Alabama,2020-03-21,3.0,3.0,0.0
3,Alabama,2020-03-28,23.0,23.0,0.0
4,Alabama,2020-04-04,49.0,112.0,56.25
5,Alabama,2020-04-11,81.0,96.0,15.625
6,Alabama,2020-04-18,89.0,108.0,17.592593
7,Alabama,2020-04-25,83.0,113.0,26.548673
8,Alabama,2020-05-02,81.0,128.0,36.71875
9,Alabama,2020-05-09,116.0,161.0,27.950311


In [None]:
# USE FOR LOOP TO TRAVERSE LIST OF STATES AGAINST A BLOCK OF CODE THAT EXTRACTS AND PROCESSES
#    THE CDC EXCESS DEATHS DATA SET TO YIELD A PROCESSED WEEKLY DATA SET BEGINNING IN MARCH 2020
#    AND RUNS TO OCTOBER 2020 (33 WEEKS), FOR EACH OF THE 50 US STATES.  THE KEY DATA BEING ANALYZED 
#    ARE (1) AFFIRMATIVELY REPORTED CASES OF COVID-19 DEATHS, (2) AN INFERENCED PROBABLE TOTAL 
#    COVID-19 DEATHS SINCE EXCESS BUT UNEXPLAINED DEATHS ARE PROBABLLISTICALLY LIKELY TO BE 
#    COVID-19 DEATHS, AND (3) A PERCENTAGE FIGURE THAT REPRESENTS THE DEGREE OF UNDER-REPORTING
#    OF COVID-19 DEATHS.  THE EXCESS DEATHS FIGURE IS BASED ON THE TOTAL OBSERVED DEATHS LESS
#    THE EXPECTED NUMBER OF DEATHS LESS THE NUMBER OF REPORTED COVID CASES. 

In [109]:
# Create an empty dataframe with column headers

master_df = pd.DataFrame(columns = ["State", "Week Ending Date","Cov Deaths","Prob Ttl Cov Deaths", "Prob Under Report %"])
master_df

Unnamed: 0,State,Week Ending Date,Cov Deaths,Prob Ttl Cov Deaths,Prob Under Report %


In [112]:
# Create a for loop
for state in states_list:
    
    df1 = df.loc[df['State'] == state]
    
    wt_all_df = df1.loc[(df1["Type"] == "Predicted (weighted)") & (df1["Outcome"] == "All causes")]
    wt_xcov_df = df1.loc[(df1["Type"] == "Predicted (weighted)") & (df1["Outcome"] == "All causes, excluding COVID-19")]
    
    sum_df = wt_all_df[["State","Week Ending Date","Observed Number","Average Expected Count"]]
    sum_df = sum_df.rename(columns={"Observed Number": "Total Deaths", "Average Expected Count": "Avg Expect Deaths"})
    
    sum_df["Ttl Deaths x-Cov"] = wt_xcov_df["Observed Number"].tolist()
    
    sum_df["Cov Deaths"] = sum_df["Total Deaths"] - sum_df["Ttl Deaths x-Cov"]
    
    sum_df["Excess Deaths"] = sum_df["Ttl Deaths x-Cov"] - sum_df["Avg Expect Deaths"]
    
    sum_df["Excess Deaths"] = np.where(sum_df["Excess Deaths"] < 0, 0, sum_df["Excess Deaths"])
    
    sum_df["Prob Ttl Cov Deaths"] = sum_df["Cov Deaths"] + sum_df["Excess Deaths"]
    
    sum_df["Prob Under Report %"] = (sum_df["Excess Deaths"] / sum_df["Prob Ttl Cov Deaths"]) * 100
    
    state_cov_deaths_df = sum_df[["State","Week Ending Date","Cov Deaths","Prob Ttl Cov Deaths", "Prob Under Report %"]].tail(33)
    
    state_cov_deaths_df["Prob Under Report %"] = state_cov_deaths_df["Prob Under Report %"].fillna(0)
    
    state_cov_deaths_df = state_cov_deaths_df.reset_index(drop=True)
    
    # Combine the data into a single dataset
    
    # x = master_df.append(state_cov_deaths_df, ignore_index = True)                             )
    
    master_df = master_df.append(state_cov_deaths_df)

master_df.head()

Unnamed: 0,State,Week Ending Date,Cov Deaths,Prob Ttl Cov Deaths,Prob Under Report %
0,Alabama,2020-03-07,0.0,0.0,0.0
1,Alabama,2020-03-14,0.0,21.0,100.0
2,Alabama,2020-03-21,3.0,3.0,0.0
3,Alabama,2020-03-28,23.0,23.0,0.0
4,Alabama,2020-04-04,49.0,112.0,56.25


In [114]:
d = {'Cov Deaths':'Pos-ID Covid Deaths', \
     'Prob Ttl Cov Deaths':'Prob Ttl Cov Deaths',\
     'Prob Under Report %':'Est. Under-Report %'}
master_group = master_df.groupby("State").agg({'Cov Deaths':'sum', \
                                               'Prob Ttl Cov Deaths':'sum',\
                                               'Prob Under Report %':'mean'}).rename(columns=d)

df.round(1)

master_group

Unnamed: 0_level_0,Pos-ID Covid Deaths,Prob Ttl Cov Deaths,Est. Under-Report %
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Alabama,7746.0,11742.0,30.673077
Alaska,124.0,414.0,46.850366
Arizona,10544.0,18730.0,44.762199
Arkansas,3338.0,6012.0,35.233997
California,33598.0,53108.0,35.362307
Colorado,4220.0,7584.0,47.898324
Connecticut,9028.0,11076.0,29.776178
Delaware,1220.0,2514.0,53.757716
Florida,32340.0,46906.0,31.413797
Georgia,14016.0,21240.0,34.534819


In [117]:
master_group['Under-Report Check'] = (1 - (master_group['Pos-ID Covid Deaths'] / master_group['Prob Ttl Cov Deaths'])) * 100
master_group

Unnamed: 0_level_0,Pos-ID Covid Deaths,Prob Ttl Cov Deaths,Est. Under-Report %,Under-Report Check
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alabama,7746.0,11742.0,30.673077,34.031681
Alaska,124.0,414.0,46.850366,70.048309
Arizona,10544.0,18730.0,44.762199,43.705286
Arkansas,3338.0,6012.0,35.233997,44.477711
California,33598.0,53108.0,35.362307,36.736462
Colorado,4220.0,7584.0,47.898324,44.35654
Connecticut,9028.0,11076.0,29.776178,18.49043
Delaware,1220.0,2514.0,53.757716,51.471758
Florida,32340.0,46906.0,31.413797,31.053597
Georgia,14016.0,21240.0,34.534819,34.011299
