In [3]:
import pandas as pd
from datetime import datetime

# State-level COVID CSV hosted in the scalation/data GitHub repository.
STATE_CSV_URL = "https://raw.githubusercontent.com/scalation/data/master/COVID-State/2023-03-22-21-09-15-State.csv"

# Download and parse the CSV; `data` holds the frame as returned by read_csv.
data = pd.read_csv(STATE_CSV_URL)

# Working frame that the rest of the script transforms.
df = pd.DataFrame(data)

# Turn the Date column into pandas datetime values so rows can be ordered
# chronologically and bucketed by calendar week later on.
df["Date"] = pd.to_datetime(df["Date"])

# Columns that get replaced by their row-to-row change within each state.
DIFFERENCED_COLUMNS = [
    'Confirmed',
    'Deaths',
    'Recovered',
    'Active',
    'People_Tested',
    'People_Hospitalized',
    'Testing_Rate',
    'Hospitalization_Rate',
    'Incident_Rate',
    'Total_Test_Results',
    'Case_Fatality_Ratio',
    'Mortality_Rate',
]

# Order rows by state and then by date, so that each diff below compares a
# row with the previous dated row of the same state.
df.sort_values(by=['Province_State', 'Date'], inplace=True)

# Overwrite every listed column with its per-state difference. The first row
# of each state has no predecessor, so its diff is NaN; that (and any other
# NaN diff) is stored as 0.
for column in DIFFERENCED_COLUMNS:
    df[column] = df.groupby('Province_State')[column].diff().fillna(0)
# print(df.head(20))

# Per-column reduction applied inside each (state, week) bucket. Key order is
# deliberate: it fixes the column order of the resulting frame.
weekly_aggregations = {
    **dict.fromkeys(
        [
            'Confirmed',
            'Deaths',
            'Recovered',
            'Active',
            'People_Tested',
            'People_Hospitalized',
            'Testing_Rate',
            'Hospitalization_Rate',
            'Incident_Rate',
        ],
        'sum',
    ),
    # Identifier / location columns: keep the first value seen in the bucket.
    **dict.fromkeys(['Lat', 'Long_', 'UID', 'ISO3', 'FIPS'], 'first'),
    **dict.fromkeys(
        ['Total_Test_Results', 'Case_Fatality_Ratio', 'Mortality_Rate'],
        'sum',
    ),
}

# Bucket rows by state and by week; 'W-SAT' is the weekly frequency anchored
# on Saturday, which is why the Date labels in the result are Saturdays.
weekly_groups = df.groupby(
    ['Province_State', pd.Grouper(key='Date', freq='W-SAT')]
)

# Reduce each bucket, then turn the (Province_State, Date) group keys back
# into ordinary columns.
df_weekly = weekly_groups.agg(weekly_aggregations).reset_index()

# Show the first ten weekly rows as a quick visual check.
weekly_preview = df_weekly.head(10)
print(weekly_preview)

# Timestamp prefix (local time, second resolution) for the output file name.
now = datetime.now()
dt_string = f"{now:%Y-%m-%d-%H-%M-%S}"

# Write the weekly table to the current working directory.
# NOTE(review): to_csv is left at its default index=True, so the frame's
# integer index is written as an unnamed leading column -- confirm that
# consumers of this file expect it.
weekly_csv_path = f"{dt_string}-State_Weekly.csv"
df_weekly.to_csv(weekly_csv_path)

  Province_State       Date  Confirmed  Deaths  Recovered  Active  \
0        Alabama 2020-04-18     1121.0    60.0        0.0     0.0   
1        Alabama 2020-04-25     1486.0    56.0        0.0     0.0   
2        Alabama 2020-05-02     1426.0    79.0        0.0  7412.0   
3        Alabama 2020-05-09     2020.0   102.0        0.0 -7412.0   
4        Alabama 2020-05-16     2108.0    95.0        0.0     0.0   
5        Alabama 2020-05-23     2500.0    64.0     7951.0  5828.0   
6        Alabama 2020-05-30     3361.0    69.0     1404.0  1888.0   
7        Alabama 2020-06-06     2666.0    71.0     2040.0   555.0   
8        Alabama 2020-06-13     5009.0    84.0     2113.0  2812.0   
9        Alabama 2020-06-20     4319.0    65.0     2466.0  1788.0   

   People_Tested  People_Hospitalized  Testing_Rate  Hospitalization_Rate  \
0        23724.0                183.0           0.0              0.892949   
1        21422.0                219.0           0.0              0.765106   
2        