In [1]:
import os
import pandas as pd

# Directory containing the daily report CSV files (one file per day, named 'MM-DD-YYYY.csv')
data_dir = '../johns_hopkins_github_data_pull/covid_reports_2021-23_us'


def parse_report_date(file_name):
    """Return the report date encoded in a 'MM-DD-YYYY.csv' file name.

    Args:
        file_name: Base name of a daily report file, e.g. '01-31-2021.csv'.

    Returns:
        The parsed date as a pandas Timestamp.

    Raises:
        ValueError: If the name without its extension is not a MM-DD-YYYY date.
    """
    stem, _ext = os.path.splitext(file_name)
    return pd.to_datetime(stem, format='%m-%d-%Y')


def load_daily_reports(report_dir):
    """Read every '.csv' file in ``report_dir`` into one long DataFrame.

    Each file's rows get a ``Date`` column taken from the file name. Files are
    loaded in chronological order so the result does not depend on the
    arbitrary order returned by ``os.listdir``.

    Args:
        report_dir: Directory holding the daily report CSV files.

    Returns:
        A DataFrame with the rows of all reports and a fresh 0..n-1 index.

    Raises:
        ValueError: If the directory contains no '.csv' files, or a file name
            is not a MM-DD-YYYY date.
    """
    # Sort on the parsed date: plain name order is wrong across years
    # ('01-01-2022' would sort before '12-31-2021').
    dated_names = sorted(
        (parse_report_date(name), name)
        for name in os.listdir(report_dir)
        if name.endswith('.csv')
    )
    if not dated_names:
        raise ValueError(f"No '.csv' daily reports found in {report_dir!r}")

    frames = [
        pd.read_csv(os.path.join(report_dir, name)).assign(Date=report_date)
        for report_date, name in dated_names
    ]
    return pd.concat(frames, ignore_index=True)


def select_us_rows(reports):
    """Keep only rows whose Country_Region is 'US' and normalise 'Long' to 'Long_'."""
    us_rows = reports[reports['Country_Region'] == 'US']
    return us_rows.rename(columns={'Long': 'Long_'})


def pivot_confirmed_by_date(us_rows):
    """Reshape long US rows into one row per state with one column per date.

    Args:
        us_rows: DataFrame with 'Province_State', 'Lat', 'Long_', 'Date' and
            'Confirmed' columns.

    Returns:
        A DataFrame indexed by (Province_State, Lat, Long_) whose columns are
        named 'Confirmed MM-DD-YYYY', holding the summed 'Confirmed' values.
    """
    # NOTE(review): Lat/Long_ are part of the grouping key, so rows where either
    # is missing are presumably dropped by pivot_table's default NaN handling --
    # confirm that is intended for entries without coordinates.
    wide = us_rows.pivot_table(
        index=['Province_State', 'Lat', 'Long_'],
        columns='Date',
        values='Confirmed',
        aggfunc='sum',
    )
    # Flatten the date columns into plain 'Confirmed MM-DD-YYYY' labels
    wide.columns = [f'Confirmed {day.strftime("%m-%d-%Y")}' for day in wide.columns]
    return wide


# In a notebook kernel __name__ is "__main__", so this always runs there and the
# variables below stay global for later cells; the guard only keeps the helpers
# importable without touching the filesystem.
if __name__ == "__main__":
    # Load and stack all daily reports
    combined_df = load_daily_reports(data_dir)

    # Filter for 'US' country and make sure the longitude column is named 'Long_'
    us_data = select_us_rows(combined_df)

    # One row per state, one 'Confirmed MM-DD-YYYY' column per report date
    pivot_df = pivot_confirmed_by_date(us_data)

    # Reset index to include 'Province_State', 'Lat', and 'Long_' as columns
    final_df = pivot_df.reset_index()

    # Display the final DataFrame
    print(final_df)

    # Save the final DataFrame to a CSV file
    final_df.to_csv('consolidated_covid_data_us_with_lat_long_.csv', index=False)


              Province_State      Lat     Long_  Confirmed 01-01-2021  \
0                    Alabama  32.3182  -86.9023                365747   
1                     Alaska  61.3707 -152.4044                 47019   
2             American Samoa -14.2710 -170.1320                     0   
3                    Arizona  33.7298 -111.4312                530267   
4                   Arkansas  34.9697  -92.3731                229442   
5                 California  36.1162 -119.6816               2434971   
6                   Colorado  39.0598 -105.3111                362438   
7                Connecticut  41.5978  -72.7554                185708   
8                   Delaware  39.3185  -75.5071                 58064   
9       District of Columbia  38.8974  -77.0268                 29252   
10                   Florida  27.7663  -81.6868               1323315   
11                   Georgia  33.0406  -83.6431                677589   
12                      Guam  13.4443  144.7937    