In [1]:
import pandas as pd

# Step 1: Read the data from the confirmed CSV file
confirmed_file = 'rawData/monthly_confirmed.csv'
df_confirmed = pd.read_csv(confirmed_file)

# Step 2: Fill missing values in "Province/State" column with null
df_confirmed["Province/State"].fillna(value='unknown', inplace=True)

# Step 3: Melt the data to unpivot the date columns
id_vars =  ["Province/State","Country/Region","Lat","Long"]
df_confirmed_melted = df_confirmed.melt(id_vars=id_vars, var_name="date", value_name="confirmed_cases")

# Step 4: Rename the "Country/Region" column to "Country_Region"
df_confirmed_melted.rename(columns={'Country/Region': 'Country_Region'}, inplace=True)

# Step 5: Rename the "Province/State" column to "Province_State"
df_confirmed_melted.rename(columns={'Province/State': 'Province_State'}, inplace=True)

# Step 6: Convert the "date" column to a consistent date format (e.g., 'YYYY-MM')
df_confirmed_melted['date'] = pd.to_datetime(df_confirmed_melted['date'], format='%B/%y').dt.strftime('%Y-%m')

# Step 7: Optionally, sort the data frame by country and date
df_confirmed_final = df_confirmed_melted.sort_values(by=["Country_Region"]).reset_index(drop=True)

# Display the final data frame
df_confirmed_final.head(100)

Unnamed: 0,Province_State,Country_Region,Lat,Long,date,confirmed_cases
0,unknown,Afghanistan,33.93911,67.709953,2020-01,0.0
1,unknown,Afghanistan,33.93911,67.709953,2021-04,1733260.0
2,unknown,Afghanistan,33.93911,67.709953,2021-03,1737068.0
3,unknown,Afghanistan,33.93911,67.709953,2021-02,1553102.0
4,unknown,Afghanistan,33.93911,67.709953,2023-01,6448651.0
...,...,...,...,...,...,...
95,unknown,Algeria,28.03390,1.659600,2022-01,7141014.0
96,unknown,Algeria,28.03390,1.659600,2021-12,6636393.0
97,unknown,Algeria,28.03390,1.659600,2020-12,2884170.0
98,unknown,Algeria,28.03390,1.659600,2021-01,3212268.0


In [2]:
import pandas as pd
#DO THIS FOR THE DEATHS CSV

# Step 1: Read the data from the confirmed CSV file
deaths_file = 'rawData/monthly_deaths.csv'
df_deaths = pd.read_csv(deaths_file)

# Step 2: Fill missing values in "Province/State" column with null
df_deaths["Province/State"].fillna(value='unknown', inplace=True)

# Step 3: Melt the data to unpivot the date columns
id_vars =  ["Province/State","Country/Region","Lat","Long"]
df_deaths_melted = df_deaths.melt(id_vars=id_vars, var_name="date", value_name="deaths")

# Step 4: Rename the "Country/Region" column to "Country_Region"
df_deaths_melted.rename(columns={'Country/Region': 'Country_Region'}, inplace=True)

# Step 5: Rename the "Province/State" column to "Province_State"
df_deaths_melted.rename(columns={'Province/State': 'Province_State'}, inplace=True)
# Step 6: Convert the "date" column to a consistent date format (e.g., 'YYYY-MM')
df_deaths_melted['date'] = pd.to_datetime(df_deaths_melted['date'], format='%B/%y').dt.strftime('%Y-%m')

# Step 7: Optionally, sort the data frame by country and date
df_deaths_final = df_deaths_melted.sort_values(by=["Country_Region"]).reset_index(drop=True)

# Display the final data frame
df_deaths_final.head(100)

Unnamed: 0,Province_State,Country_Region,Lat,Long,date,deaths
0,unknown,Afghanistan,33.93911,67.709953,2020-01,0
1,unknown,Afghanistan,33.93911,67.709953,2021-04,76300
2,unknown,Afghanistan,33.93911,67.709953,2021-03,76227
3,unknown,Afghanistan,33.93911,67.709953,2021-02,67864
4,unknown,Afghanistan,33.93911,67.709953,2023-01,243684
...,...,...,...,...,...,...
95,unknown,Algeria,28.03390,1.659600,2022-01,199006
96,unknown,Algeria,28.03390,1.659600,2021-12,191320
97,unknown,Algeria,28.03390,1.659600,2020-12,81232
98,unknown,Algeria,28.03390,1.659600,2021-01,87734


In [3]:
import pandas as pd
#DO THIS FOR RECOVERY
# Step 1: Read the data from the confirmed CSV file
recovery_file = 'rawData/monthly_recovery.csv'
df_recovery = pd.read_csv(recovery_file)

# Step 2: Fill missing values in "Province/State" column with null
df_recovery["Province/State"].fillna(value='unknown', inplace=True)

# Step 3: Melt the data to unpivot the date columns
id_vars =  ["Province/State","Country/Region","Lat","Long"]
df_recovery_melted = df_recovery.melt(id_vars=id_vars, var_name="date", value_name="recovery")

# Step 4: Rename the "Country/Region" column to "Country_Region"
df_recovery_melted.rename(columns={'Country/Region': 'Country_Region'}, inplace=True)
# Step 5: Rename the "Province/State" column to "Province_State"
df_recovery_melted.rename(columns={'Province/State': 'Province_State'}, inplace=True)

# Step 6: Convert the "date" column to a consistent date format (e.g., 'YYYY-MM')
df_recovery_melted['date'] = pd.to_datetime(df_recovery_melted['date'], format='%B/%y').dt.strftime('%Y-%m')

# Step 7: Optionally, sort the data frame by country and date
df_recovery_final = df_recovery_melted.sort_values(by=["Country_Region"]).reset_index(drop=True)

# Display the final data frame
df_recovery_final.head(100)

Unnamed: 0,Province_State,Country_Region,Lat,Long,date,recovery
0,unknown,Afghanistan,33.93911,67.709953,2020-01,0
1,unknown,Afghanistan,33.93911,67.709953,2020-08,843438
2,unknown,Afghanistan,33.93911,67.709953,2020-06,184768
3,unknown,Afghanistan,33.93911,67.709953,2021-03,1541980
4,unknown,Afghanistan,33.93911,67.709953,2021-05,1710784
...,...,...,...,...,...,...
95,unknown,Antarctica,-71.94990,23.347000,2020-08,0
96,unknown,Antarctica,-71.94990,23.347000,2021-07,0
97,unknown,Antarctica,-71.94990,23.347000,2020-03,0
98,unknown,Antarctica,-71.94990,23.347000,2020-11,0


In [4]:
import sqlite3
import pandas as pd

# Create SQLite database and tables
conn = sqlite3.connect('database/dashboard_data.db')
df_confirmed_final.to_sql('confirmed', conn, if_exists='replace', index=False)
df_deaths_final.to_sql('deaths', conn, if_exists='replace', index=False)
df_recovery_final.to_sql('recovery', conn, if_exists='replace', index=False)

# Verify the database
query = "SELECT * FROM confirmed LIMIT 5;"
result = conn.execute(query).fetchall()
print(result)



[('unknown', 'Afghanistan', 33.93911, 67.709953, '2020-01', 0.0), ('unknown', 'Afghanistan', 33.93911, 67.709953, '2021-04', 1733260.0), ('unknown', 'Afghanistan', 33.93911, 67.709953, '2021-03', 1737068.0), ('unknown', 'Afghanistan', 33.93911, 67.709953, '2021-02', 1553102.0), ('unknown', 'Afghanistan', 33.93911, 67.709953, '2023-01', 6448651.0)]


In [5]:
# Connect to the SQLite database
conn = sqlite3.connect('database/dashboard_data.db')
cursor = conn.cursor()

# Get the list of tables in the database
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
tables = cursor.fetchall()

# Print the table names
print("Tables in dashboard_data.db:")
for table in tables:
    print(table[0])

# Close the database connection
conn.close()

Tables in dashboard_data.db:
confirmed
deaths
recovery


In [6]:
import sqlite3

# Connect to the SQLite database
conn = sqlite3.connect('database/dashboard_data.db')
cursor = conn.cursor()

# Function to get column names of a table
def get_column_names(table_name):
    cursor.execute(f"PRAGMA table_info({table_name});")
    columns = cursor.fetchall()
    return [column[1] for column in columns]

# Get the list of tables in the database
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
tables = cursor.fetchall()

# Print column names for each table
for table in tables:
    table_name = table[0]
    print(f"Columns in {table_name}:")
    columns = get_column_names(table_name)
    print(columns)

# Close the database connection
conn.close()


Columns in confirmed:
['Province_State', 'Country_Region', 'Lat', 'Long', 'date', 'confirmed_cases']
Columns in deaths:
['Province_State', 'Country_Region', 'Lat', 'Long', 'date', 'deaths']
Columns in recovery:
['Province_State', 'Country_Region', 'Lat', 'Long', 'date', 'recovery']
