In [1]:
# Dependencies and Setup
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy import stats
import seaborn as sn

# Study data file
stanford_path = "df_us_filtered.csv"

stanford_df = pd.read_csv(stanford_path)

In [2]:
# Preview Data Frame
stanford_df.head()
print(len(stanford_df))

2252


In [9]:
stanford_df["covid_exercise"].replace("", np.nan, inplace=True)
stanford_df["physical_activities"].replace("", np.nan, inplace=True)

In [11]:
# # In case there are any empty values, drop surveys that didn't respond to post and pre-covid exercise Q&A.
stanford_df["covid_exercise"].dropna(inplace = True)
stanford_df["physical_activities"].dropna(inplace = True)
print(len(stanford_df))

2252


In [12]:
completed_health = stanford_df.dropna(subset=["physical_activities", "covid_exercise"])

In [13]:
diabetes = completed_health[completed_health["diabetes"] == 1]
cardiovascular_disorders = completed_health[completed_health["cardiovascular_disorders"] == 1]
obesity = completed_health[completed_health["obesity"] == 1]
respiratory_infections = completed_health[completed_health["respiratory_infections"] == 1]
respiratory_disorders = completed_health[completed_health["respiratory_disorders_exam"] == 1]
gastrointenstinal_disorders = completed_health[completed_health["gastrointestinal_disorders"] == 1]
kidney_disease = completed_health[completed_health["chronic_kidney_disease"] == 1]
autoimmune_disease = completed_health[completed_health["autoimmune_disease"] == 1]
fatigue_syndrome = completed_health[completed_health["chronic_fatigue_syndrome_a"] == 1]
preexisting_total = (diabetes + cardiovascular_disorders + obesity + respiratory_infections + 
                     respiratory_disorders + gastrointenstinal_disorders + kidney_disease + 
                     autoimmune_disease + fatigue_syndrome)
preexisting_total.head(10)

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,study_id,redcap_survey_identifier,introduction_timestamp,country,state_in_india,state_in_us,province_in_china,state_other_countries,...,sector_of_contribution___4,sector_of_contribution___5,other,where_helped___1,where_helped___2,where_helped___3,where_helped___4,where_helped___5,other_where_helped,social_contributions_complete
1,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
6,,,,,,,,,,,...,,,,,,,,,,
8,,,,,,,,,,,...,,,,,,,,,,
18,,,,,,,,,,,...,,,,,,,,,,
19,,,,,,,,,,,...,,,,,,,,,,
20,,,,,,,,,,,...,,,,,,,,,,
23,,,,,,,,,,,...,,,,,,,,,,
25,,,,,,,,,,,...,,,,,,,,,,
40,,,,,,,,,,,...,,,,,,,,,,


In [14]:
# Computation of data for those that identified as having a pre-existing condition.
p_physical_mean = preexisting_total.groupby(["country"],as_index=True).physical_activities.mean()
p_physical_median = preexisting_total.groupby(["country"],as_index=True).physical_activities.median()
# p_physical_mode = preexisting_total.groupby(["country"],as_index=True).physical_activities.mode()
p_physical_sem = preexisting_total.groupby(["country"],as_index=True).physical_activities.sem()
p_physical_std = preexisting_total.groupby(["country"],as_index=True).physical_activities.std()
p_covid_mean = preexisting_total.groupby(["country"],as_index=True).covid_exercise.mean()
p_covid_median = preexisting_total.groupby(["country"],as_index=True).covid_exercise.median()
# p_meancovid_mode = preexisting_us.groupby(["country"],as_index=True).covid_exercise.mode()
p_covid_sem = preexisting_total.groupby(["country"],as_index=True).covid_exercise.sem()
p_covid_std = preexisting_total.groupby(["country"],as_index=True).covid_exercise.std()

In [16]:
p_covid_std

Series([], Name: covid_exercise, dtype: float64)

In [15]:
# Generate a summary statistics table of mean, median, variance, standard deviation, and SEM of the preexisting conditions
print(f"The mean physical exercise prior to COVID for individuals with preexisting conditions is: {preexisting_total.physical_activities.mean()}")
print(f"The mean physical activities during COVID for individuals with preexisting conditions is: {preexisting_total.covid_exercise.mean()}")
print(f"The variance of physical exercise prior to COVID for individuals with preexisting conditions is: {preexisting_total.covid_exercise.min()} to {covid_exercise.physical_activities.max()}")
print(f"The variance of physical exercise during to COVID for individuals with preexisting conditions is: {preexisting_total.physical_activities.min()} to {preexisting_total.physical_activities.max()}")
print(f"The standard deviation of all physical acitivities prior to COVID for individuals with preexisting conditions is: {preexisting_total.physical_activities.std()}")
print(f"The standard deviation of all physical acitivities during COVID for individuals with preexisting conditions is: {preexisting_total.covid_exercise.std()}")
print(f"The standard error of all physical activities prior to COVID for individuals with preexisting conditions is: {preexisting_total.physical_activities.sem()}")
print(f"The standard error of all physical activities during COVID for individuals with preexisting conditions is: {preexisting_total.covid_exercise.sem()}")

The mean physical exercise prior to COVID for individuals with preexisting conditions is: nan
The mean physical activities during COVID for individuals with preexisting conditions is: nan


NameError: name 'covid_exercise' is not defined

In [None]:
# # DROPPING INCOMPLETE SURVEYS WHICH ARE ONES THAT DO NOT HAVE TIMESTAMPS FOR EACH SURVEY MILESTONE.

# # Replace "incomplete" or a score of "0" for timestamps with blanks to drop null values.
# stanford_df["introduction_timestamp"].replace("[not completed]", np.nan, inplace=True)
# # stanford_df.replace("[not completed]", np.nan, inplace=True)
# stanford_df["social_interactions_timestamp"].replace("0", np.nan, inplace=True)
# stanford_df["hobbies_and_health_timestamp"].replace("0", np.nan, inplace=True)
# stanford_df["introduction_timestamp"].replace("0", np.nan, inplace=True)
# stanford_df["professional_life_timestamp"].replace("0", np.nan, inplace=True)
# stanford_df["social_contributions_timestamp"].replace("0", np.nan, inplace=True)

In [None]:
# # Drop Null timestamps to retain completed surveys.
# completed_surveys = stanford_df.dropna(subset=["introduction_timestamp", "social_interactions_timestamp", 
#                                                "hobbies_and_health_timestamp", "professional_life_timestamp", 
#                                                "social_contributions_timestamp"])
# print(len(completed_surveys))

In [None]:
# # DROPPING UNNECESSARY QUESTIONS

# clean_df = completed_surveys.drop(columns=["introduction_timestamp", "asian_category", "state_in_india", "province_in_china", 
#                           "state_other_countries", "introduction_complete", "social_interactions_timestamp", 
#                           "social_interactions_complete", "hobbies_and_health_timestamp", 
#                           "hobbies_and_health_complete", "professional_life_timestamp", 
#                           "professional_life_complete", "social_contributions_timestamp", 
#                           "social_contributions_complete"])
# print(len(clean_df))

In [None]:
# # Narrowing Down to the US responses.
# us_responses = clean_df[clean_df["country"] == 1]
# print(len(us_responses))

In [None]:
# # In case there are any empty values, drop surveys that didn't respond to post and pre-covid exercise Q&A.
# #  Should not be an issue because only completed timestamps were retained
# us_responses["covid_exercise"].replace("", np.nan, inplace=True)
# us_responses["physical_activities"].replace("", np.nan, inplace=True)
# print(len(us_responses))

In [None]:
# # In case there are any empty values, drop surveys that didn't respond to post and pre-covid exercise Q&A.
# #  Should not be an issue because only completed timestamps were retained
# us_responses["covid_exercise"].dropna(inplace = True)
# us_responses["physical_activities"].dropna(inplace = True)
# print(len(us_responses))

In [None]:
# # Creating Copy to get pre-existing dataframe
# preexisting_us = us_responses.copy()
# print(len(preexisting_us))

In [None]:
# nonexisting_us = us_responses.copy()
# print(len(nonexisting_us))

In [None]:
# preexisting_us = preexisting_us[preexisting_us["diabetes"] == 1 | 
#                                 (preexisting_us["cardiovascular_disorders"] == 1) | 
#                                 (preexisting_us["obesity"] == 1) | 
#                                 (preexisting_us["respiratory_infections"] == 1) | 
#                                 (preexisting_us["respiratory_disorders_exam"] == 1) | 
#                                 (preexisting_us["gastrointestinal_disorders"] == 1) | 
#                                 (preexisting_us["chronic_kidney_disease"] == 1) | 
#                                 (preexisting_us["autoimmune_disease"] == 1) | 
#                                 (preexisting_us["chronic_fatigue_syndrome_a"] == 1)]

In [None]:
# print(len(preexisting_us))
# preexisting_us.to_csv("preexisting_us.csv", encoding='utf-8', index=False)

In [None]:
# print(len(preexisting_us))

In [None]:
# print(len(nonexisting_us))

In [None]:
# nonexisting_us = nonexisting_us[(nonexisting_us["diabetes"] == 2) & 
#                                 (nonexisting_us["cardiovascular_disorders"] == 2) & 
#                                 (nonexisting_us["obesity"] == 2) & 
#                                 (nonexisting_us["respiratory_infections"] == 2) & 
#                                 (nonexisting_us["respiratory_disorders_exam"] == 2) & 
#                                 (nonexisting_us["gastrointestinal_disorders"] == 2) & 
#                                 (nonexisting_us["chronic_kidney_disease"] == 2) & 
#                                 (nonexisting_us["autoimmune_disease"] == 2) & 
#                                 (nonexisting_us["chronic_fatigue_syndrome_a"] == 2)]

In [None]:
# print(len(nonexisting_us))
# nonexisting_us.to_csv("nonexisting_us.csv", encoding='utf-8', index=False)

In [None]:
# # Computation of data for those that identified as having a pre-existing condition.
# p_physical_mean = preexisting_us.groupby(["country"],as_index=True).physical_activities.mean()
# p_physical_median = preexisting_us.groupby(["country"],as_index=True).physical_activities.median()
# # p_physical_mode = preexisting_us.groupby(["country"],as_index=True).physical_activities.mode()
# p_physical_sem = preexisting_us.groupby(["country"],as_index=True).physical_activities.sem()
# p_physical_std = preexisting_us.groupby(["country"],as_index=True).physical_activities.std()
# p_covid_mean = preexisting_us.groupby(["country"],as_index=True).covid_exercise.mean()
# p_covid_median = preexisting_us.groupby(["country"],as_index=True).covid_exercise.median()
# # p_meancovid_mode = preexisting_us.groupby(["country"],as_index=True).covid_exercise.mode()
# p_covid_sem = preexisting_us.groupby(["country"],as_index=True).covid_exercise.sem()
# p_covid_std = preexisting_us.groupby(["country"],as_index=True).covid_exercise.std()
# print(p_physical_mean)

In [None]:
# # Computation of data for those that identified as NOT having a pre-existing condition.
# n_physical_mean = nonexisting_us.groupby(["country"],as_index=True).physical_activities.mean()
# n_physical_median = nonexisting_us.groupby(["country"],as_index=True).physical_activities.median()
# # physical_mode = nonexisting_us.groupby(["country"],as_index=True).physical_activities.mode()
# n_physical_sem = nonexisting_us.groupby(["country"],as_index=True).physical_activities.sem()
# n_physical_std = nonexisting_us.groupby(["country"],as_index=True).physical_activities.std()
# n_covid_mean = nonexisting_us.groupby(["country"],as_index=True).covid_exercise.mean()
# n_covid_median = nonexisting_us.groupby(["country"],as_index=True).covid_exercise.median()
# # covid_mode = nonexisting_us.groupby(["country"],as_index=True).covid_exercise.mode()
# n_covid_sem = nonexisting_us.groupby(["country"],as_index=True).covid_exercise.sem()
# n_covid_std = nonexisting_us.groupby(["country"],as_index=True).covid_exercise.std()
# print(n_physical_mean)

In [None]:
# # Dependent Values for Scatter Plot
# plt.figure(figsize=(10,5))
# plt.bar(p_physical_mean, p_covid_mean)

# # Size, Labels, and Grid
# plt.title("Activities of Individuals w/ Pre-existing Conditions")
# plt.ylabel("Average")
# plt.xlabel("Before and After COVID Activity")

# # # Save the Plot
# # plt.savefig("Script Outputs/Lat_v_Temp.png")

# # Show plot
# plt.show()