In [1]:
import numpy as np 
import pandas as pd 
from datetime import datetime, date, time
from datetime import timedelta
import matplotlib as mpl
import matplotlib.pyplot as plt
plt.style.use("fivethirtyeight")
%matplotlib inline
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px

# Notebook-wide pandas display settings: floats are rendered with two decimals,
# and up to 100 rows / 100 columns are shown before pandas truncates the output.
pd.set_option("display.float_format", "{:.2f}".format)

for display_option in ("display.max_rows", "display.max_columns"):
    pd.set_option(display_option, 100)

## Read the new data

In [2]:
# Load the survey extract that already has the factor columns attached.
# The path is relative, so the CSV must sit in the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="covid_with_factors.csv")

In [3]:
# Number of distinct values per column. dropna=False counts NaN as a value,
# which is what len(Series.unique()) does as well.
column_values = df.nunique(dropna=False)

# Show only the columns that have more than 10 distinct values.
column_values.loc[column_values > 10]

ethnicity                                                                                                                                                                  234
religion                                                                                                                                                                    52
batch_date                                                                                                                                                                 126
observation_lat                                                                                                                                                          74643
observation_lon                                                                                                                                                         125137
HASC_code                                                                                                                    

In [4]:
# Among the columns with more than 10 distinct values, keep the contiguous run of
# labels between the two questions below (.loc label slices include both endpoints).
first_question = "what_kind_of_locations_or_facilities_are_offering_testing_in_your_city"
last_question = "which_media_outlets_do_you_think_only_report_on_the_potential_disadvantages_of_a_covid_19_vaccine"
high_cardinality = column_values[column_values > 10]
mult_responses = high_cardinality.loc[first_question:last_question]

In [5]:
# Display the distinct-value counts for the selected range of columns
# (presumably the multi-select survey questions -- confirm against the codebook).
mult_responses

what_kind_of_locations_or_facilities_are_offering_testing_in_your_city                                                                                                    256
which_of_the_following_activities_will_you_feel_comfortable_with_now_or_immediately_after_non_essential_businesses_re_open                                                129
which_of_the_following_categories_of_items_were_difficult_or_impossible_to_find                                                                                           512
from_which_of_the_following_places_have_you_purchased_or_looked_for_food_and_other_supplies                                                                                64
which_of_the_following_factors_have_inclined_you_to_not_closely_follow_the_guidance_of_social_distancing                                                                   64
which_of_the_following_behaviors_are_you_or_do_you_expect_to_demonstrate_in_public_once_non_essential_businesses_re_open          

In [7]:
# Frequency of each raw answer string for the testing-locations question.
# Respondents who picked several options appear as one "^"-joined string.
df.loc[:, "what_kind_of_locations_or_facilities_are_offering_testing_in_your_city"].value_counts()

My primary care physician’s office                                                     267368
Hospital                                                                               237893
Local health department                                                                159236
Drive-up testing facility                                                               96574
Drive-up testing facility^My primary care physician’s office                            39372
                                                                                        ...  
Emergency room^My primary care physician’s office^Other^Pharmacy^Urgent care clinic         3
Emergency room^Other^Pharmacy^Urgent care clinic                                            3
Drive-up testing facility^Emergency room^Other^Pharmacy                                     2
Emergency room^Other^Pharmacy                                                               2
Drive-up testing facility^Emergency room^Local health depart

#### what_kind_of_locations_or_facilities_are_offering_testing_in_your_city
+ My primary care physician’s office
+ Drive-up testing facility
+ Local health department
+ Urgent care clinic
+ Pharmacy
+ Emergency room
+ Hospital
+ Other


In [15]:
# Expand the "^"-delimited multi-select answer into one 0/1 indicator column per option.
testing_question = "what_kind_of_locations_or_facilities_are_offering_testing_in_your_city"
indicator_prefix = "locations_offering_testing_in_your_city_"

# Indicator-column suffix -> literal text that identifies the option inside the raw answer.
# Dict order is the order in which the new columns are appended to df.
testing_options = {
    # Only the leading part of this option is searched: the data spells it with a
    # typographic apostrophe ("physician’s"), so a pattern typed with a straight
    # apostrophe ("physician's office") never matches.
    "My_primary_care_physicians_office": "My primary care physician",
    "Drive_up_testing_facility": "Drive-up testing facility",
    "Local_health_department": "Local health department",
    "Urgent_care_clinic": "Urgent care clinic",
    "Pharmacy": "Pharmacy",
    "Emergency_room": "Emergency room",
    "Hospital": "Hospital",
    "Other": "Other",
}

# Look the source column up once instead of once per option.
testing_answers = df[testing_question]

for column_suffix, option_text in testing_options.items():
    # regex=False: plain substring search, so no character in the option text is
    # treated as a regex metacharacter.
    # na=False: a missing answer becomes 0, i.e. it is indistinguishable from
    # "option not selected" in the indicator columns.
    df[indicator_prefix + column_suffix] = (
        testing_answers.str.contains(option_text, regex=False, na=False).astype(int)
    )


In [16]:
# Gather every column whose name contains the fragment below: the raw question
# column plus the indicator columns derived from it, for side-by-side inspection.
name_fragment = 'offering_testing_in_your_city'
check_cols = list(filter(lambda column_name: name_fragment in column_name, df.columns))
print(check_cols)

['what_kind_of_locations_or_facilities_are_offering_testing_in_your_city', 'locations_offering_testing_in_your_city_My_primary_care_physicians_office', 'locations_offering_testing_in_your_city_Drive_up_testing_facility', 'locations_offering_testing_in_your_city_Local_health_department', 'locations_offering_testing_in_your_city_Urgent_care_clinic', 'locations_offering_testing_in_your_city_Pharmacy', 'locations_offering_testing_in_your_city_Emergency_room', 'locations_offering_testing_in_your_city_Hospital', 'locations_offering_testing_in_your_city_Other']


In [17]:
# For rows where the question was answered (non-null), show the raw answer next to
# its indicator columns so the encoding can be checked by eye.
answered = df["what_kind_of_locations_or_facilities_are_offering_testing_in_your_city"].notna()
df.loc[answered, check_cols]

Unnamed: 0,what_kind_of_locations_or_facilities_are_offering_testing_in_your_city,locations_offering_testing_in_your_city_My_primary_care_physicians_office,locations_offering_testing_in_your_city_Drive_up_testing_facility,locations_offering_testing_in_your_city_Local_health_department,locations_offering_testing_in_your_city_Urgent_care_clinic,locations_offering_testing_in_your_city_Pharmacy,locations_offering_testing_in_your_city_Emergency_room,locations_offering_testing_in_your_city_Hospital,locations_offering_testing_in_your_city_Other
0,Hospital,0,0,0,0,0,0,1,0
3,Drive-up testing facility,0,1,0,0,0,0,0,0
4,Local health department^My primary care physic...,1,0,1,0,0,0,0,0
7,Drive-up testing facility^Local health departm...,1,1,1,1,0,0,0,0
9,Hospital,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...
2866976,Local health department,0,0,1,0,0,0,0,0
2866977,My primary care physician’s office^Urgent care...,1,0,0,1,0,0,0,0
2866978,Drive-up testing facility,0,1,0,0,0,0,0,0
2866979,Hospital^Urgent care clinic,0,0,0,1,0,0,1,0
