In [3]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
import plotly.express as px
#from decouple import config
import os
from IPython.display import IFrame

# ---------------- Pandas settings --------------- #
# Removes rows and columns truncation of '...'
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', 200)


# ----------------- Set Mapbox Token --------------- #
MAPBOX_ACCESS_TOKEN=config("MAPBOX_ACCESS_TOKEN")
px.set_mapbox_access_token(MAPBOX_ACCESS_TOKEN)

# Set Data Dir
DATA_DIR="../drive_thru_testing_locations"

**Source: https://covid19.sokat.ai/**

In [4]:
IFrame("https://covid19.sokat.ai/", width="100%", height=400)

In [36]:
def markers_to_dataframe(url):
    """This function takes the https://covid19.sokat.ai/ web map as input to generate a CSV file of drive-thru testing locations. 
    """
    # Download webpage data with requests
    page = requests.get(url)
    # Rarse the webpage data with BeautifulSoup
    soup = BeautifulSoup(page.content, 'html.parser')
    
    # Select scripts in HTML file
    scripts = list(soup.find_all('script'))
    
    # Subset for the script with marker data and begin string splitting
    html = str(list(scripts[7])).split('var marker')
    
    # Use for-loops to generate the names, coordinates, and URLs of drive-thru testing locations
    list_of_names = []
    list_of_coords = []
    list_of_urls = []
    
    for i in range(2,len(html)-1):
        list_of_names.append(html[i].split('target="_blank">')[1].split('</a></div>')[0])
        list_of_coords.append(html[i].split('target="_blank">')[0].split("L.marker(\\n                [")[1].split("]")[0])
        list_of_urls.append(html[i].split('target="_blank">')[0].split('><a href="')[1].split('"')[0])

    # Concatenate lists into a pandas DataFrame
    dataframe = pd.DataFrame([list_of_names, list_of_coords, list_of_urls]).T
    
    # Rename the columns with appropriate titles
    dataframe.columns = ["Name","Coordinate", "URL"]
    
    # Generate Latitude and Longitude Series with string splitting
    dataframe["Latitude"] = dataframe["Coordinate"].str.split(expand=True)[0].str.replace(",","").astype(float)
    dataframe["Longitude"] = dataframe["Coordinate"].str.split(expand=True)[1].astype(float)
    
    # Return results
    return dataframe

In [32]:
df = markers_to_dataframe("https://covid19.sokat.ai/")
df = df[["Name", "URL", "Latitude", "Longitude"]] # added to parse coordinates column out to save space
df.info() # check for data type

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 332 entries, 0 to 331
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Name       332 non-null    object 
 1   URL        332 non-null    object 
 2   Latitude   332 non-null    float64
 3   Longitude  332 non-null    float64
dtypes: float64(2), object(2)
memory usage: 10.5+ KB


In [33]:
df.isnull().sum() # Check for missing values

Name         0
URL          0
Latitude     0
Longitude    0
dtype: int64

### Display data

In [37]:
df

Unnamed: 0,Name,URL,Latitude,Longitude
0,Collaborative Effort of Health Care Providers,https://www.adn.com/alaska-news/anchorage/2020...,61.182870,-149.837269
1,Alaska Healthcare,https://www.tomsguide.com/news/drive-through-c...,61.182620,-149.838060
2,Fairbanks\' Foundation Health Partners,http://www.newsminer.com/alerts/fairbanks-laun...,64.839980,-147.714320
3,Fairbanks Memorial Hospital,https://www.adn.com/alaska-news/2020/03/25/som...,64.832649,-147.741562
4,Church of the Highlands- Grant\'s Mill,https://www.google.com/amp/s/www.wvtm13.com/am...,33.516999,-86.655847
...,...,...,...,...
327,LVHN COVID-19 Assess and Test–Macungie,https://www.lvhn.org/testing-services/coronavi...,40.520270,-75.566380
328,LVHN COVID-19 Assess and Test–Muhlenberg,https://www.lvhn.org/testing-services/coronavi...,40.647550,-75.408170
329,LVHN COVID-19 Assess and Test–Nazareth,https://www.lvhn.org/testing-services/coronavi...,40.731020,-75.313660
330,LVHN COVID-19 Assess and Test\xad\xad–Richlan...,https://www.lvhn.org/testing-services/coronavi...,40.458400,-75.368450


### Save dataframe to `csv`

In [38]:
df.to_csv(os.path.join(DATA_DIR, 'us-drive-thru-testing-locations.csv'))

### Plotting Drive-Thru centers via Plotly Express

In [11]:
fig = px.scatter_mapbox(df, lat="Latitude", lon="Longitude", hover_name="Name", zoom=3)
fig.update_layout(mapbox_style="dark")
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()