# BC COVID-19 Data by Health Authority

1. [Feature Server JSON](https://services1.arcgis.com/xeMpV7tU1t4KD3Ei/arcgis/rest/services/COVID19_Cases_by_BC_Health_Authority/FeatureServer/0?f=pjson) `id 1,2,3,4,5`

2. [Case Details](http://www.bccdc.ca/health-info/diseases-conditions/covid-19/data)

---

In [1010]:
import numpy as np
import pandas as pd
import geopandas as gpd
import requests
from shapely.geometry import shape

In [3]:
r = requests.get("https://services1.arcgis.com/xeMpV7tU1t4KD3Ei/arcgis/rest/services/COVID19_Cases_by_BC_Health_Authority/FeatureServer/0/query?where=&objectIds=1%2C2%2C3%2C4%2C5&time=&geometry=&geometryType=esriGeometryPolygon&inSR=&spatialRel=esriSpatialRelIntersects&resultType=none&distance=0.0&units=esriSRUnit_Meter&returnGeodetic=false&outFields=&returnGeometry=true&returnCentroid=false&featureEncoding=esriDefault&multipatchOption=xyFootprint&maxAllowableOffset=&geometryPrecision=&outSR=&datumTransformation=&applyVCSProjection=false&returnIdsOnly=false&returnUniqueIdsOnly=false&returnCountOnly=false&returnExtentOnly=false&returnQueryGeometry=true&returnDistinctValues=false&cacheHint=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&having=&resultOffset=&resultRecordCount=&returnZ=false&returnM=false&returnExceededLimitFeatures=true&quantizationParameters=&sqlFormat=none&f=pgeojson&token=")
r.raise_for_status()

data = r.json()

In [4]:
gdf = gpd.GeoDataFrame.from_features(data["features"])
print(gdf.head())

                                            geometry            HA_Name
0  POLYGON ((-120.45314 52.99326, -120.42102 52.9...           Interior
1  MULTIPOLYGON (((-122.93676 49.31128, -122.9365...             Fraser
2  MULTIPOLYGON (((-123.10903 49.28623, -123.0231...  Vancouver Coastal
3  MULTIPOLYGON (((-126.66725 51.19230, -126.6705...   Vancouver Island
4  MULTIPOLYGON (((-129.76908 55.45115, -129.7657...           Northern


In [11]:
with open('../data/bc-health-authorities.geojson', 'w') as f:
    f.write(gdf.to_json())

---

2. **Case Details**

In [9]:
pd.read_csv('../data/BCCDC_COVID19_Dashboard_Case_Details.csv')

Unnamed: 0,Reported_Date,HA,Sex,Age_Group,Classification_Reported
0,2020-01-26,Out of Canada,M,40-49,Lab-diagnosed
1,2020-02-02,Vancouver Coastal,F,50-59,Lab-diagnosed
2,2020-02-05,Out of Canada,F,20-29,Lab-diagnosed
3,2020-02-05,Out of Canada,M,30-39,Lab-diagnosed
4,2020-02-11,Interior,F,30-39,Lab-diagnosed
...,...,...,...,...,...
3295,2020-07-19,Vancouver Island,M,80-89,Lab-diagnosed
3296,2020-07-19,Vancouver Island,F,60-69,Lab-diagnosed
3297,2020-07-19,Vancouver Island,F,50-59,Lab-diagnosed
3298,2020-07-19,Out of Canada,F,30-39,Lab-diagnosed


---
# BC COVID Data for LTCs

1. [long term facilities](https://www.seniorsadvocatebc.ca/quickfacts/search/%20)
2. [outbreaks](http://www.phsa.ca/current-outbreaks)

---

1. **Long term facilities**

In [4]:
from bs4 import BeautifulSoup

In [18]:
# user defined exception errors
class Error(Exception):
    """Base class for other exceptions"""
    pass

class InvalidHome(Error):
    """Raised when return is not a home name or there is a duplicate"""
    pass

class InvalidAddress(Error):
    """Raised when return is not a long-term care home address"""
    pass

In [253]:
address = []
home = []
links = []
    
for i in range(1,31):
    re = requests.get('https://www.seniorsadvocatebc.ca/quickfacts/search/%20/'+ str(i))
    s = BeautifulSoup(re.text,'html5lib')

    containers = s.find_all("div")

    for each in containers:
        try:
             # home address
            if 'Address:' not in each.find('p').getText():
                raise InvalidAddress
            else:
                address.append(each.find('p').getText())

            # home name
            if each.find('a').getText() in ['Home', '\n\t\t\t\t\t\t\t«\n\t\t\t\t\t\t', 
                                            'Data Sources', 'Visit us on Facebook', 
                                            'Email the OSA', '']:
                raise InvalidHome
            elif each.find('a').getText() in home:
                # remove duplicates (if home name already exists)
                raise InvalidHome
            else:
                home.append(each.find('a').getText())

            # home url
            if each.find('a').get('href') in links:
                # remove duplicates (if url already exists)
                raise InvalidHome
            else:
                links.append(each.find('a').get('href'))

        except AttributeError:
            pass
        except InvalidHome:
            pass
        except InvalidAddress:
            pass

In [1096]:
ltc = pd.DataFrame({'Facility':home, 'Link':links, 'Street address':address})

In [1097]:
# clean and separate address info
ltc['Street address'] = ltc['Street address'].str.replace('\n\t\t\t\t',' ')
ltc['city/postal'] = ltc['Street address'].str.extract(r'City/postal:(.*) Phone:')
ltc['phone'] = ltc['Street address'].str.extract(r'Phone:(.*)\t\t\t')
ltc['Street address'] = ltc['Street address'].str.extract(r'Address:(.*) City/postal:')

In [1098]:
ltc

Unnamed: 0,Facility,Link,Street address,city/postal,phone
0,Acropolis Manor,https://www.seniorsadvocatebc.ca/quickfacts/lo...,1325 Summit Avenue,Prince Rupert V8J4C1,(250) 622-6400
1,Aberdeen Hospital,https://www.seniorsadvocatebc.ca/quickfacts/lo...,1450 Hillside Ave.,Victoria V8T2B7,(250) 370-5648
2,Adanac Park Lodge,https://www.seniorsadvocatebc.ca/quickfacts/lo...,851 Boundary Road,Vancouver V5K4T2,(604) 299-7567
3,Acacia Ty Mawr,https://www.seniorsadvocatebc.ca/quickfacts/lo...,2655 E Shawnigan Lake Rd,Shawnigan Lake V0R2W0,(250) 743-2124
4,Ayre Manor,https://www.seniorsadvocatebc.ca/quickfacts/lo...,6764 Ayre Rd,Sooke V9Z1K1,(250) 642-1750
...,...,...,...,...,...
292,Yucalta Lodge,https://www.seniorsadvocatebc.ca/quickfacts/lo...,555 - 2nd Avenue,Campbell River V9W3V1,(250) 850-2900
293,Youville Residence,https://www.seniorsadvocatebc.ca/quickfacts/lo...,4950 Heather Street,Vancouver V5Z3L9,(604) 261-9371
294,Yaletown House,https://www.seniorsadvocatebc.ca/quickfacts/lo...,1099 Cambie Street,Vancouver V6B5A8,(604) 689-0022
295,Wrinch Memorial Hospital,https://www.seniorsadvocatebc.ca/quickfacts/lo...,2510 West Hwy 62,Hazelton V0J1Y0,(250) 842-5211


### **Web Crawler for Home Details**

Note: did not include  'Disease outbreak or occurrence' since 21 are missing. would need a try except statement.

In [1093]:
reques = requests.get(links[0])
soup = BeautifulSoup(reques.text, 'html5lib')

alltext = []
for t in soup.find_all('td'):
    alltext.append(t.getText())
    
    
keep = ['\n\t\t\t\t\t\t\n\t\t\t\t\t', 'Health authority', 'Accreditation status', 'Councils', 'Private beds (not publicly funded)', 'Publicly funded beds (short- and long-term)',
       'Total beds', 'Private rooms', 'Semi-private rooms', 'Multi-person rooms', 'Total direct care hours (hours per resident per day)',
       'Number of licensing complaints', 'Number substantiated licensing complaints',
       'Number of Inspections', 'Number of licensing infractions found', 'Hygiene & communicable disease control', 'Average age of population']


# initialize empty dict
details = {}

def curate(x, keep):
    '''param x: is a list of text scraped from LTC tables
        param keep: is a list of key words to wait for before selecting text'''
    index = -1

    if not bool(details):
        # if dictionary is empty, create the keys and values
        for i in x:
            index += 1
            if i in keep:
                details[i] = [x[index+1]]
        
    else:
        # if the dictionary already has entries, append new values
        for i in x:
            index += 1
            if i in keep:
                details[i].append(x[index+1])
            
    return details

***Warning:*** slow script below

In [1094]:
for l, i in zip(links, range(1, len(links)+1)):
    print('making beautiful soup - {} of {}'.format(i, len(links)))
    re = requests.get(l)
    s = BeautifulSoup(re.text,'html5lib')
    complete = []

    for t in s.find_all('td'):
        # SLOWEST STEP
        complete.append(t.getText())

    curate(complete, keep)

making beautiful soup - 1 of 297
making beautiful soup - 2 of 297
making beautiful soup - 3 of 297
making beautiful soup - 4 of 297
making beautiful soup - 5 of 297
making beautiful soup - 6 of 297
making beautiful soup - 7 of 297
making beautiful soup - 8 of 297
making beautiful soup - 9 of 297
making beautiful soup - 10 of 297
making beautiful soup - 11 of 297
making beautiful soup - 12 of 297
making beautiful soup - 13 of 297
making beautiful soup - 14 of 297
making beautiful soup - 15 of 297
making beautiful soup - 16 of 297
making beautiful soup - 17 of 297
making beautiful soup - 18 of 297
making beautiful soup - 19 of 297
making beautiful soup - 20 of 297
making beautiful soup - 21 of 297
making beautiful soup - 22 of 297
making beautiful soup - 23 of 297
making beautiful soup - 24 of 297
making beautiful soup - 25 of 297
making beautiful soup - 26 of 297
making beautiful soup - 27 of 297
making beautiful soup - 28 of 297
making beautiful soup - 29 of 297
making beautiful soup -

making beautiful soup - 239 of 297
making beautiful soup - 240 of 297
making beautiful soup - 241 of 297
making beautiful soup - 242 of 297
making beautiful soup - 243 of 297
making beautiful soup - 244 of 297
making beautiful soup - 245 of 297
making beautiful soup - 246 of 297
making beautiful soup - 247 of 297
making beautiful soup - 248 of 297
making beautiful soup - 249 of 297
making beautiful soup - 250 of 297
making beautiful soup - 251 of 297
making beautiful soup - 252 of 297
making beautiful soup - 253 of 297
making beautiful soup - 254 of 297
making beautiful soup - 255 of 297
making beautiful soup - 256 of 297
making beautiful soup - 257 of 297
making beautiful soup - 258 of 297
making beautiful soup - 259 of 297
making beautiful soup - 260 of 297
making beautiful soup - 261 of 297
making beautiful soup - 262 of 297
making beautiful soup - 263 of 297
making beautiful soup - 264 of 297
making beautiful soup - 265 of 297
making beautiful soup - 266 of 297
making beautiful sou

In [1095]:
# summary of contents
for k in details.keys():
    print(k, len(details[k]))

df = pd.DataFrame(details)

# clean facility name scrape
df.rename(columns = {'\n\t\t\t\t\t\t\n\t\t\t\t\t' : 'Facility'}, inplace = True)
df.Facility = df.Facility.str.replace('\n\t\t\t\t\t\t', '')
df.Facility = df.Facility.str.replace(' Quick Facts(.*)\n\t\t\t\t\t', '')
df


						
					 297
Health authority 297
Accreditation status 297
Councils 297
Private beds (not publicly funded) 297
Publicly funded beds (short- and long-term) 297
Total beds 297
Private rooms 297
Semi-private rooms 297
Multi-person rooms 297
Total direct care hours (hours per resident per day) 297
Number of licensing complaints 297
Number substantiated licensing complaints 297
Number of Inspections 297
Number of licensing infractions found 297
Hygiene & communicable disease control 297
Average age of population 297


Unnamed: 0,Facility,Health authority,Accreditation status,Councils,Private beds (not publicly funded),Publicly funded beds (short- and long-term),Total beds,Private rooms,Semi-private rooms,Multi-person rooms,Total direct care hours (hours per resident per day),Number of licensing complaints,Number substantiated licensing complaints,Number of Inspections,Number of licensing infractions found,Hygiene & communicable disease control,Average age of population
0,Acropolis Manor,Northern Health,Accredited,"Resident Council, Family Council",0,61,61,49,6,0,3.47,1,1,4,8,0,83
1,Aberdeen Hospital,Vancouver Island Health,Accredited,Resident & Family Council (combined),0,100,100,19,4,25,4.74,7,4,1,2,0,69
2,Adanac Park Lodge,Vancouver Coastal Health,Not Accredited,Resident Council,0,73,73,67,3,0,3.43,0,0,2,2,0,63
3,Acacia Ty Mawr,Vancouver Island Health,Accredited,Family Council,0,35,35,31,2,0,2.80,1,0,2,1,0,84
4,Ayre Manor,Vancouver Island Health,Accredited,Resident & Family Council (combined),1,32,33,32,0,0,3.13,0,0,3,3,0,82
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292,Yucalta Lodge,Vancouver Island Health,Accredited,Resident Council,0,99,99,100,0,0,3.22,12,5,4,2,0,82
293,Youville Residence,Vancouver Coastal Health,Accredited,Resident & Family Council (combined),0,42,42,42,0,0,3.57,0,0,5,5,0,79
294,Yaletown House,Vancouver Coastal Health,Not Accredited,Resident & Family Council (combined),0,127,127,127,0,0,2.77,0,0,4,2,0,82
295,Wrinch Memorial Hospital,Northern Health,Accredited,,0,10,10,1,5,0,3.24,,,1,8,0,88


In [1099]:
ltc = pd.merge(ltc, df, how = 'outer', on = 'Facility')
ltc.tail()

Unnamed: 0,Facility,Link,Street address,city/postal,phone,Health authority,Accreditation status,Councils,Private beds (not publicly funded),Publicly funded beds (short- and long-term),...,Private rooms,Semi-private rooms,Multi-person rooms,Total direct care hours (hours per resident per day),Number of licensing complaints,Number substantiated licensing complaints,Number of Inspections,Number of licensing infractions found,Hygiene & communicable disease control,Average age of population
292,Yucalta Lodge,https://www.seniorsadvocatebc.ca/quickfacts/lo...,555 - 2nd Avenue,Campbell River V9W3V1,(250) 850-2900,Vancouver Island Health,Accredited,Resident Council,0,99,...,100,0,0,3.22,12.0,5.0,4,2,0,82
293,Youville Residence,https://www.seniorsadvocatebc.ca/quickfacts/lo...,4950 Heather Street,Vancouver V5Z3L9,(604) 261-9371,Vancouver Coastal Health,Accredited,Resident & Family Council (combined),0,42,...,42,0,0,3.57,0.0,0.0,5,5,0,79
294,Yaletown House,https://www.seniorsadvocatebc.ca/quickfacts/lo...,1099 Cambie Street,Vancouver V6B5A8,(604) 689-0022,Vancouver Coastal Health,Not Accredited,Resident & Family Council (combined),0,127,...,127,0,0,2.77,0.0,0.0,4,2,0,82
295,Wrinch Memorial Hospital,https://www.seniorsadvocatebc.ca/quickfacts/lo...,2510 West Hwy 62,Hazelton V0J1Y0,(250) 842-5211,Northern Health,Accredited,,0,10,...,1,5,0,3.24,,,1,8,0,88
296,Zion Park Manor,https://www.seniorsadvocatebc.ca/quickfacts/lo...,5939-180th St,Surrey V3S4L2,(604) 576-2891,Fraser Health,Not Accredited,Resident & Family Council (combined),29,70,...,99,0,0,3.08,0.0,0.0,1,2,0,85


2. **Outbreaks**

From: [Nora Loreto](https://docs.google.com/spreadsheets/d/1M_RzojK0vwF9nAozI7aoyLpPU8EA1JEqO6rq0g1iebU/edit#gid=0)

*Up to date as of 23h July 21*

---

In [1071]:
cases = pd.read_csv('../data/Deaths in Residential Care in Canada by facility - Institutions.csv',  skiprows=[0,1])

In [1072]:
# BC facilities
BC_cases = cases.loc[cases['Prov.'] == 'BC']
BC_cases.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 22 entries, 82 to 630
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Facility               22 non-null     object 
 1   Location               22 non-null     object 
 2   Prov.                  22 non-null     object 
 3   Number of dead         22 non-null     float64
 4   Number of beds (appx)  3 non-null      float64
 5   %                      17 non-null     object 
 6   Public or private      14 non-null     object 
 7   Owners                 3 non-null      object 
 8   Union                  0 non-null      object 
 9   Unnamed: 9             1 non-null      object 
 10  Unnamed: 10            1 non-null      object 
dtypes: float64(2), object(9)
memory usage: 2.1+ KB


In [1073]:
# drop unneccessary columns
dr = ['Owners', 'Union', 'Unnamed: 9', 'Unnamed: 10']

BC_cases.drop(columns = dr, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy [frame.py:3997]


### Clean and Merge LTC Details with COVID-19 Outbreak Information

---

In [1100]:
merge = pd.merge(BC_cases, ltc, on = 'Facility', how = 'inner')

In [1101]:
list(set(merge.Facility.unique()) - set(BC_cases.Facility.unique()))

[]

In [1102]:
list(set(BC_cases.Facility.unique()) - set(merge.Facility.unique()))

['Amica Edgemont Village',
 'Shaughnessy Care Centre',
 'Swedish Assisted Living',
 'South Granville Park Lodge',
 'Mission Institution*',
 'Four LTC homes in Vancouver - July 6',
 'Chartwell Willow Long-term Care ']

**Unaccounted for homes in initial merge:**

Name in BC Cases | In LTC list
-----------------|-------------
Holy Family Hospital long term care unit | Holy Family Hospital
Haro Park | Haro Park Centre
Lions Gate Hospital (Unit 6 East) | [Evergreen House - Lions Gate Hospital](https://www.google.com/search?client=safari&rls=en&q=lions+gate+hospital+unit+6+evergreen+house&ie=UTF-8&oe=UTF-8)
Four LTC homes in Vancouver - July 6 | `unknown`
Amica Edgemont Village | `missing - senior living`
Ridge Meadows Hospital (Rehab and Unit 3W) | [Baillie House](https://www.fraserhealth.ca/Service-Directory/Service-At-Location/5/7/emergency---ridge-meadows-hospital#.Xx8Rpy0ZNQI)
Swedish Assisted Living | `missing - assisted living`
Worthington Longterm Care Facility | [Cottage and Worthington Pavilions - MSA Hospital](https://www.fraserhealth.ca/Service-Directory/Locations/Abbotsford/cottage-worthington-pavilion#.Xx8PRy0ZNQI)
Residence at Clayton Heights | The Residence at Clayton Heights
South Granville Park Lodge | `missing`
Shaughnessy Care Centre | `missing`
Valhaven Home | Valhaven Rest Home
Mission Institution* | `correctional facility`

***Correct nomenclature to match those in LTC scrape:***

In [1082]:
BC_cases.Facility.replace({'Holy Family Hospital long term care unit' : 'Holy Family Hospital',
                           'Haro Park' : 'Haro Park Centre',
                           'Lions Gate Hospital (Unit 6 East)' : 'Evergreen House - Lions Gate Hospital',
                           'Worthington Longterm Care Facility' : 'Cottage and Worthington Pavilions - MSA Hospital',
                           'Residence at Clayton Heights' : 'The Residence at Clayton Heights',
                           'Ridge Meadows Hospital (Rehab and Unit 3W)' : 'Baillie House',
                           'Valhaven Home':'Valhaven Rest Home'}, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy [generic.py:6746]


In [1103]:
merge = pd.merge(BC_cases, ltc, on = 'Facility', how = 'right')
list(set(BC_cases.Facility.unique()) - set(merge.Facility.unique()))

['Amica Edgemont Village',
 'Shaughnessy Care Centre',
 'Swedish Assisted Living',
 'South Granville Park Lodge',
 'Mission Institution*',
 'Four LTC homes in Vancouver - July 6',
 'Chartwell Willow Long-term Care ']

#### As done in Ontario analysis, add binary columns for councils and accreditation

In [1112]:
def residents(x):
    if 'Resident' in x:
        return 'yes'
    else:
        return 'no'
    
    
def family(x):
    if 'Family' in x:
        return 'yes'
    else:
        return 'no'

def accred(x):
    if x == 'Not Accredited':
        return 'no'
    elif x == 'Accredited':
        return 'yes'
    else:
        return 'unknown'
    
merge['residents_council'] = merge.Councils.map(lambda x: residents(x))
merge['family_council'] = merge.Councils.map(lambda x: family(x))
merge['accreditation'] = merge['Accreditation status'].map(lambda x: accred(x))

#### Add outbreak status column

In [1104]:
def outbreak(x):
    try:
        if int(x) != np.nan:
            return "yes"
    except ValueError:
        return "no"
    
merge['outbreak_status'] = merge['Number of dead'].map(lambda x: outbreak(x))

Manually adjust outbreak status from webscrapes (supplementary portion at bottom of this .ipynb)

In [1514]:
# July 25th Scrape from Northern
merge.outbreak_status.loc[merge.Facility == 'Terraceview Lodge'] = 'yes'

# Aug 4th Scrape from Fraser
merge.outbreak_status.loc[merge.Facility == 'Dania Home'] = 'yes'

# Aug 10th Scrape from Fraser
merge.outbreak_status.loc[merge.Facility == 'Maple Ridge Seniors Village'] = 'yes'
# merge.outbreak_status.loc[merge.Facility == 'Derby Manor'] = 'yes' - NOT FOUND
merge.outbreak_status.loc[merge.Facility == 'George Derby Centre'] = 'yes'
# merge.outbreak_status.loc[merge.Facility == 'New Vista Care Society'] = 'yes' - NOT FOUND

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy [indexing.py:671]


In [1512]:
merge.query('Facility.str.contains("Derby Manor") or Facility.str.contains("New Vista Care Society")', engine = 'python')

Unnamed: 0,Facility,Location,Prov.,Number of dead,Number of beds (appx),%,Public or private,Link,Street address,city/postal,...,Number of licensing complaints,Number substantiated licensing complaints,Number of Inspections,Number of licensing infractions found,Hygiene & communicable disease control,Average age of population,outbreak_status,residents_council,family_council,accreditation


In [1105]:
merge.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 297 entries, 0 to 296
Data columns (total 28 columns):
 #   Column                                                Non-Null Count  Dtype  
---  ------                                                --------------  -----  
 0   Facility                                              297 non-null    object 
 1   Location                                              15 non-null     object 
 2   Prov.                                                 15 non-null     object 
 3   Number of dead                                        15 non-null     float64
 4   Number of beds (appx)                                 3 non-null      float64
 5   %                                                     11 non-null     object 
 6   Public or private                                     11 non-null     object 
 7   Link                                                  297 non-null    object 
 8   Street address                                        297 no

### Caveats:

- Nora Loreto's information may not be complete (e.g. Terraceview Lodge from VCH scrape)
- Nora Loreto reports public or private status of home, which is not found on BC government websites (as of yet) and does not reflect private or publicly funded beds

## Home Type Scrape 

From: [FOI Request (2018)](http://docs.openinfo.gov.bc.ca/Response_Package_HTH-2018-83925.pdf)

In [1161]:
from PyPDF2 import PdfFileReader
import io
import re

hometype = requests.get('http://docs.openinfo.gov.bc.ca/Response_Package_HTH-2018-83925.pdf')

with io.BytesIO(hometype.content) as f:
            pdf = PdfFileReader(f)
            information = pdf.getDocumentInfo()
            pages = pdf.getNumPages()
            print(pdf.getPageLayout())
            for i in range(2, 9):
                page = pdf.getPage(i)
                print(page)
                page_content = page.getContents()           
                print(page_content)

None
{'/Type': '/Page', '/MediaBox': [0, 0, 792, 612], '/Parent': IndirectObject(3, 0), '/Resources': {'/XObject': {'/Im3': IndirectObject(21, 0)}, '/ProcSet': ['/PDF', '/ImageI']}, '/Contents': IndirectObject(19, 0)}
{}
{'/Type': '/Page', '/MediaBox': [0, 0, 792, 612], '/Parent': IndirectObject(3, 0), '/Resources': {'/XObject': {'/Im4': IndirectObject(28, 0)}, '/ProcSet': ['/PDF', '/ImageI']}, '/Contents': IndirectObject(26, 0)}
{}
{'/Type': '/Page', '/MediaBox': [0, 0, 792, 612], '/Parent': IndirectObject(3, 0), '/Resources': {'/XObject': {'/Im5': IndirectObject(35, 0)}, '/ProcSet': ['/PDF', '/ImageI']}, '/Contents': IndirectObject(33, 0)}
{}
{'/Type': '/Page', '/MediaBox': [0, 0, 792, 612], '/Parent': IndirectObject(3, 0), '/Resources': {'/XObject': {'/Im6': IndirectObject(42, 0)}, '/ProcSet': ['/PDF', '/ImageI']}, '/Contents': IndirectObject(40, 0)}
{}
{'/Type': '/Page', '/MediaBox': [0, 0, 792, 612], '/Parent': IndirectObject(3, 0), '/Resources': {'/XObject': {'/Im7': IndirectObje

<mark> **Since PDF charts are images and can not be scraped, home type was recorded manually in reference to the released document.**</mark>


In [1172]:
types = pd.read_csv('/Users/kt/Documents/covid-research/data/bc_home_types.csv')

In [1173]:
types.head()

Unnamed: 0,Health authority,city/postal,Facility,owner_type,comments,home_type
0,Fraser Health,Abbotsford V2S0B3,Cottage and Worthington Pavilions - MSA Hospital,HA,,health_authority
1,Fraser Health,Abbotsford V2S1K1,Menno Hospital,PNP,,non-profit
2,Fraser Health,Abbotsford V2S1K8,The Mayfair,PNP,,non-profit
3,Fraser Health,Abbotsford V2S1N2,Menno Home,PNP,,non-profit
4,Fraser Health,Abbotsford V2S2Z8,Maplewood House,PNP,,non-profit


In [1515]:
merge.to_csv('../data/bc_ltc.csv')

---

---
---

# ***FIN***

---
---
---

# Outbreak Data Web Scrape

2. **Outbreaks**

---

***Northern***

In [1174]:
re = requests.get('https://www.northernhealth.ca/health-topics/current-outbreaks')
s = BeautifulSoup(re.text,'html5lib')
table =s.find_all('td')

In [1175]:
d = ['city', 'facility', 'outbreaktype', 'datedeclared']
outbreak = {}
for each,i in zip(table,d):
    outbreak[i] = each.text.strip()

In [1176]:
northern = pd.DataFrame(outbreak, index = [0])
northern['health_authority'] = "northern"

In [1177]:
northern.datedeclared = northern.datedeclared.str.replace('\n\n\t\t\t', '-')

In [1178]:
northern

Unnamed: 0,city,facility,outbreaktype,datedeclared,health_authority
0,None to date,,,,northern


***Interior Health***

In [1179]:
ih = requests.get('https://www.interiorhealth.ca/YourEnvironment/CommunicableDiseaseControl/Outbreaks/Pages/default.aspx')
ih = BeautifulSoup(ih.text,'html5lib')
outbreaks = ih.find_all('table')[7]

In [1180]:
outbreaks.find('td').text

'There are no items to show in this view of the "Outbreaks" list.'

***Vancouver Island***

In [1181]:
vi = requests.get('https://www.healthspace.ca/Clients/VIHA/VIHA_Website.nsf/Outbreak')
vi = BeautifulSoup(vi.text,'html5lib')
outbreaks = vi.find('td')

In [1182]:
outbreaks.getText()

' Currently, there are no reported outbreaks in VIHA hospitals or long term care facilities.'

***Vancouver Coastal Health***

In [1183]:
from PyPDF2 import PdfFileReader
import io
import re

In [1184]:
vch = requests.get('http://www.vch.ca/Documents/facility-outbreak-bulletin.pdf')

In [1185]:
with io.BytesIO(vch.content) as f:
            pdf = PdfFileReader(f)
            information = pdf.getDocumentInfo()
            pages = pdf.getNumPages()
            for i in range(0, pages):
                page = pdf.getPage(i)
                page_content = page.extractText()           
                print(page_content)

Facility Outbreak Bulletin
This bulletin lists ongoing and recently ended outbreaks in licensed long-term and acute care
facilities throughout Vancouver Coastal Health, as of 01:00 PM, 06-Aug-2020
DISEASE
LOCATION
RESTRICTIONS
IMPOSED
RESTRICTIONS
LIFTED
FACILITY
COVID-19
9020 Bridgeport Road, Richmond
05-Aug-20
Richmond Lions Manor - Bridgeport, 2nd Floor
COVID-19
803 West 12th Avenue, Vancouver
06-Aug-20
Joseph & Rosalie Segal Family Health Centre, 8th 
Floor
COVID-19
1081 Burrard Street, Vancouver
16-Jul-20
28-Jul-20
St. Paul's Hospital, NICU
COVID-19
7801 Argyle St, Vancouver
09-Jun-20
Holy Family Hospital, LTCF (Rehabilitation Unit 
declared over)
Page 1 of 1
Red text denotes updates from previously issued bulletin
Grey text indicates that restrictions have been lifted
Restriction Imposed: 
Restrictions Lifted: 
               Date which outbreak measures were introduced
            Date which outbreak measures were discontinued



----

In [1469]:
spl = page_content.split('\n')

In [1470]:
spldf = pd.Series(spl)

In [1471]:
spldf.iloc[10:29]

10                                             COVID-19
11                       9020 Bridgeport Road, Richmond
12                                            05-Aug-20
13         Richmond Lions Manor - Bridgeport, 2nd Floor
14                                             COVID-19
15                      803 West 12th Avenue, Vancouver
16                                            06-Aug-20
17    Joseph & Rosalie Segal Family Health Centre, 8th 
18                                                Floor
19                                             COVID-19
20                       1081 Burrard Street, Vancouver
21                                            16-Jul-20
22                                            28-Jul-20
23                            St. Paul's Hospital, NICU
24                                             COVID-19
25                            7801 Argyle St, Vancouver
26                                            09-Jun-20
27     Holy Family Hospital, LTCF (Rehabilitatio

In [1484]:
vch_outbreaks = pd.DataFrame({'info' : spldf.iloc[11:29]})

In [1483]:
vch_outbreaks.groupby(vch_outbreaks.index // 5).agg('\n'.join)['info'].str.split('\n',expand=True)

Unnamed: 0,0,1,2,3,4
2,"9020 Bridgeport Road, Richmond",05-Aug-20,"Richmond Lions Manor - Bridgeport, 2nd Floor",COVID-19,
3,"803 West 12th Avenue, Vancouver",06-Aug-20,"Joseph & Rosalie Segal Family Health Centre, 8th",Floor,COVID-19
4,"1081 Burrard Street, Vancouver",16-Jul-20,28-Jul-20,"St. Paul's Hospital, NICU",COVID-19
5,"7801 Argyle St, Vancouver",09-Jun-20,"Holy Family Hospital, LTCF (Rehabilitation Unit",declared over),


In [1473]:
vch_outbreaks.rename(columns = {'address', 'restrictionsimposed', 'facility', 'outbreaktype'} inplace = True)

In [1474]:
vch_outbreaks

Unnamed: 0,info,details
10,COVID-19,outbreaktype
11,"9020 Bridgeport Road, Richmond",address
12,05-Aug-20,restrictionsimposed
13,"Richmond Lions Manor - Bridgeport, 2nd Floor",facility
14,COVID-19,outbreaktype
15,"803 West 12th Avenue, Vancouver",address
16,06-Aug-20,restrictionsimposed
17,"Joseph & Rosalie Segal Family Health Centre, 8th",facility
18,Floor,status
19,COVID-19,outbreaktype


In [622]:
vch_pivot = vch_outbreaks.pivot_table(values='info',
                                     columns='details', 
                                     aggfunc=lambda x: '\n'.join(x))

In [623]:
vch_pivot

details,address,facility,outbreaktype,restrictionsimposed,restrictionslifted,status
info,"1081 Burrard Street, Vancouver\n7801 Argyle St...","St. Paul's Hospital, NICU\nHoly Family Hospita...",COVID-19\nCOVID-19\nCOVID-19\nCOVID-19,16-Jul-20\n09-Jun-20\n12-Apr-20\n30-Mar-20,13-Jun-20\n05-Jun-20,declared over)


In [640]:
def vch_split(x):
    for r in x.splitlines():
        return r
    
vch_pivot.address.apply(lambda x: vch_split(x))

info    1081 Burrard Street, Vancouver
Name: address, dtype: object

In [630]:
vch_pivot['health_authority'] = 'vancouver coastal health'

***Fraser***

In [1197]:
f = requests.get(r'https://www.fraserhealth.ca/patients-and-visitors/current-outbreaks#.XzGwMS0ZNQI')
fraser = BeautifulSoup(f.text,'html5lib')
table = fraser.find_all('td')

In [1446]:
newlist = []

for td in table:
        try:
            if td.find('h5').getText() == '\xa0':
                # if no info is provided, it will mess up indexing
                pass
            elif td.find('h5').getText() == '':
                # if no info is provided, it will mess up indexing
                pass
            else:    
                newlist.append(td.find('h5').getText())
        except AttributeError:
            pass
        
            
        
df = pd.DataFrame(newlist)

In [1447]:
newlist

['July 31, 2020',
 'Dania Home',
 'Residential',
 'COVID-19',
 'August 4, 2020',
 'Maple Ridge Seniors Village',
 'Residential',
 'COVID-19',
 'August 6, 2020',
 'Derby Manor',
 'Residential',
 'COVID-19',
 'August 7, 2020',
 'George Derby Centre',
 'Residential',
 'COVID-19',
 'August 8, 2020',
 'New Vista Care Society',
 'Residential',
 'COVID-19']

In [1464]:
fraser = df.groupby(df.index // 4).agg('\n'.join)[0].str.split('\n',expand=True)

In [1465]:
fraser.rename(columns = {0:'datedeclared', 1:'facility', 2:'facilitytype', 3:'outbreaktype'}, inplace = True)

In [1466]:
fraser['health_authority'] = 'fraser'

In [1467]:
fraser

Unnamed: 0,datedeclared,facility,facilitytype,outbreaktype,health_authority
0,"July 31, 2020",Dania Home,Residential,COVID-19,fraser
1,"August 4, 2020",Maple Ridge Seniors Village,Residential,COVID-19,fraser
2,"August 6, 2020",Derby Manor,Residential,COVID-19,fraser
3,"August 7, 2020",George Derby Centre,Residential,COVID-19,fraser
4,"August 8, 2020",New Vista Care Society,Residential,COVID-19,fraser


### Merge facilities

In [649]:
# merge1 = pd.merge(northern, fraser, how = 'outer')

In [1516]:
merge1

Unnamed: 0,city,facility,outbreaktype,datedeclared,health_authority,facilitytype,unit
0,Terrace,Terraceview Lodge - Lakelse Unit,Respiratory Illness,"April 9, 2020-Declared over: April 13, 2020",northern,,
1,,Mission Memorial Hospital,COVID-19,"June 16, 2020",fraser,Hospital,


In [650]:
# merge2 = pd.merge(merge1, vch_pivot, how = 'outer')

In [651]:
merge2

Unnamed: 0,city,facility,outbreaktype,datedeclared,health_authority,facilitytype,unit,address,restrictionsimposed,restrictionslifted,status
0,Terrace,Terraceview Lodge - Lakelse Unit,Respiratory Illness,"April 9, 2020-Declared over: April 13, 2020",northern,,,,,,
1,,Mission Memorial Hospital,COVID-19,"June 16, 2020",fraser,Hospital,,,,,
2,,"St. Paul's Hospital, NICU\nHoly Family Hospita...",COVID-19\nCOVID-19\nCOVID-19\nCOVID-19,,vancouver coastal health,,,"1081 Burrard Street, Vancouver\n7801 Argyle St...",16-Jul-20\n09-Jun-20\n12-Apr-20\n30-Mar-20,13-Jun-20\n05-Jun-20,declared over)


In [1468]:
# update new scrapes
aug4 = pd.merge(merge2, fraser, how = 'outer')

# aug 10
aug10 = pd.merge(merge2, fraser, how = 'outer')
aug10

Unnamed: 0,city,facility,outbreaktype,datedeclared,health_authority,facilitytype,unit,address,restrictionsimposed,restrictionslifted,status
0,Terrace,Terraceview Lodge - Lakelse Unit,Respiratory Illness,"April 9, 2020-Declared over: April 13, 2020",northern,,,,,,
1,,Mission Memorial Hospital,COVID-19,"June 16, 2020",fraser,Hospital,,,,,
2,,"St. Paul's Hospital, NICU\nHoly Family Hospita...",COVID-19\nCOVID-19\nCOVID-19\nCOVID-19,,vancouver coastal health,,,"1081 Burrard Street, Vancouver\n7801 Argyle St...",16-Jul-20\n09-Jun-20\n12-Apr-20\n30-Mar-20,13-Jun-20\n05-Jun-20,declared over)
3,,Dania Home,COVID-19,"July 31, 2020",fraser,Residential,,,,,
4,,Maple Ridge Seniors Village,COVID-19,"August 4, 2020",fraser,Residential,,,,,
5,,Derby Manor,COVID-19,"August 6, 2020",fraser,Residential,,,,,
6,,George Derby Centre,COVID-19,"August 7, 2020",fraser,Residential,,,,,
7,,New Vista Care Society,COVID-19,"August 8, 2020",fraser,Residential,,,,,


# Caveats:

1. VCH outbreak status is colour coded and requires custom scripts for each new outbreak
2. All only report active outbreaks (no information on previous outbreaks)
3. VCH also reports hospital outbreaks - not just LTCs