In [250]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


%matplotlib inline

In [251]:
# Get data on avalanche forecasts and incidents from Avalanche Canada
# Avalanche Canada has an unstable public api
# https://github.com/avalanche-canada/ac-web
# Since API might change, this code might break
import json
import os
import urllib.request
import pandas as pd
import time
import requests
import io
import zipfile
import warnings


# Incidents
url = "http://incidents.avalanche.ca/public/incidents/?format=json"
req = urllib.request.Request(url)
with urllib.request.urlopen(req) as response:
    result = json.loads(response.read().decode('utf-8'))
incident_list = result["results"]
while (result["next"] != None):
    req = urllib.request.Request(result["next"])
    with urllib.request.urlopen(req) as response:
        result = json.loads(response.read().decode('utf-8'))
    incident_list = incident_list + result["results"]
incidents_brief = pd.DataFrame.from_dict(incident_list,orient="columns")
pd.options.display.max_rows = 20
pd.options.display.max_columns = 8
incidents_brief

Unnamed: 0,id,date,location,location_province,group_activity,num_involved,num_injured,num_fatal
0,8bc4720d-498c-4793-81ef-c43db9f36ca4,2021-11-27,"Sunshine Bowl, Hasler Area",BC,Snowmobiling,3.0,0.0,1
1,6a3a4698-d047-4082-bdea-92f4db7e63bf,2021-05-30,Mount Andromeda-Skyladder,AB,Mountaineering,2.0,0.0,2
2,ba14a125-29f7-4432-97ad-73a53207a5e7,2021-04-05,Haddo Peak,AB,Skiing,2.0,0.0,1
3,59023c05-b679-4e9f-9c06-910021318663,2021-03-29,Eureka Peak,BC,Snowmobiling,1.0,0.0,1
4,10774b2d-b7de-42ac-a600-9828cb4e6129,2021-03-04,Reco Mountain,BC,Snowmobiling,1.0,0.0,1
...,...,...,...,...,...,...,...,...
484,101c517b-29a4-4c49-8934-f6c56ddd882d,1840-02-01,Château-Richer,QC,Unknown,,,1
485,b2e1c50a-1533-4145-a1a2-0befca0154d5,1836-02-09,Quebec,QC,Unknown,,,1
486,18e8f963-da33-4682-9312-57ca2cc9ad8d,1833-05-24,Carbonear,NL,Unknown,,0.0,1
487,083d22df-ed50-4687-b9ab-1649960a0fbe,1825-02-04,Saint-Joseph de Lévis,QC,Inside Building,,,5


In [252]:
# incidents
# We can get more information about these incidents e.g. "https://www.avalanche.ca/incidents/37d909e4-c6de-43f1-8416-57a34cd48255"
# this information is also available through the API
def get_incident_details(id):
    url = "http://incidents.avalanche.ca/public/incidents/{}?format=json".format(id)
    req = urllib.request.Request(url)
    with urllib.request.urlopen(req) as response:
        result = json.loads(response.read().decode('utf-8'))
    return(result)


incidentsfile = "https://datascience.quantecon.org/assets/data/avalanche_incidents.csv"

# To avoid loading the avalanche Canada servers, we save the incident details locally.
if (not os.path.isfile(incidentsfile)):
    incident_detail_list = incidents_brief.id.apply(get_incident_details).to_list()
    incidents = pd.DataFrame.from_dict(incident_detail_list, orient="columns")
    incidents.to_csv(incidentsfile)
else:
    incidents = pd.read_csv(incidentsfile)

incidents

Unnamed: 0,id,ob_date,location,location_desc,...,weather_comment,snowpack_obs,snowpack_comment,documents
0,8bc4720d-498c-4793-81ef-c43db9f36ca4,2021-11-27,"Sunshine Bowl, Hasler Area",Approx. 17km East of Powder King ski area,...,"Overcast, windy conditions were reported with ...","{'hs': None, 'hn24': None, 'hst': None, 'hst_r...",A snow profile near the avalanche on the follo...,"[{'date': '2021-11-30', 'title': 'Scene photo'..."
1,6a3a4698-d047-4082-bdea-92f4db7e63bf,2021-05-30,Mount Andromeda-Skyladder,Approximately 96km SE of Jasper,...,,"{'hs': None, 'hn24': None, 'hst': None, 'hst_r...",,"[{'date': '2021-06-01', 'title': 'Mt Andromeda..."
2,ba14a125-29f7-4432-97ad-73a53207a5e7,2021-04-05,Haddo Peak,Approximately 6km SW of Lake Louise Village,...,,"{'hs': None, 'hn24': None, 'hst': None, 'hst_r...",,"[{'date': '2021-04-05', 'title': 'Overview pho..."
3,59023c05-b679-4e9f-9c06-910021318663,2021-03-29,Eureka Peak,Approximately 100km east of Williams Lake,...,,"{'hs': None, 'hn24': None, 'hst': None, 'hst_r...",,"[{'date': '2021-04-01', 'title': 'Overview', '..."
4,10774b2d-b7de-42ac-a600-9828cb4e6129,2021-03-04,Reco Mountain,Approximately 13km east of New Denver,...,,"{'hs': None, 'hn24': None, 'hst': None, 'hst_r...",,"[{'date': '2021-03-05', 'title': 'Scene Overvi..."
...,...,...,...,...,...,...,...,...,...
484,101c517b-29a4-4c49-8934-f6c56ddd882d,1840-02-01,Château-Richer,,...,,"{'hs': None, 'hn24': None, 'hst': None, 'hst_r...",,[]
485,b2e1c50a-1533-4145-a1a2-0befca0154d5,1836-02-09,Quebec,more details unknown,...,,"{'hs': None, 'hn24': None, 'hst': None, 'hst_r...",,[]
486,18e8f963-da33-4682-9312-57ca2cc9ad8d,1833-05-24,Carbonear,,...,,"{'hs': None, 'hn24': None, 'hst': None, 'hst_r...",,"[{'title': 'Carbonear, May 24, 1833', 'source'..."
487,083d22df-ed50-4687-b9ab-1649960a0fbe,1825-02-04,Saint-Joseph de Lévis,Pointe Lévis,...,,"{'hs': None, 'hn24': None, 'hst': None, 'hst_r...",,[]


In [253]:
# clean up activity names
incidents.group_activity.unique()

array(['Snowmobiling', 'Mountaineering', 'Skiing', 'Skiing/Snowboarding',
       'Snow Biking', 'Snowshoeing', 'Snowboarding', 'Backcountry Skiing',
       'Ice Climbing', 'Ski touring', 'Heliskiing',
       'Snowshoeing & Hiking', 'Mechanized Skiing', 'Work',
       'Other Recreational', 'Out-of-bounds Skiing',
       'At Outdoor Worksite', 'Lift Skiing Closed', 'Lift Skiing Open',
       'Hunting/Fishing', 'Out-of-Bounds Skiing', 'Control Work',
       'Inside Building', 'Car/Truck on Road', 'Inside Car/Truck on Road',
       'Unknown', 'Outside Building'], dtype=object)

In [254]:
from itertools import chain
# pd.DataFrame(chain.from_iterable(incidents.avalanche_obs)).replace(r'^s*$', float('NaN'), regex = True).dropna()
pd.DataFrame(chain.from_iterable(incidents.avalanche_obs))

Unnamed: 0,size,type,trigger,aspect,elevation,slab_width,slab_thickness,observation_date
0,3.0,S,Ma,NE,1700.0,350.0,60.0,
1,2.5,S,Sa,N,3075.0,60.0,75.0,
2,2.0,S,Sa,E,2950.0,40.0,50.0,
3,2.5,CS,Sa,E,2170.0,50.0,,
4,3.0,S,Ma,W,2465.0,125.0,85.0,
...,...,...,...,...,...,...,...,...
484,,,U,,,,,1800-01-01
485,,,U,,,,,1843-12-18
486,,,U,,,,,1840-02-01
487,,,U,,,,,1836-02-09


In [255]:
incidents.columns

Index(['id', 'ob_date', 'location', 'location_desc', 'location_coords',
       'location_coords_type', 'location_elevation', 'location_province',
       'num_involved', 'num_injured', 'num_fatal', 'comment', 'group_activity',
       'avalanche_obs', 'weather_obs', 'weather_comment', 'snowpack_obs',
       'snowpack_comment', 'documents'],
      dtype='object')

In [273]:
from bs4 import BeautifulSoup
import requests
from urllib.request import Request, urlopen

site = "https://avalanche.org/avalanche-accidents/"

# This is done to prevent 'HTTPError: HTTP Error 403: Forbidden'
hdr = {'User-Agent': 'Mozilla/5.0'}
req = Request(site,headers=hdr)
page = urlopen(req)

# Prepare soup to access the source code
soup = BeautifulSoup(page)

# Scrape the source code to access the source containing the tables
soup.find('div', class_='content-area').iframe

# Read the cleaned up source and convert it into dataframes 
df = pd.read_html('https://avalanche.state.co.us/caic/acc/acc_us.php', parse_dates=True)

# Only select the useful tables
df = df[1::2]

# Clean the tables and merge them into one single dataframe representing cases in the US
def format_date_col(s, year):
    """
    This function is used to clean the date columns.
    It takes a string and cleans the string by removing the dagger sign and
    adds the year to the date string.
    """
    month = s.replace('†','').replace('/','-')
    year = str(year) + '-'
    return year+month

years = (2021, 2020, 2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012, 2011, 2010, 2009)
for data, yr in zip(df, years):
    data['Date'] = data['Date'].apply(format_date_col, args=[yr])
    
us_incidents = pd.concat(df).reset_index().drop(columns = ["index"])

us_incidents

Unnamed: 0,Date,State,Location,Description,Killed
0,2021-12-17,ID,"Ryan Peak, Idaho",1 skier and 1 snowmobiler killed,2
1,2021-12-11,WA,"Silver Basin, closed portion of Crystal Mounta...",6 backcountry tourers caught and 1 killed,1
2,2020-05-13,AK,"Ruth Glacier, Denali National Park and Preserve","2 climbers caught in serac fall, 1 killed",1
3,2020-03-27,AK,Matanuska Glacier,1 heliskier killed,1
4,2020-03-22,CO,Lime Creek south of Edwards,"2 sidecountry skiers caught, 1 buried and killed",1
...,...,...,...,...,...
269,2009-01-06,CO,Battle Mountain - outside Vail Mountain ski area,"1 snowboader caught, partially buried critical...",1
270,2009-01-03,MT,"Scotch Bonnet Mountain, near Lulu Pass","1 Snowmobiler caught, buried, and killed",1
271,2009-01-02,OR,Near Paulina Peak,"1 Snowmobiler caught, buried, and killed",1
272,2009-12-17,ID,"Rock Lake, Cascade, Idaho","2 snowmobilers caught, buried, 1 rescued, 1 ki...",1
