In [None]:
import io
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from PIL import Image

In [1]:
# Get a list of all the urls for the avalanche reports for Salt-Lake
page_numbers = range(17) #0 - 16 inclusive
url_base = 'https://utahavalanchecenter.org/archives/forecasts/salt-lake?page='
urls = [url_base + str(page_number) for page_number in page_numbers]

sub_urls = []


# REQUESTS
# Collect the href of every anchor on every archive page; the raw list is
# written to disk so the later cleaning cells can start from the CSV.
for website in urls:
    # A timeout keeps one stalled connection from hanging the whole crawl.
    response = requests.get(website, timeout=30)
    # Fail loudly on 4xx/5xx instead of harvesting links from an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
    links = soup.find_all('a')
    for link in links:
        href = link.get('href')  # None for anchors that carry no href
        print(href)
        sub_urls.append(href)

pd.DataFrame(sub_urls).to_csv('sub_urls.csv')


https://store.utahavalanchecenter.org/collections/events-1
https://www.mammut.com/
/
/observations-avalanches/submit
None
/forecast/logan
/forecast/ogden
/forecast/salt-lake
/forecast/provo
/forecast/uintas
/forecast/skyline
/forecast/moab
/forecast/abajos
/node/13549
/archives/forecasts
/node/41482
/node/13652
/observations
/avalanches
/avalanches/fatalities
http://wasatchbackcountryskiing.com/
/node/16726
/node/69813
/node/70741
/node/69815
/node/70744
https://store.utahavalanchecenter.org/collections/events-1
https://store.utahavalanchecenter.org/
/node/21578
/node/13544
https://utahavalanchecenter.org/core-values
/node/13545
/node/13546
/node/39841
/node/39875
/node/16727
/user/login
/node/39859
/join
/forecast/logan
/forecast/ogden
/forecast/salt-lake
/forecast/provo
/forecast/uintas
/forecast/skyline
/forecast/moab
/forecast/abajos
/node/13549
/archives/forecasts
/node/41482
/node/13652
/observations
/avalanches
/avalanches/fatalities
http://wasatchbackcountryskiing.com/
/node/16

In [4]:
# Load the scraped hrefs into a dataframe
df = pd.read_csv('sub_urls.csv')
df.columns = ['index', 'url']

# NOTE: deliberately no filtering in this cell. The scraped hrefs are mostly
# site-relative (e.g. '/forecast/salt-lake') and some are missing (NaN), so
# masking on the absolute 'https://utahavalanchecenter.org/forecast/salt-lake/'
# prefix either fails on the NaN rows or throws away the relative forecast
# links that the "More cleaning" step below relies on. That step fills the
# NaNs and filters on the relative prefix instead.
df['url'].head()


0    https://store.utahavalanchecenter.org/collecti...
1                              https://www.mammut.com/
2                                                    /
3                      /observations-avalanches/submit
4                                                  NaN
Name: url, dtype: object

In [6]:
# More cleaning: keep only the Salt Lake forecast links and make them absolute
base = 'https://utahavalanchecenter.org'
prefix = '/forecast/salt-lake/'

df = df.fillna('') # missing hrefs become '' so the string test below is defined
is_forecast_link = df['url'].str.startswith(prefix)
df = df[is_forecast_link].reset_index(drop=True)
df['url'] = base + df['url']

In [15]:
# Drop duplicates and save to csv
df = df.drop_duplicates(subset=['url']).reset_index(drop=True)
df.to_csv('daily_reports_urls.csv', index=False)

In [158]:
def get_page_source(url, timeout=30):
    """Download ``url`` and return the raw response body.

    Parameters
    ----------
    url : str
        Address of the page to fetch.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout a stalled connection would block indefinitely.

    Returns
    -------
    bytes
        The undecoded body of the HTTP response.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status, so an error page is
        never handed to the parsing code as if it were a report.
    requests.RequestException
        For connection failures and timeouts.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.content

In [159]:
# Fetch one report page (row label 65 of the URL table) to work with below
sample_row = 65
page_source = get_page_source(df.loc[sample_row, 'url'])

In [178]:
# first link will be the overall forecast image
def get_forecast_image_links(page_source):
    """Return absolute URLs of the forecast images referenced by a report page.

    Parameters
    ----------
    page_source : bytes or str
        HTML of the page. Bytes are decoded as UTF-8; undecodable bytes are
        replaced rather than raising, since only ASCII paths are searched for.

    Returns
    -------
    list of str
        One URL per ``<img>`` tag whose ``src`` starts with
        '/sites/default/files/forecast/', in document order. Empty if the
        page has no such image.
    """
    if isinstance(page_source, (bytes, bytearray)):
        html = bytes(page_source).decode('utf-8', errors='replace')
    else:
        html = page_source
    # Stay inside a single tag ([^>]) and stop the capture at the closing quote
    # of src, so attributes after src (alt=..., a ' />' close) never end up in
    # the captured path.
    paths = re.findall(r'<img\b[^>]*?\ssrc="/(sites/default/files/forecast/[^"]*)"', html)
    # prepend 'https://utahavalanchecenter.org/' to each site-relative path
    return ['https://utahavalanchecenter.org/' + path for path in paths]

# Extract the forecast image URLs from the page fetched into `page_source`.
links = get_forecast_image_links(page_source)


In [179]:
def get_color(image, pixel_location):
    """Look up the pixel value of ``image`` at ``pixel_location``.

    Thin wrapper around ``image.getpixel``; whatever that call returns is
    passed back unchanged.
    """
    pixel_value = image.getpixel(pixel_location)
    return pixel_value

In [180]:
def load_image(url, timeout=30):
    """Download the image at ``url`` and return it as a PIL image.

    Parameters
    ----------
    url : str
        Address of the image file.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    PIL.Image.Image
        The decoded image, fully read into memory.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.RequestException
        For connection failures and timeouts.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    # Read the whole body through `.content` rather than handing PIL the
    # `.raw` socket stream: `.raw` is not content-decoded and keeps the
    # connection tied up for as long as the lazily-loaded image is alive.
    image = Image.open(io.BytesIO(response.content))
    image.load()  # decode now so a corrupt download fails here, not at first pixel access
    return image

In [181]:
def show_image(image):
    """Draw ``image`` with matplotlib's pyplot interface and display the figure."""
    plt.imshow(image)
    plt.show()

In [182]:
def classify_color(rgba):
    """Return the name of the reference colour closest to ``rgba``.

    Parameters
    ----------
    rgba : sequence of 4 (or 3) numbers
        Pixel value as (R, G, B, A). A 3-channel (R, G, B) pixel is accepted
        and treated as fully opaque (alpha 255).

    Returns
    -------
    str
        Key of the nearest entry in the reference table, by Euclidean
        distance over the four channels. On a tie the entry listed first wins.

    Raises
    ------
    ValueError
        If ``rgba`` does not have 3 or 4 channels.
    """
    color_map = {
        'red': (237, 28, 36, 255), # High
        'orange': (247, 148, 30, 255), # Considerable
        'yellow': (255, 242, 0, 255), # Moderate
        'green': (80, 184, 72, 255), # Low
        'blue': (68, 187, 238, 255),
        'gray': (192, 192, 192, 255),
        'white': (255, 255, 255, 255),
        'black': (0, 0, 0, 255), # Extreme
        'transparent': (0, 0, 0, 0)
    }
    pixel = np.asarray(rgba, dtype=float)
    if pixel.shape == (3,):
        # Pixel from an image without an alpha channel: assume opaque.
        pixel = np.append(pixel, 255.0)
    if pixel.shape != (4,):
        raise ValueError(f'expected an RGB or RGBA pixel, got {rgba!r}')

    references = np.array(list(color_map.values()), dtype=float)
    # Squared distance ranks candidates exactly like the true distance, so the
    # square root (and the scipy dependency it used to pull in) is unnecessary.
    squared_distances = ((references - pixel) ** 2).sum(axis=1)
    return list(color_map)[int(np.argmin(squared_distances))]

In [183]:
def get_forecast_level(image, pixel_locations):
    """Classify the colour found at each of the given pixel locations.

    Returns a list of colour names, one per entry of ``pixel_locations`` and
    in the same order.
    """
    return [classify_color(get_color(image, xy)) for xy in pixel_locations]

In [184]:
def get_forecast_from_image(image):
    """Read the colour at every sample point of a forecast image.

    The image is probed at hard-coded pixel coordinates, eight compass
    aspects for each of the bands 'high', 'mid' and 'low'.

    Returns a dict mapping each band name to a list of eight colour names,
    ordered N, NE, E, SE, S, SW, W, NW.
    """
    # Sample coordinates per band and aspect, as passed to image.getpixel.
    pixel_map = {
        'high': {
            'N': (200, 132),
            'NE': (220, 132),
            'E': (240, 154),
            'SE': (220, 176),
            'S': (200, 176),
            'SW': (180, 176),
            'W': (160, 154),
            'NW': (180, 132),
        },
        'mid': {
            'N': (200, 110),
            'NE': (240, 110),
            'E': (270, 154),
            'SE': (240, 220),
            'S': (200, 230),
            'SW': (140, 198),
            'W': (120, 154),
            'NW': (140, 110),
        },
        'low': {
            'N': (200, 66),
            'NE': (300, 110),
            'E': (320, 176),
            'SE': (280, 264),
            'S': (200, 286),
            'SW': (100, 242),
            'W': (80, 176),
            'NW': (100, 110),
        },
    }
    return {
        band: get_forecast_level(image, sample_points.values())
        for band, sample_points in pixel_map.items()
    }

In [185]:
def get_forecast_from_url(url):
    """Load the forecast image at ``url`` and return the forecast read from it."""
    return get_forecast_from_image(load_image(url))



In [187]:
# REQUEST
# The first extracted link is the overall forecast image; read its forecast.
overall_image_url = links[0]
f = get_forecast_from_url(overall_image_url)
f

{'high': ['red', 'red', 'red', 'red', 'red', 'red', 'red', 'red'],
 'mid': ['orange',
  'orange',
  'orange',
  'orange',
  'orange',
  'orange',
  'orange',
  'orange'],
 'low': ['orange',
  'orange',
  'orange',
  'orange',
  'orange',
  'orange',
  'orange',
  'orange']}

In [None]:
# Empty table: a 'date' column plus one column per band prefix (H, M, L)
# and compass aspect, e.g. 'HN', 'MSE', 'LNW'.
aspects = ['N', 'NE', 'E', 'SE', 'S', 'SW', 'W', 'NW']
rose_columns = ['date'] + [band + aspect for band in 'HML' for aspect in aspects]
avalanche_rose = pd.DataFrame(columns=rose_columns)
    