In [130]:
import requests
import pandas as pd
from datetime import date, datetime, timedelta
from bs4 import BeautifulSoup
import numpy as np
from math import floor
from pathlib import Path  


Wind speed in Knots converted to KM/H (*1.852)
3 day loop

1) Port Hardy 
2) Quillayute 

In [2]:
# Creates date range from str format dates
# e.g. "2021-01-01"
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def daterange(start_date, end_date):
    for n in range(int((end_date - start_date).days)):
        yield start_date + timedelta(n)

In [118]:
base_url = "http://weather.uwyo.edu/cgi-bin/sounding?region=naconf&TYPE=TEXT%3ALIST"

cols = ["Date", 
        "Station", 
        "Pressure", 
        "Height", 
        "Temp", 
        "DewPoint", 
        "Relative_Humidity", 
        "Mean_Mixed_Layer", 
        "Wind_Direction", 
        "Wind_Speed", 
        "Potential_Temp",
        "Equivalent_Potential_Temp",
        "Virtual_Potential_Temp"]

stations = [
    {"id": 72797, "name": "Quillayute"},
    {"id": 73033, "name": "Vernon"},
    {"id": 71109, "name": "Port Hardy"}]

now = [
    datetime.now().year, 
    datetime.now().month, 
    datetime.now().day]

# Create date range
# ~~~~~~~~~~~~~~~~~
dates_arr = []
start_date = date(2022, 4, 1)
#start_date = date(now[0], now[1], now[2])
end_date = date(now[0], now[1], now[2]+1)
print(f"Start Date:\t{start_date}\nEnd Date:\t{end_date}")

for single_date in daterange(start_date, end_date):
    single_date = (str(single_date).split("-")) 
    dates_arr.append([ int(x) for x in single_date ])
       
# Create list of URLs for each station
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
urls = []
dates_idx = []
station_idx = []
for d in dates_arr:
    for station in stations:
        url = base_url
        url += "&YEAR=" + str(d[0])
        url += "&MONTH=" + str(d[1])
        #url += "&FROM=" + str(now[1]) + str(now[2])
        #url += "&TO=" + str(now[1]) + str(now[2])
        url += "&FROM=" + str(d[1]) + "12"
        url += "&TO=" + str(d[1]) + "12"
        url += "&STNM=" + str(station["id"])
        url += "&REPLOT=1"
        urls.append(url)
        
        
        dates_idx.append("%02d-%02d-%02d" % (d[0], d[1], d[2]))
        station_idx.append(station["name"])

print(urls[0])

Start Date:	2022-04-01
End Date:	2022-06-13
http://weather.uwyo.edu/cgi-bin/sounding?region=naconf&TYPE=TEXT%3ALIST&YEAR=2022&MONTH=4&FROM=412&TO=412&STNM=72797&REPLOT=1


In [63]:
data = []
reading_idx = []

for i, url in enumerate(urls):
    # scrape site data
    # ~~~~~~~~~~~~~~~~
    page = requests.get(url)
    soup = BeautifulSoup(page.content, "html.parser")
    results = str(soup.find('pre'))
    
    # we want 700 first // 850 second
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    data.append(results[results.find("700"):].split()[:11])
    data[-1].insert(0, dates_idx[i])
    data[-1].insert(1, station_idx[i])
    data.append(results[results.find("850"):].split()[:11])
    data[-1].insert(0, dates_idx[i])
    data[-1].insert(1, station_idx[i])


In [119]:
# Remove rogue "e" values from pressure field
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
pressures = [700, 850]
ctr = 0
for item in data:
    if(not str(item[2]).isnumeric()):
        item[2] = pressures[ctr%2]
    ctr += 1

In [129]:
df = pd.DataFrame(data, columns=cols)

# Convert DataFrame to numeric values
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
for col in cols[2:]:
    try:
        df[col] = pd.to_numeric(df[col])
    except:
        print(col)
    
# Convert wind speed from Knots to KM/H
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
df['Wind_Speed'] = df['Wind_Speed'].multiply(1.852)
df.head()

Unnamed: 0,Date,Station,Pressure,Height,Temp,DewPoint,Relative_Humidity,Mean_Mixed_Layer,Wind_Direction,Wind_Speed,Potential_Temp,Equivalent_Potential_Temp,Virtual_Potential_Temp
0,2022-04-01,Quillayute,700,2809.0,-10.1,-11.2,92.0,2.33,230.0,74.08,291.3,298.4,291.7
1,2022-04-01,Quillayute,850,1284.0,-2.3,-2.3,100.0,3.82,260.0,64.82,283.7,294.7,284.4
2,2022-04-01,Vernon,700,2815.0,-8.1,-10.9,80.0,2.39,205.0,77.784,293.5,300.8,293.9
3,2022-04-01,Vernon,850,1256.0,2.4,-2.6,70.0,3.73,190.0,25.928,288.6,299.6,289.3
4,2022-04-01,Port Hardy,700,2685.0,-12.3,-16.6,70.0,1.5,200.0,59.264,288.8,293.5,289.1


In [131]:
filepath = Path('/Users/liamkenny/Documents/Programming/Aurora/scrape.csv')  
filepath.parent.mkdir(parents=True, exist_ok=True) 
df.to_csv(filepath) 

In [124]:
df.index

RangeIndex(start=0, stop=438, step=1)

In [127]:
df.groupby(["Date", "Station"]).Temp.mean()

Date        Station   
2022-04-01  Port Hardy   -7.80
            Quillayute   -6.20
            Vernon       -2.85
2022-04-02  Port Hardy   -7.80
            Quillayute   -6.20
                          ... 
2022-06-11  Quillayute   -1.65
            Vernon        0.85
2022-06-12  Port Hardy   -2.55
            Quillayute   -1.65
            Vernon        0.85
Name: Temp, Length: 219, dtype: float64

In [128]:
df.head

<bound method NDFrame.head of            Date     Station  Pressure  Height  Temp  DewPoint  \
0    2022-04-01  Quillayute       700  2809.0 -10.1     -11.2   
1    2022-04-01  Quillayute       850  1284.0  -2.3      -2.3   
2    2022-04-01      Vernon       700  2815.0  -8.1     -10.9   
3    2022-04-01      Vernon       850  1256.0   2.4      -2.6   
4    2022-04-01  Port Hardy       700  2685.0 -12.3     -16.6   
..          ...         ...       ...     ...   ...       ...   
433  2022-06-12  Quillayute       850  1477.0   1.4      -0.6   
434  2022-06-12      Vernon       700  3017.0  -4.7      -6.9   
435  2022-06-12      Vernon       850  1448.0   6.4       0.4   
436  2022-06-12  Port Hardy       700  2997.0  -6.7      -8.8   
437  2022-06-12  Port Hardy       850  1446.0   1.6      -0.8   

     Relative_Humidity  Mean_Mixed_Layer  Wind_Direction  Wind_Speed  \
0                 92.0              2.33           230.0      74.080   
1                100.0              3.82     

## APP (2)

Easy to read / on the go / big text

700 | 3020m | -7 |
- - - - - - - - -
850 | 1590m | -9 |

Wind direction in an arrow // color coded magnitude and direction


Gathering data from brohm ridge needs to be done hourly
- looking like avalanche canada page on Wx


## Drive (1)
Guide daily forms
- Integrate to slack

Users need to see
- Just the weather info


## Webcms (3)
(load one by one -- javascript)
- cayoosh
- whistler peak
- blackcomb
- rubble creek
- brohm ridge
- s2s


western developement fund of canada /// Western Economic Diversification Canada

In [None]:
table = [dates]
dates = [locations]
location = [variables]