In [80]:
from bs4 import BeautifulSoup
import pandas as pd
import requests

# Scraping Data from the Milwaukee Journal Sentinel

The Milwaukee Journal Sentinel provides a tool for finding county-level weather data, sourced from data provided by the National Centers for Environmental Information (NCEI).

For Spokane County, Washington, that data is available here:

[https://data.jsonline.com/weather-data/spokane-county-washington/53063/2023-09-01/](https://data.jsonline.com/weather-data/spokane-county-washington/53063/2023-09-01/)

On this page, you can select "View all Spokane County, Washington data in a table". The resulting table is here:

[https://data.jsonline.com/weather-data/spokane-county-washington/53063/2023-09-01/table/](https://data.jsonline.com/weather-data/spokane-county-washington/53063/2023-09-01/table/)

The code below scrapes the data from the table on this page and stores it in a pandas dataframe. To download data from another county, modify the `url` in the next code block.

In [90]:
# Table view of the county's weather data. To scrape a different county,
# point `url` at that county's table page (example kept below).
url = 'https://data.jsonline.com/weather-data/spokane-county-washington/53063/2001-12-01/table/'
#url = 'https://data.jsonline.com/weather-data/lewis-county-washington/53041/2001-12-01/table/'

# requests waits indefinitely by default; a timeout keeps this cell from
# hanging forever if the server never answers.
site_data = requests.get(url, timeout=30)
# Stop here on an HTTP error (4xx/5xx) rather than parsing an error page
# in the cells that follow.
site_data.raise_for_status()

In [91]:
# Parse the raw response bytes using Python's built-in HTML parser.
soup = BeautifulSoup(markup=site_data.content, features='html.parser')

In [92]:
# Locate the data table by its element id.
table = soup.find('table', {'id':'hitable'})
if table is None:
    # find() returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on the next line.
    raise ValueError(
        "Could not find <table id='hitable'> in the downloaded page; "
        "check the URL or whether the page layout has changed."
    )

# Text of every <div> inside the table; a later cell uses these as the
# values of the 'date' column.
dates = table.find_all('div')
date_list = [date.text for date in dates]

In [93]:
# Text of every <td> in the table, flattened into one list.
cells = [td.get_text() for td in table.find_all('td')]

# Step through the flat list 7 cells at a time, keeping the 6 cells that
# start at offset 1 of each step (the cell at offset 0 is skipped).
# NOTE(review): assuming 7 cells per table row, the `len(cells) - 6` bound
# leaves out the final group of 7 — confirm that this is intended.
data = [
    cells[start:start + 6]
    for start in range(1, len(cells) - 6, 7)
]

In [94]:
# One DataFrame row per record in `data`; columns get default integer labels.
df = pd.DataFrame(data=data)

In [95]:
# Add the scraped labels as a 'date' column, leaving off the last entry of
# date_list. NOTE(review): presumably this keeps the dates the same length
# as `data` — confirm against the table layout.
df = df.assign(date=date_list[:-1])

In [96]:
# Map the positional column labels (0-5) to descriptive names.
column_names = dict(enumerate([
    'avg_temp',
    'min_temp',
    'max_temp',
    'precipitation',
    'heating_deg_days',
    'cooling_deg_days',
]))

# Put 'date' first, followed by the six value columns, then apply the names.
# NOTE: this cell expects the integer-labelled columns, so re-running it
# after the rename raises a KeyError; re-run from the DataFrame creation cell.
df = df.loc[:, ['date', *column_names]].rename(columns=column_names)

In [97]:
# Convert the 'date' column from scraped text to pandas datetimes.
df['date'] = df['date'].pipe(pd.to_datetime)

In [98]:
# Preview the first ten rows of the assembled frame.
df.head(n=10)

Unnamed: 0,date,avg_temp,min_temp,max_temp,precipitation,heating_deg_days,cooling_deg_days
0,2023-09-01,59.5,46.5,72.5,0.87,199,34.0
1,2023-08-01,69.5,54.7,84.2,1.36,29,168.0
2,2023-07-01,71.1,54.6,87.5,0.15,15,203.0
3,2023-06-01,63.9,50.2,77.5,0.91,98,65.0
4,2023-05-01,61.3,48.6,74.0,1.6,157,43.0
5,2023-04-01,44.6,33.4,55.7,1.5,613,
6,2023-03-01,36.5,27.0,46.1,1.27,882,
7,2023-02-01,31.4,23.4,39.4,1.01,940,
8,2023-01-01,31.1,26.1,36.0,1.56,1052,
9,2022-12-01,23.9,18.7,29.0,3.94,1275,
