## Prepare and Clean the Data

In [12]:
# Dependencies
from pathlib import Path
import pandas as pd
import pprint
import os
import requests

In [13]:
# Relative path to the weather-station metadata CSV
file_path = Path("Resources/DPIRDWeatherStations.csv")

# Parse the CSV into a DataFrame
stations_df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first five rows
stations_df.head(5)

Unnamed: 0,stationCode,stationName,latitude,longitude,altitude,owner,ownerCode,startDate,endDate,probeHeight,...,capabilities.etoTall,capabilities.frostCondition,capabilities.heatCondition,capabilities.windErosionCondition,capabilities.richardsonUnit,capabilities.chillHour,online,status,comments,jobNumber
0,AN001,Allanooka,-29.063612,114.997161,131.0,WA Department of Primary Industries and Region...,DPIRD,2012-06-19,,1.25,...,Yes,Yes,Yes,Yes,Yes,Yes,Yes,open,,J3121
1,AM001,Amelup,-34.270827,118.268523,200.0,WA Department of Primary Industries and Region...,DPIRD,2019-10-09,,1.25,...,Yes,Yes,Yes,Yes,Yes,Yes,Yes,open,WMO number 99449,P0011
2,SH002,Babakin,-32.12548,118.00406,313.0,WA Department of Primary Industries and Region...,DPIRD,2016-06-22,,1.25,...,Yes,Yes,Yes,Yes,Yes,Yes,Yes,open,,J3451
3,BA,Badgingarra,-30.338049,115.539491,284.0,WA Department of Primary Industries and Region...,DPIRD,2008-11-19,,1.25,...,Yes,Yes,Yes,Yes,Yes,Yes,Yes,open,,J3315
4,BP001,Balingup,-33.7962,116.06398,227.0,WA Department of Primary Industries and Region...,DPIRD,2014-10-24,,1.25,...,Yes,Yes,Yes,Yes,Yes,Yes,Yes,open,,J3461


In [14]:
# Tally the distinct (non-null) station codes in the metadata
num_unique_stations = stations_df.loc[:, 'stationCode'].nunique(dropna=True)

# Report the tally
print(f"Number of Unique Stations: {num_unique_stations}")

Number of Unique Stations: 225


In [16]:
# Pull the station-code column out as a Series and display it
all_stations = stations_df.loc[:, 'stationCode']
all_stations

0      AN001
1      AM001
2      SH002
3         BA
4      BP001
       ...  
220       YS
221    YE001
222    YU001
223    YU002
224    YU003
Name: stationCode, Length: 225, dtype: object

In [20]:
# Rainfall endpoint of the DPIRD weather API (v2)
url = (
    "https://api.dpird.wa.gov.au/v2/weather/stations/rainfall"
)

In [22]:
# Query parameters for the rainfall endpoint
params = {
    "startDate": "2022-11-01",
    "endDate": "2023-11-30",
    # All station codes are sent as one comma-separated string
    "stationCode": ",".join(all_stations),
    # Allow one result per requested station instead of a hard-coded count
    "limit": len(all_stations),
}

# API key: read from the environment so the secret never lives in the notebook.
# NOTE(review): any key that was previously committed in this file should be
# treated as leaked and revoked/rotated.
api_key = os.environ.get("DPIRD_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the DPIRD_API_KEY environment variable before running this cell."
    )

# Headers with API key
headers = {
    "api_key": api_key
}

# API request; the timeout stops the cell from hanging forever on a stalled connection
response = requests.get(url, params=params, headers=headers, timeout=30)

# Fail loudly on a non-200 response: later cells read `data`, so silently
# continuing would leave it undefined (or stale from an earlier run).
if response.status_code != 200:
    raise RuntimeError(f"Error {response.status_code}: {response.text}")

# Parse the JSON response
data = response.json()

# Print each station's rainfall summary for a quick visual check
for station in data['collection']:
    station_code = station['stationCode']
    station_name = station['stationName']
    rainfall_data = station['rainfall']

    print(f"Station Code: {station_code}, Station Name: {station_name}")
    print("Rainfall Data:", rainfall_data)
    print("-" * 50)

Station Code: AN001, Station Name: Allanooka
Rainfall Data: {'to9AM': 0, 'since9AM': 0, 'currentHour': 0, 'last24Hrs': 0, 'last7Days': 0, 'last14Days': 0, 'monthToDate': 0, 'yearToDate': 307, 'period': 326.2}
--------------------------------------------------
Station Code: AM001, Station Name: Amelup
Rainfall Data: {'to9AM': 0, 'since9AM': 0.2, 'currentHour': 0, 'last24Hrs': 0.2, 'last7Days': 0.2, 'last14Days': 0.2, 'monthToDate': 0.2, 'yearToDate': 304.2, 'period': 343.6}
--------------------------------------------------
Station Code: SH002, Station Name: Babakin
Rainfall Data: {'to9AM': 0, 'since9AM': 0, 'currentHour': 0, 'last24Hrs': 0, 'last7Days': 0.6, 'last14Days': 0.6, 'monthToDate': 0.6, 'yearToDate': 258.6, 'period': 272.2}
--------------------------------------------------
Station Code: BA, Station Name: Badgingarra
Rainfall Data: {'to9AM': 0, 'since9AM': 0, 'currentHour': 0, 'last24Hrs': 0, 'last7Days': 0.8, 'last14Days': 0.8, 'monthToDate': 0.8, 'yearToDate': 330, 'period'

In [27]:
# Year whose records are kept
TARGET_YEAR = 2023

# Output column label -> key inside each station's 'rainfall' record
RAINFALL_COLUMNS = {
    'To 9 AM': 'to9AM',
    'Since 9 AM': 'since9AM',
    'Current Hour': 'currentHour',
    'Last 24 Hrs': 'last24Hrs',
    'Last 7 Days': 'last7Days',
    'Last 14 Days': 'last14Days',
    'Month to Date': 'monthToDate',
    'Year to Date': 'yearToDate',
    'Period': 'period',
}

# Collect one plain dict per station and build the DataFrame once at the end,
# rather than creating a one-row DataFrame per station and concatenating them.
records = []
for station in data['collection']:
    rainfall_data = station['rainfall']

    # The text before the first '-' in 'dateTime' is taken as the year
    # (assumes the value starts with 'YYYY-' -- TODO confirm against the API)
    year_data = int(station['dateTime'].split('-')[0])

    # Only keep stations whose record falls in the target year
    if year_data != TARGET_YEAR:
        continue

    record = {
        'Station Code': station['stationCode'],
        'Station Name': station['stationName'],
        'Year': year_data,
    }
    record.update(
        {label: rainfall_data[key] for label, key in RAINFALL_COLUMNS.items()}
    )
    records.append(record)

# Passing the column list explicitly fixes the column order and still yields a
# well-formed (empty) frame when no station matches, where pd.concat on an
# empty list would raise ValueError.
rainfall_df = pd.DataFrame(
    records,
    columns=['Station Code', 'Station Name', 'Year', *RAINFALL_COLUMNS],
)

# Display the DataFrame
rainfall_df

Unnamed: 0,Station Code,Station Name,Year,To 9 AM,Since 9 AM,Current Hour,Last 24 Hrs,Last 7 Days,Last 14 Days,Month to Date,Year to Date,Period
0,AN001,Allanooka,2023,0,0,0,0,0.0,0.0,0.0,307.0,326.2
1,AM001,Amelup,2023,0,0.2,0,0.2,0.2,0.2,0.2,304.2,343.6
2,SH002,Babakin,2023,0,0,0,0,0.6,0.6,0.6,258.6,272.2
3,BA,Badgingarra,2023,0,0,0,0,0.8,0.8,0.8,330.0,345.2
4,BP001,Balingup,2023,0.2,0,0,0.2,0.2,0.2,1.2,577.8,601.2
...,...,...,...,...,...,...,...,...,...,...,...,...
214,YS,Yilgarn,2023,0,0,0,0,5.6,5.6,5.6,237.4,246.8
215,YE001,York East,2023,0,0,0,0,0.2,0.2,0.2,344.6,372.2
216,YU001,Yuna,2023,0,0,0,0,0.0,0.0,0.0,258.8,281
217,YU002,Yuna NE,2023,0,0,,0,0.0,0.0,0.0,196.8,218.8


In [30]:
# Drop the 'Current Hour' column. Reassigning instead of mutating in place, and
# ignoring an already-absent column, keeps this cell safe to re-run (an in-place
# drop raises KeyError the second time it is executed).
rainfall_df = rainfall_df.drop(columns=['Current Hour'], errors='ignore')
rainfall_df

Unnamed: 0,Station Code,Station Name,Year,To 9 AM,Since 9 AM,Last 24 Hrs,Last 7 Days,Last 14 Days,Month to Date,Year to Date,Period
0,AN001,Allanooka,2023,0,0,0,0.0,0.0,0.0,307.0,326.2
1,AM001,Amelup,2023,0,0.2,0.2,0.2,0.2,0.2,304.2,343.6
2,SH002,Babakin,2023,0,0,0,0.6,0.6,0.6,258.6,272.2
3,BA,Badgingarra,2023,0,0,0,0.8,0.8,0.8,330.0,345.2
4,BP001,Balingup,2023,0.2,0,0.2,0.2,0.2,1.2,577.8,601.2
...,...,...,...,...,...,...,...,...,...,...,...
214,YS,Yilgarn,2023,0,0,0,5.6,5.6,5.6,237.4,246.8
215,YE001,York East,2023,0,0,0,0.2,0.2,0.2,344.6,372.2
216,YU001,Yuna,2023,0,0,0,0.0,0.0,0.0,258.8,281
217,YU002,Yuna NE,2023,0,0,0,0.0,0.0,0.0,196.8,218.8


In [33]:
# Keep only the rows that have a value in every column
rainfall_df = rainfall_df.dropna(axis='index', how='any')
rainfall_df

Unnamed: 0,Station Code,Station Name,Year,To 9 AM,Since 9 AM,Last 24 Hrs,Last 7 Days,Last 14 Days,Month to Date,Year to Date,Period
0,AN001,Allanooka,2023,0,0,0,0.0,0.0,0.0,307.0,326.2
1,AM001,Amelup,2023,0,0.2,0.2,0.2,0.2,0.2,304.2,343.6
2,SH002,Babakin,2023,0,0,0,0.6,0.6,0.6,258.6,272.2
3,BA,Badgingarra,2023,0,0,0,0.8,0.8,0.8,330.0,345.2
4,BP001,Balingup,2023,0.2,0,0.2,0.2,0.2,1.2,577.8,601.2
...,...,...,...,...,...,...,...,...,...,...,...
214,YS,Yilgarn,2023,0,0,0,5.6,5.6,5.6,237.4,246.8
215,YE001,York East,2023,0,0,0,0.2,0.2,0.2,344.6,372.2
216,YU001,Yuna,2023,0,0,0,0.0,0.0,0.0,258.8,281
217,YU002,Yuna NE,2023,0,0,0,0.0,0.0,0.0,196.8,218.8


In [43]:
# Persist the cleaned rainfall table; the DataFrame index is written as the
# first (unnamed) column of the CSV.
rainfall_df.to_csv(path_or_buf='Resources/rainfall_data.csv', index=True)