In [None]:
import pandas as pd
import requests
from pandas import json_normalize
from pathlib import Path
from api_key import api_key

In [None]:
# Station-list endpoint of the DPIRD weather API, with paging/group options in
# the query string. Exactly one "?" must introduce the query string: with "??"
# the first parameter is sent under the name "?offset" instead of "offset".
url = "https://api.dpird.wa.gov.au/v2/weather/stations?offset=0&limit=225&group=all"
# Headers with API key (the key itself comes from the local api_key module)
headers = {
    "api_key": api_key
}

In [None]:
# Request the station list and decode the JSON body.
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() turns an HTTP error status into an immediate, descriptive
# exception instead of an unrelated failure when the payload is read later.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
all_stations_data = response.json()

In [None]:
# Convert the 'collection' list of the API response into a DataFrame
weather_stations = all_stations_data['collection']
df = pd.DataFrame(weather_stations)

# Flatten the nested 'capabilities' column and append the resulting columns
df = pd.concat([df.drop(['capabilities'], axis=1), json_normalize(df['capabilities'])], axis=1)

# Drop rows with NA values in the identifying columns.
# dropna(..., inplace=True) returns None, so assigning its result would leave
# df_cleaned empty-handed; use the returned frame instead.
df_cleaned = df.dropna(subset=['stationCode', 'stationName', 'latitude', 'longitude'])

# Later cells read `df`, so keep that name pointing at the cleaned frame
df = df_cleaned

df

In [None]:
# Save the station table; the DataFrame index is written as the first column
df.to_csv(path_or_buf="../Resources/stationslist.csv", index=True)

In [None]:
# Collect every 'stationCode' value into a plain list for the next API calls
station_code_list = df.loc[:, 'stationCode'].to_list()

In [None]:
# Endpoint for the daily weather summaries (replaces the station-list URL above)
url = "https://api.dpird.wa.gov.au/v2/weather/stations/summaries/daily?"
# Request headers carrying the API key
headers = dict(api_key=api_key)

In [None]:
# Initialize an empty list to store one row (dict) per station per daily summary
all_stations_data = []

# Query parameters that are identical for every station; "stationCode" is added
# per request inside the loop.
# NOTE(review): limit/offset are sent unchanged - confirm against the API
# documentation that a limit of 225 covers the whole requested date range.
base_params = {
    "endDate": "2023-12-29",
    "group": "api",
    "includeClosed": "false",
    "limit": 225,
    "offset": 0,
    "startDate": "2023-01-01",
}

# Seconds to wait for the server before giving up on a single station
request_timeout = 30


def _nested(value, *path):
    """Follow `path` (dict keys / list indexes) into `value`.

    Returns the value found at the end of the path, or None as soon as a step
    cannot be taken (absent key, index out of range, or a None/non-container
    encountered on the way).
    """
    for step in path:
        try:
            value = value[step]
        except (KeyError, IndexError, TypeError):
            return None
    return value


# Iterate through each station
for station in station_code_list:
    # Make the request for this station. A network failure, an HTTP error
    # status or a non-JSON body skips the station with a visible message
    # instead of hanging or aborting the whole run.
    try:
        response = requests.get(
            url,
            params={**base_params, "stationCode": station},
            headers=headers,
            timeout=request_timeout,
        )
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as error:
        print(f"Skipping station {station}: {error}")
        continue

    # Only the first entry of a non-empty 'collection' list is used
    station_entry = _nested(data, "collection", 0)
    if station_entry is None:
        continue

    # Extract relevant information for each summary
    for summary in _nested(station_entry, "summaries") or []:
        # Fields that cannot be read become None (a real missing value), so the
        # dropna() applied to the resulting DataFrame can filter incomplete
        # rows; a string placeholder such as "N/A" would survive dropna() and
        # end up in the exported data.
        all_stations_data.append({
            "Year": _nested(summary, "period", "year"),
            "Month": _nested(summary, "period", "month"),
            "Day": _nested(summary, "period", "day"),
            "stationCode": _nested(station_entry, "stationCode"),
            "stationName": _nested(station_entry, "stationName"),
            "rainfall": _nested(summary, "rainfall"),
            "relativeHumidity": _nested(summary, "relativeHumidity", "avg"),
            "airTemperature_avg": _nested(summary, "airTemperature", "avg"),
            "wind_avg_speed": _nested(summary, "wind", 0, "avg", "speed"),
        })

In [None]:
# Columns every exported row must hold a real value for
required_columns = ['Year', 'Month', 'Day', 'stationCode', 'stationName', 'rainfall',
                    'relativeHumidity', 'airTemperature_avg', 'wind_avg_speed']

# Convert the list of dictionaries into a DataFrame. Naming the columns keeps
# the frame well-formed (and the dropna below valid) even if the list is empty.
all_stations = pd.DataFrame(all_stations_data, columns=required_columns)

# dropna() only removes real missing values (None/NaN); the string "N/A" is
# ordinary data to pandas. Discard any row holding that placeholder explicitly,
# then drop the rows that have genuine missing values.
has_placeholder = all_stations.eq("N/A").any(axis=1)
all_stations = all_stations.loc[~has_placeholder].dropna(subset=required_columns)

# Display dataframe
all_stations.head()

In [None]:
# Write the daily summaries to CSV, leaving out the DataFrame index column
all_stations.to_csv(path_or_buf="../Resources/raindata.csv", index=False)