In [1]:
import pandas as pd
import requests
from pandas import json_normalize
from api_key import api_key

In [2]:
# Set the API base URL
url = "https://api.dpird.wa.gov.au/v2/weather/stations??offset=0&limit=225&group=all"
# Headers with API key
headers = {
    "api_key": api_key
}

In [3]:
# Parse the JSON and retrieve data
response = requests.get(url,headers=headers)
all_stations_data = response.json()

In [4]:
#convert the data to a DataFrame using pandas
weather_stations = all_stations_data['collection']
df = pd.DataFrame(weather_stations)

# Flatten nested 'capabilities'columns
df = pd.concat([df.drop(['capabilities'], axis=1), json_normalize(df['capabilities'])], axis=1)

# Drop rows with NA values in specific columns
columns_to_clean = ['stationCode', 'stationName', 'latitude', 'longitude']
df_cleaned = df.dropna(subset=columns_to_clean)

df

Unnamed: 0,stationCode,stationName,latitude,longitude,altitude,owner,ownerCode,startDate,endDate,probeHeight,...,wind2,wind3,apparentTemperature,etoShort,etoTall,frostCondition,heatCondition,windErosionCondition,richardsonUnit,chillHour
0,AN001,Allanooka,-29.063612,114.997161,131.0,WA Department of Primary Industries and Region...,DPIRD,2012-06-19,,1.25,...,False,False,True,True,True,True,True,True,True,True
1,AM001,Amelup,-34.270827,118.268523,200.0,WA Department of Primary Industries and Region...,DPIRD,2019-10-09,,1.25,...,False,False,True,True,True,True,True,True,True,True
2,SH002,Babakin,-32.125480,118.004060,313.0,WA Department of Primary Industries and Region...,DPIRD,2016-06-22,,1.25,...,False,False,True,True,True,True,True,True,True,True
3,BA,Badgingarra,-30.338049,115.539491,284.0,WA Department of Primary Industries and Region...,DPIRD,2008-11-19,,1.25,...,False,False,True,True,True,True,True,True,True,True
4,BP001,Balingup,-33.796200,116.063980,227.0,WA Department of Primary Industries and Region...,DPIRD,2014-10-24,,1.25,...,False,False,True,True,True,True,True,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
220,MA003,Yanmah,-34.225653,116.017767,295.0,WA Department of Primary Industries and Region...,DPIRD,2012-12-20,,1.25,...,False,False,True,True,True,True,True,True,True,True
221,YS,Yilgarn,-31.915622,119.256055,468.0,WA Department of Primary Industries and Region...,DPIRD,2008-11-01,,1.25,...,False,False,True,True,True,True,True,True,True,True
222,YE001,York East,-31.835878,116.921077,229.0,WA Department of Primary Industries and Region...,DPIRD,2013-11-08,,1.25,...,False,False,True,True,True,True,True,True,True,True
223,YU001,Yuna,-28.337630,114.989800,329.0,WA Department of Primary Industries and Region...,DPIRD,2012-06-21,,1.25,...,False,False,True,True,True,True,True,True,True,True


In [5]:
# Assuming 'stationCode' is the column you want to extract
station_code_list = df['stationCode'].tolist()

In [20]:
station_code_list1=station_code_list[0:2]
station_code_list1

['AN001', 'AM001']

In [21]:
# Set the API base URL
url = "https://api.dpird.wa.gov.au/v2/weather/stations/summaries/daily?"
# Headers with API key
headers = {
    "api_key": api_key
}

In [22]:
# Initialize an empty list to store the data for all stations
all_stations_data = []

# Iterate through each station
for station in station_code_list1:
    # Parameters for the specific station
    params = {
        "endDate": "2023-12-29",
        "group": "api",
        "includeClosed": "false",
        "limit": 225,
        "offset": 0,
        "startDate": "2023-01-01",
        "stationCode": station
    }

    # Make the request for each station
    response = requests.get(url, params=params, headers=headers)
    data = response.json()

    # Extract relevant information for each summary
    for summary in data["collection"][0]["summaries"]:
        date_str = f"{summary['period']['year']}-{summary['period']['month']}-{summary['period']['day']}"
        station_info = {
            "date": pd.to_datetime(date_str),
            "stationCode": data["collection"][0]["stationCode"],
            "stationName": data["collection"][0]["stationName"],
            "rainfall": summary["rainfall"],
            "relativeHumidity": summary["relativeHumidity"]["avg"],
            "airTemperature_avg": summary["airTemperature"]["avg"],
            "wind_avg_speed": summary["wind"][0]["avg"]["speed"]
        }

        # Append the information for the current station and summary to the list
        all_stations_data.append(station_info)

In [23]:
# Convert the list of dictionaries into a DataFrame
all_stations = pd.DataFrame(all_stations_data)
all_stations

Unnamed: 0,date,stationCode,stationName,rainfall,relativeHumidity,airTemperature_avg,wind_avg_speed
0,2023-01-01,AN001,Allanooka,0.0,66.1,21.0,17.93
1,2023-01-02,AN001,Allanooka,0.0,52.2,25.2,16.49
2,2023-01-03,AN001,Allanooka,0.0,19.1,32.2,24.77
3,2023-01-04,AN001,Allanooka,0.0,22.9,32.1,21.67
4,2023-01-05,AN001,Allanooka,0.0,17.8,33.3,18.12
...,...,...,...,...,...,...,...
445,2023-08-09,AM001,Amelup,6.0,80.7,9.6,6.52
446,2023-08-10,AM001,Amelup,0.0,90.7,7.7,3.49
447,2023-08-11,AM001,Amelup,1.2,85.9,9.4,5.53
448,2023-08-12,AM001,Amelup,0.4,89.6,10.4,11.45


In [24]:
data

{'metadata': {'status': 200,
  'links': [{'rel': 'self',
    'href': 'https://api.dpird.wa.gov.au/v2/weather/stations/summaries/daily?endDate=2023-12-29&group=api&includeClosed=false&limit=225&offset=0&startDate=2023-01-01&stationCode=AM001'}],
  'collection': {'count': 363,
   'limit': 225,
   'offset': 0,
   'links': [{'rel': 'first', 'href': None},
    {'rel': 'previous', 'href': None},
    {'rel': 'next',
     'href': 'https://api.dpird.wa.gov.au/v2/weather/stations/summaries/daily?endDate=2023-12-29&group=api&includeClosed=false&limit=225&startDate=2023-01-01&stationCode=AM001&offset=225'},
    {'rel': 'last',
     'href': 'https://api.dpird.wa.gov.au/v2/weather/stations/summaries/daily?endDate=2023-12-29&group=api&includeClosed=false&limit=225&startDate=2023-01-01&stationCode=AM001&offset=225'}]}},
 'collection': [{'stationCode': 'AM001',
   'stationName': 'Amelup',
   'summaries': [{'period': {'from': '2022-12-31T16:00:00Z',
      'to': '2023-01-01T16:00:00Z',
      'year': 2023