In [1]:
import pandas as pd
import numpy as np
import requests
from tqdm import tqdm

# Lift pandas' column limit so wide frames are displayed without truncated columns.
pd.set_option('display.max_columns',None)

### Fetching Traffic Dataset

In [2]:
# Settings for the traffic download (WebTRIS API on nationalhighways.co.uk).
# NOTE(review): BASE_URL, START_DATE and END_DATE are assigned again in the
# weather section further down, so re-running the traffic fetch after that
# cell would use the weather values instead of these.
BASE_URL = "http://webtris.nationalhighways.co.uk/api/v1.0/"
# NOTE(review): the description below places the site on the A46 near Evesham,
# but the GPS reference it quotes appears to be far from that stretch of road.
# The weather section uses the same coordinates, so confirm which one is right.
SITE = 8184 #TMU Site 7004/1 on A46 northbound between A435 near Bishop's Cleeve and A44 near Evesham (south); GPS Ref: 51.46693541,-0.498590705; Northbound
# Dates are DDMMYYYY strings ("21052023" = 21 May 2023).
START_DATE = "01062013"
END_DATE = "21052023"
# Value sent as the page_size query parameter of every request.
PAGE_SIZE = 40000

In [6]:
def FetchTrafficDataFromAPI(base_url, site, start_date, end_date, page_size):
    """Download every page of the daily traffic report for one site.

    Pages are requested one after another, starting at page 1, until the API
    signals that nothing is left: either an HTTP 204 response or a payload
    whose ``Rows`` entry is missing or empty.  Termination therefore does not
    depend on any particular report date being present in the data.

    Parameters
    ----------
    base_url : str
        Root of the API.  A trailing ``/`` is tolerated.
    site : int or str
        Value placed in the ``sites`` query parameter.
    start_date, end_date : str
        Values placed unchanged in the ``start_date`` / ``end_date`` query
        parameters.
    page_size : int
        Value placed in the ``page_size`` query parameter.

    Returns
    -------
    pandas.DataFrame
        All received rows in page order with a fresh ``0..n-1`` index.  An
        empty frame is returned when the API has no rows for the request.

    Raises
    ------
    requests.HTTPError
        If a page request comes back with a 4xx/5xx status.
    RuntimeError
        If a page request comes back with any other unexpected status.
    """
    request_timeout = 300  # seconds; generous because single pages can be slow
    root = base_url.rstrip("/")  # avoids "//reports" when base_url ends in "/"

    pages = []
    page = 1
    with tqdm(unit="page") as progress:
        while True:
            url = f"{root}/reports/daily?sites={site}&start_date={start_date}&end_date={end_date}&page={page}&page_size={page_size}"
            response = requests.get(url, timeout=request_timeout)

            # "No content" means we have walked past the last page.
            if response.status_code == 204:
                break
            # Fail loudly instead of silently skipping a page and losing rows.
            response.raise_for_status()
            if response.status_code != 200:
                raise RuntimeError(
                    f"Unexpected HTTP {response.status_code} for page {page}: {url}"
                )

            rows = response.json().get("Rows") or []
            if not rows:
                break

            pages.append(pd.DataFrame(rows))
            progress.update(1)
            page += 1

    if not pages:
        return pd.DataFrame()
    # Concatenate once at the end rather than re-copying the frame per page.
    return pd.concat(pages, ignore_index=True)

In [7]:
# Download the traffic report for SITE. One HTTP request is made per page, so
# this cell is slow (the recorded run below took a little over four minutes).
traffic = FetchTrafficDataFromAPI(BASE_URL, SITE, START_DATE, END_DATE, PAGE_SIZE)
# (rows, columns) of the combined result.
traffic.shape

7it [04:11, 35.97s/it]


(290705, 24)

In [8]:
# First and last report date received. The values are fixed-width ISO-style
# strings (see output), so comparing them as text gives the chronological order.
traffic['Report Date'].min(), traffic['Report Date'].max()

('2013-06-01T00:00:00', '2023-04-01T00:00:00')

In [16]:
# Narrow the frame to the site label, the two timestamp fields, speed and volume.
traffic = traffic[
    [
        "Site Name",
        "Report Date",
        "Time Period Ending",
        "Avg mph",
        "Total Volume",
    ]
]

In [17]:
# Preview the first rows of the traffic frame.
# NOTE(review): 'Avg mph' is blank in every row of the recorded preview below;
# check how much of that column is missing before relying on it.
traffic.head()

Unnamed: 0,Site Name,Report Date,Time Period Ending,Avg mph,Total Volume
0,7004/1,2013-06-01T00:00:00,00:59:00,,69
1,7004/1,2013-06-01T00:00:00,01:59:00,,33
2,7004/1,2013-06-01T00:00:00,02:59:00,,23
3,7004/1,2013-06-01T00:00:00,03:59:00,,24
4,7004/1,2013-06-01T00:00:00,04:59:00,,26


In [19]:
# Save the traffic table as an Excel workbook without writing the index column.
# The path is relative to the working directory; presumably the 'dataset/'
# folder has to exist already — TODO confirm it is created before this runs.
traffic.to_excel('dataset/traffic_data_2013-23.xlsx', index=False)

### Fetching Weather Dataset

In [26]:
# Settings for the weather download (Open-Meteo archive endpoint).
# NOTE(review): BASE_URL, START_DATE and END_DATE overwrite the traffic settings
# of the same names defined earlier in the notebook; re-running the traffic
# fetch after this cell would use these values.
# The trailing "?" is needed because the fetch function appends the query
# parameters directly to this string.
BASE_URL = "https://archive-api.open-meteo.com/v1/archive?"
# Rounded form of the GPS reference quoted in the SITE comment of the traffic section.
LATITUDE = 51.4669
LONGITUDE = -0.4986
# Dates are YYYY-MM-DD strings here (the traffic section uses DDMMYYYY).
START_DATE = "2013-06-01"
# NOTE(review): the traffic data's last report date in the recorded output is
# 2023-04-01, one day after this end date — confirm the ranges are meant to differ.
END_DATE = "2023-03-31"
# Comma-separated variable names sent as the `hourly` query parameter.
REQUIREMENTS = "temperature_2m,relativehumidity_2m,precipitation,rain,snowfall,weathercode,windspeed_10m"

In [29]:
def FetchWeatherDataFromAPI(base_url, latitude, longitude, start_date, end_date, requirements):
    """Download hourly weather observations and return them as a DataFrame.

    Parameters
    ----------
    base_url : str
        Endpoint URL including the trailing ``?``; the query parameters are
        appended directly to it.
    latitude, longitude : float
        Values placed in the ``latitude`` / ``longitude`` query parameters.
    start_date, end_date : str
        Values placed unchanged in the ``start_date`` / ``end_date`` query
        parameters.
    requirements : str
        Value placed in the ``hourly`` query parameter (comma-separated
        variable names).

    Returns
    -------
    pandas.DataFrame
        Frame built from the ``hourly`` entry of the JSON response.

    Raises
    ------
    RuntimeError
        If the API answers with any status other than 200.  The message
        carries the status code and the start of the response body.
    """
    url = f"{base_url}latitude={latitude}&longitude={longitude}&start_date={start_date}&end_date={end_date}&hourly={requirements}"
    # Generous timeout: without one, a stalled connection would hang the cell.
    response = requests.get(url, timeout=300)

    # Fail with a clear message; falling through would leave nothing to return.
    if response.status_code != 200:
        raise RuntimeError(
            f"Weather request failed with HTTP {response.status_code}: {response.text[:200]}"
        )

    data = response.json()
    return pd.DataFrame(data['hourly'])

In [31]:
# Fetch the hourly weather series for the configured coordinates and date range
# with a single request.
weather = FetchWeatherDataFromAPI(BASE_URL, LATITUDE, LONGITUDE, START_DATE, END_DATE, REQUIREMENTS)
# (rows, columns) of the result.
weather.shape

(86184, 8)

In [32]:
# Preview the first rows: one row per hourly timestamp, one column per requested variable.
weather.head()

Unnamed: 0,time,temperature_2m,relativehumidity_2m,precipitation,rain,snowfall,weathercode,windspeed_10m
0,2013-06-01T00:00,11.7,78,0.0,0.0,0.0,1,18.5
1,2013-06-01T01:00,10.9,80,0.0,0.0,0.0,0,17.9
2,2013-06-01T02:00,10.2,81,0.0,0.0,0.0,0,17.6
3,2013-06-01T03:00,9.5,80,0.0,0.0,0.0,0,16.7
4,2013-06-01T04:00,8.9,79,0.0,0.0,0.0,0,16.1


In [33]:
# Save the weather table as an Excel workbook without writing the index column.
# The path is relative to the working directory; presumably the 'dataset/'
# folder has to exist already — TODO confirm it is created before this runs.
weather.to_excel('dataset/weather_data_2013-23.xlsx', index=False)