In [1]:
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.poolmanager import PoolManager
import ssl
import pandas as pd
import time
from datetime import datetime, timedelta

# Root endpoint of the vehicle API; detail URLs are built by appending an ID
base_url = "https://api.cepik.gov.pl/pojazdy"

# Client-side request ceilings
MAX_REQUESTS_PER_SECOND = 20
MAX_REQUESTS_PER_MINUTE = 100

# Mutable rate-limiting state: a request counter plus the start timestamps
# of the current one-minute and one-second windows
request_count = 0
minute_start_time = datetime.now()
second_start_time = datetime.now()

# Transport adapter that injects a custom SSL context into the connection pool
class SSLAdapter(requests.adapters.HTTPAdapter):
    """HTTP adapter whose pool manager uses a default SSL context at OpenSSL security level 1."""

    def init_poolmanager(self, *args, **kwargs):
        """Build the pool manager, forcing ``ssl_context`` to the relaxed context.

        All positional and keyword arguments are forwarded unchanged to the
        parent implementation, except ``ssl_context`` which is always overwritten.
        """
        relaxed_context = ssl.create_default_context()
        # NOTE(review): SECLEVEL=1 lowers OpenSSL's default security level;
        # presumably required to complete the handshake with the target server — confirm.
        relaxed_context.set_ciphers("DEFAULT:@SECLEVEL=1")
        kwargs.update(ssl_context=relaxed_context)
        return super().init_poolmanager(*args, **kwargs)

# Shared HTTP session; every https:// URL is routed through SSLAdapter
session = requests.Session()
session.mount("https://", SSLAdapter())

# Function to fetch data with retries and handle rate limiting
# Requests issued in the current one-minute window. Kept separate from the
# per-second `request_count` so that rolling over one window does not erase
# the other window's tally.
_minute_request_count = 0


def _wait_for_rate_limit_slot():
    """Sleep until one more request fits under both the per-second and per-minute limits."""
    global request_count, minute_start_time, second_start_time, _minute_request_count

    while True:
        now = datetime.now()

        # Roll each fixed window over independently once it has elapsed
        if (now - second_start_time).total_seconds() >= 1:
            second_start_time = now
            request_count = 0
        if (now - minute_start_time).total_seconds() >= 60:
            minute_start_time = now
            _minute_request_count = 0

        if (request_count < MAX_REQUESTS_PER_SECOND
                and _minute_request_count < MAX_REQUESTS_PER_MINUTE):
            return

        time.sleep(0.1)  # Poll every 0.1 second until a window rolls over


def fetch_data_with_retries(url, params=None, retries=5, timeout=30):
    """GET ``url`` as JSON through the shared session, throttled and retried on HTTP 429.

    Every attempt (including retries) first waits until it fits under
    MAX_REQUESTS_PER_SECOND and MAX_REQUESTS_PER_MINUTE. A 429 response is
    retried after an exponential backoff of ``2 ** attempt`` seconds; any
    other non-200 status aborts immediately.

    Args:
        url: Address to request.
        params: Optional query-string parameters passed to ``session.get``.
        retries: Maximum number of attempts.
        timeout: Seconds passed to ``session.get`` as its ``timeout`` so a
            stalled connection cannot block forever.

    Returns:
        The decoded JSON body on HTTP 200, otherwise ``None``.

    Raises:
        Whatever ``session.get`` or ``response.json()`` raises (for example a
        timeout or connection error); such exceptions are not caught here.
    """
    global request_count, _minute_request_count

    for attempt in range(retries):
        _wait_for_rate_limit_slot()

        # Count before sending so a request that raises still consumes its slot
        request_count += 1
        _minute_request_count += 1

        response = session.get(
            url,
            params=params,
            headers={'accept': 'application/json'},
            timeout=timeout,
        )

        if response.status_code == 200:
            return response.json()

        if response.status_code != 429:
            print(f"Failed to retrieve data: {response.status_code}")
            break

        if attempt == retries - 1:
            # No attempt left: sleeping here would only delay the failure
            print(f"Rate limit exceeded. Giving up after {retries} attempts.")
            break

        wait_time = 2 ** attempt  # Exponential backoff
        print(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
        time.sleep(wait_time)

    return None

# Fetch vehicle IDs from the paginated list endpoint
first_page = 1
last_page = 2
vehicles_ids = []

for page in range(first_page, last_page + 1):
    # Query filters: 'wojewodztwo' is the voivodeship code; 'data-od'/'data-do'
    # bound a date range (presumably YYYYMMDD registration dates — confirm with API docs)
    params = {
        'wojewodztwo': '30',
        'data-od': '20190101',
        'data-do': '20191231',
        'page': str(page),
    }

    data = fetch_data_with_retries(base_url, params=params)

    if data:
        # Collect IDs without binding a global named `id`, which would
        # shadow the builtin for every later cell
        vehicles_ids.extend(item['id'] for item in data['data'])
    else:
        # Make a skipped page visible instead of silently losing its IDs
        print(f"Failed to retrieve vehicle list page {page}")

# Fetch each vehicle's attributes and assemble them into a DataFrame
vehicle_records = []

for vehicle_id in vehicles_ids:
    url = f'{base_url}/{vehicle_id}'
    data = fetch_data_with_retries(url)

    if data:
        # Keep the raw attribute dict; the frame is built once after the loop
        vehicle_records.append(data['data']['attributes'])
    else:
        print(f"Failed to retrieve data for vehicle ID {vehicle_id}")

# One constructor call over all records instead of concatenating one
# single-row frame per vehicle. `df` is always defined (empty when nothing
# was fetched) so later cells do not fail with NameError.
df = pd.DataFrame(vehicle_records)

if not vehicle_records:
    print("No data was retrieved.")


Rate limit exceeded. Retrying in 1 seconds...
Rate limit exceeded. Retrying in 1 seconds...
Rate limit exceeded. Retrying in 1 seconds...
Rate limit exceeded. Retrying in 1 seconds...
Rate limit exceeded. Retrying in 1 seconds...
Rate limit exceeded. Retrying in 1 seconds...
Rate limit exceeded. Retrying in 1 seconds...
Rate limit exceeded. Retrying in 1 seconds...
Rate limit exceeded. Retrying in 1 seconds...
Rate limit exceeded. Retrying in 1 seconds...
Rate limit exceeded. Retrying in 1 seconds...
Rate limit exceeded. Retrying in 1 seconds...
Rate limit exceeded. Retrying in 1 seconds...
Rate limit exceeded. Retrying in 1 seconds...
Rate limit exceeded. Retrying in 1 seconds...
Rate limit exceeded. Retrying in 1 seconds...
Rate limit exceeded. Retrying in 1 seconds...
Rate limit exceeded. Retrying in 1 seconds...
Rate limit exceeded. Retrying in 1 seconds...
Rate limit exceeded. Retrying in 1 seconds...
Rate limit exceeded. Retrying in 1 seconds...
Rate limit exceeded. Retrying in 1

In [2]:
# Preview the first ten rows of the vehicle DataFrame
df.head(10)

Unnamed: 0,marka,kategoria-pojazdu,typ,model,wariant,wersja,rodzaj-pojazdu,podrodzaj-pojazdu,przeznaczenie-pojazdu,pochodzenie-pojazdu,...,data-wprowadzenia-danych,rejestracja-wojewodztwo,rejestracja-gmina,rejestracja-powiat,wlasciciel-wojewodztwo,wlasciciel-powiat,wlasciciel-gmina,wlasciciel-wojewodztwo-kod,wojewodztwo-kod,poziom-emisji-co2-paliwo-alternatywne-1
0,BMW,0,---,X1,---,---,SAMOCHÓD OSOBOWY,KOMBI,---,UŻYW. IMPORT INDYW,...,,WIELKOPOLSKIE,KĘPNO,KĘPIŃSKI,,,,,30,
1,OPEL,11111111,---,ASTRA,---,---,SAMOCHÓD OSOBOWY,WIELOZADANIOWY,---,UŻYW. IMPORT INDYW,...,,WIELKOPOLSKIE,KONIN,KONIN,,,,,30,
2,BMW,13,3-V,320D XDRIVE,8T51,6AW500L0,SAMOCHÓD OSOBOWY,KARETA (SEDAN),---,UŻYW. ZAKUPIONY W KRAJU,...,,WIELKOPOLSKIE,POZNAŃ-STARE MIASTO,POZNAŃ,,,,,30,
3,ISUZU/ROMCAR,19,N1R/N,---,87A/NW,BBABMA0087CF,SAMOCHÓD CIĘŻAROWY,WYWROTKA,---,UŻYW. ZAKUPIONY W KRAJU,...,,WIELKOPOLSKIE,xxxxxxxxx,xxxxxxxxx,,,,,30,
4,TOYOTA,13,"XA4(EU,M)",RAV4,AVA42(N),AVA42L-ANXMBW(5T),SAMOCHÓD OSOBOWY,WIELOZADANIOWY,---,UŻYW. ZAKUPIONY W KRAJU,...,,WIELKOPOLSKIE,POZNAŃ-STARE MIASTO,POZNAŃ,,,,,30,
5,AUDI,11111111,---,A4,---,---,SAMOCHÓD OSOBOWY,KARETA (SEDAN),---,UŻYW. IMPORT INDYW,...,,WIELKOPOLSKIE,KONIN,KONIN,,,,,30,
6,SMART,11111111,---,MC 01,---,---,SAMOCHÓD OSOBOWY,HATCHBACK,---,UŻYW. ZAKUPIONY W KRAJU,...,,WIELKOPOLSKIE,POZNAŃ-JEŻYCE,POZNAŃ,,,,,30,
7,VOLKSWAGEN,11111111,---,NEW BEETLE,---,---,SAMOCHÓD OSOBOWY,HATCHBACK,---,UŻYW. IMPORT INDYW,...,,WIELKOPOLSKIE,GNIEZNO,GNIEŹNIEŃSKI,,,,,30,
8,AUDI,11111111,---,A3 SPORTBACK,---,---,SAMOCHÓD OSOBOWY,HATCHBACK,---,UŻYW. ZAKUPIONY W KRAJU,...,,WIELKOPOLSKIE,POZNAŃ-JEŻYCE,POZNAŃ,,,,,30,
9,VOLVO,0,---,FH,---,---,CIĄGNIK SAMOCHODOWY,SIODŁOWY,---,UŻYW. ZAKUPIONY W KRAJU,...,,WIELKOPOLSKIE,POZNAŃ-STARE MIASTO,POZNAŃ,,,,,30,


In [3]:
# (rows, columns) of the vehicle DataFrame
df.shape

(200, 68)

In [1]:
import numpy as np

# Sample revenue series used to demonstrate the error metrics below
actual_revenues = [
    500, 600, 550, 620, 580,
    600, 590, 610, 630, 620,
]  # Actual revenues
forecasted_revenues = [
    510, 610, 560, 630, 590,
    610, 600, 620, 640, 630,
]  # Forecasted revenues

# Mean Absolute Error (MAE)
def mean_absolute_error(actual, forecasted):
    """Return the mean of the absolute differences between ``actual`` and ``forecasted``.

    Both arguments are converted with ``np.array`` before subtraction.
    """
    residuals = np.array(actual) - np.array(forecasted)
    absolute_residuals = np.abs(residuals)
    return np.mean(absolute_residuals)

# Mean Squared Error (MSE)
def mean_squared_error(actual, forecasted):
    """Return the mean of the squared differences between ``actual`` and ``forecasted``.

    Both arguments are converted with ``np.array`` before subtraction.
    """
    residuals = np.array(actual) - np.array(forecasted)
    squared_residuals = residuals ** 2
    return np.mean(squared_residuals)

# Root Mean Squared Error (RMSE)
def root_mean_squared_error(actual, forecasted):
    """Return the square root of ``mean_squared_error(actual, forecasted)``."""
    mse_value = mean_squared_error(actual, forecasted)
    return np.sqrt(mse_value)

# Mean Absolute Percentage Error (MAPE)
def mean_absolute_percentage_error(actual, forecasted):
    """Return the mean absolute percentage error between ``actual`` and ``forecasted``.

    Computed as ``mean(|actual - forecasted| / |actual|) * 100``, so the
    result is expressed in percent.

    Args:
        actual: Sequence of observed values; none may be zero.
        forecasted: Sequence of predicted values.

    Returns:
        The error as a percentage.

    Raises:
        ValueError: If any value in ``actual`` is zero, because the metric
            divides by each actual value and would otherwise yield inf/nan.
    """
    actual_values = np.array(actual)
    forecast_values = np.array(forecasted)

    # Fail loudly rather than let a zero denominator turn the result into inf/nan
    if np.any(actual_values == 0):
        raise ValueError("MAPE is undefined when an actual value is zero.")

    relative_errors = (actual_values - forecast_values) / actual_values
    return np.mean(np.abs(relative_errors)) * 100

# Evaluate every metric on the example series
mae, mse, rmse, mape = (
    metric(actual_revenues, forecasted_revenues)
    for metric in (
        mean_absolute_error,
        mean_squared_error,
        root_mean_squared_error,
        mean_absolute_percentage_error,
    )
)

# Report each metric on its own line, rounded to two decimals
print(
    f"Mean Absolute Error (MAE): {mae:.2f}",
    f"Mean Squared Error (MSE): {mse:.2f}",
    f"Root Mean Squared Error (RMSE): {rmse:.2f}",
    f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%",
    sep="\n",
)


Mean Absolute Error (MAE): 10.00
Mean Squared Error (MSE): 100.00
Root Mean Squared Error (RMSE): 10.00
Mean Absolute Percentage Error (MAPE): 1.70%
