# ARIMA models explainability

This notebook focuses on a single ARIMA_PLUS_XREG model trained on one fold. It describes the model's coefficients and training information, and explains its forecasts.

In [1]:
from google.cloud import bigquery

In [21]:
# --- Where the model lives in BigQuery ---------------------------------------
PROJECT_ID = "luciexu"
BQ_DATASET = "citibike"
model_id = "citibike-arima-fold-3-2025-11-04-09-44-52"

# Table passed to ML.EXPLAIN_FORECAST further down.
# NOTE(review): the name shares the model's fold/timestamp suffix, so this is
# presumably the prepared data for the same fold -- confirm.
BQ_TABLE = "citibike_trips_prepped-fold-3-2025-11-04-09-44-52"

# --- Column roles used when building the SQL ---------------------------------
SERIES_COLUMN = "time_serie_id"  # one value per individual time series
TIME_COLUMN = "monday_of_week"   # date column the forecast rows are ordered by
TARGET_COLUMN = "qty"            # selected alongside the covariates
COVARIATE_COLUMNS = [
    # start station
    "start_station_id",
    "start_station_latitude",
    "start_station_longitude",
    # end station
    "end_station_id",
    "end_station_latitude",
    "end_station_longitude",
    "percentage_rain",
]

# Passed as `horizon` to ML.EXPLAIN_FORECAST.
FORECAST_HORIZON_LENGTH = 10

# Single BigQuery client shared by every query cell in this notebook.
bq = bigquery.Client(project=PROJECT_ID)

## Model information

In [8]:
# Fitted coefficients of the model: the result has one row per
# (time series, processed input) with the AR/MA terms and the input's weight.
query = f"""
    SELECT *
    FROM ML.ARIMA_COEFFICIENTS(MODEL `{PROJECT_ID}.{BQ_DATASET}.{model_id}`)
"""
coefficients_job = bq.query(query)
coefficients = coefficients_job.to_dataframe()
coefficients



Unnamed: 0,time_serie_id,ar_coefficients,ma_coefficients,intercept_or_drift,processed_input,weight,category_weights
0,10avew28st_10avew28st,[],[],,start_station_id,0.000000,[]
1,10avew28st_10avew28st,[],[],,start_station_latitude,-0.020241,[]
2,10avew28st_10avew28st,[],[],,start_station_longitude,0.000000,[]
3,10avew28st_10avew28st,[],[],,end_station_id,0.000935,[]
4,10avew28st_10avew28st,[],[],,end_station_latitude,0.000000,[]
...,...,...,...,...,...,...,...
1793016,yorkstjayst_yorkstjayst,[],[],,end_station_id,0.005979,[]
1793017,yorkstjayst_yorkstjayst,[],[],,end_station_latitude,-0.020157,[]
1793018,yorkstjayst_yorkstjayst,[],[],,end_station_longitude,0.000000,[]
1793019,yorkstjayst_yorkstjayst,[],[],,percentage_rain,-0.761810,[]


In [9]:
# Summary statistics (min/max/mean/median/stddev, category and null counts)
# for each input column seen by the model.
query = f"""
    SELECT *
    FROM ML.FEATURE_INFO(MODEL `{PROJECT_ID}.{BQ_DATASET}.{model_id}`)
"""
feature_info_job = bq.query(query)
featureInfo = feature_info_job.to_dataframe()
featureInfo.head()



Unnamed: 0,input,min,max,mean,median,stddev,category_count,null_count,dimension
0,qty,1.0,838.0,4.038904,2.0,5.660052,,0,
1,time_serie_id,,,,,,203674.0,0,
2,start_station_id,72.0,3266.0,633.292144,412.0,768.873268,,0,
3,start_station_latitude,40.646678,40.787209,40.733041,40.734011,0.022,,0,
4,start_station_longitude,-74.02545,-73.928504,-73.988276,-73.989129,0.014769,,0,


In [10]:
# Training run metadata: run index, iteration and duration in milliseconds.
query = f"""
    SELECT *
    FROM ML.TRAINING_INFO(MODEL `{PROJECT_ID}.{BQ_DATASET}.{model_id}`)
"""
training_info_job = bq.query(query)
trainingInfo = training_info_job.to_dataframe()
trainingInfo.head()



Unnamed: 0,training_run,iteration,duration_ms
0,0,0,10570


## Explainability

In [11]:
# Holidays known to the model: region, holiday name, primary date and the
# number of pre-/post-holiday days attached to each occurrence.
query = f"""
    SELECT *
    FROM ML.HOLIDAY_INFO(
        MODEL `{PROJECT_ID}.{BQ_DATASET}.{model_id}`)
"""
holiday_job = bq.query(query)
holiday_info = holiday_job.to_dataframe()
holiday_info



Unnamed: 0,region,holiday_name,primary_date,preholiday_days,postholiday_days
0,GLOBAL,AllSaintsDay-Halloween,2000-10-31,1,1
1,GLOBAL,AllSaintsDay-Halloween,2001-10-31,1,1
2,GLOBAL,AllSaintsDay-Halloween,2002-10-31,1,1
3,GLOBAL,AllSaintsDay-Halloween,2003-10-31,1,1
4,GLOBAL,AllSaintsDay-Halloween,2004-10-31,1,1
...,...,...,...,...,...
1901,US,VeteranDay,2026-11-11,1,1
1902,US,VeteranDay,2027-11-11,1,1
1903,US,VeteranDay,2028-11-11,1,1
1904,US,VeteranDay,2029-11-11,1,1


In [23]:
# Time series whose forecast explanation is inspected below.
chosen_id = "1avee30st_stmarkspl1ave"

# Comma-separated covariate list, spliced into the inner SELECT.
covariate_select = ', '.join(COVARIATE_COLUMNS)

# Explain the forecast over FORECAST_HORIZON_LENGTH steps at a 95% confidence
# level, keep only the chosen series, and expose the timestamp as a plain date
# under TIME_COLUMN so rows can be ordered chronologically.
query = f"""
    SELECT *, 
        EXTRACT(DATE FROM time_series_timestamp) AS {TIME_COLUMN}
    FROM ML.EXPLAIN_FORECAST(
        MODEL `{PROJECT_ID}.{BQ_DATASET}.{model_id}`,
        STRUCT({FORECAST_HORIZON_LENGTH} AS horizon, 0.95 AS confidence_level),
        (
            SELECT {SERIES_COLUMN}, {TIME_COLUMN}, {TARGET_COLUMN},
                {covariate_select}
            FROM `{PROJECT_ID}.{BQ_DATASET}.{BQ_TABLE}`
        )
    )
    WHERE {SERIES_COLUMN} = '{chosen_id}'
    ORDER BY {TIME_COLUMN}
"""
# Echo the rendered SQL so it can be copied into the BigQuery console.
print(query)

explain_job = bq.query(query)
explain = explain_job.to_dataframe()

# Rows are ordered by date, so the tail holds the latest rows of the result.
explain.tail()


    SELECT *, 
        EXTRACT(DATE FROM time_series_timestamp) AS monday_of_week
    FROM ML.EXPLAIN_FORECAST(
        MODEL `luciexu.citibike.citibike-arima-fold-3-2025-11-04-09-44-52`,
        STRUCT(10 AS horizon, 0.95 AS confidence_level),
        (
            SELECT time_serie_id, monday_of_week, qty,
                start_station_id, start_station_latitude, start_station_longitude, end_station_id, end_station_latitude, end_station_longitude, percentage_rain
            FROM `luciexu.citibike.citibike_trips_prepped-fold-3-2025-11-04-09-44-52`
        )
    )
    WHERE time_serie_id = '1avee30st_stmarkspl1ave'
    ORDER BY monday_of_week



Unnamed: 0,time_serie_id,time_series_timestamp,time_series_type,time_series_data,time_series_adjusted_data,standard_error,confidence_level,prediction_interval_lower_bound,prediction_interval_upper_bound,trend,...,holiday_effect_US_ElectionDay,attribution_start_station_id,attribution_start_station_latitude,attribution_start_station_longitude,attribution_end_station_id,attribution_end_station_latitude,attribution_end_station_longitude,attribution_percentage_rain,attribution___INTERCEPT__,monday_of_week
162,1avee30st_stmarkspl1ave,2016-08-08 00:00:00+00:00,forecast,2.056178,2.056178,0.910136,0.95,0.275536,3.836821,-0.170239,...,0.0,1.151944,0.191326,1.85171e-10,-0.312399,-0.296148,0.232774,0.355493,0.903427,2016-08-08
163,1avee30st_stmarkspl1ave,2016-08-15 00:00:00+00:00,forecast,1.964,1.964,0.910136,0.95,0.183357,3.744642,-0.169912,...,0.0,1.151944,0.191326,1.85171e-10,-0.312399,-0.296148,0.232774,0.262987,0.903427,2016-08-15
164,1avee30st_stmarkspl1ave,2016-08-22 00:00:00+00:00,forecast,2.275166,2.275166,0.910136,0.95,0.494523,4.055808,-0.169827,...,0.0,1.361039,0.191287,1.852033e-10,-0.449381,-0.296179,0.232779,0.502022,0.903427,2016-08-22
165,1avee30st_stmarkspl1ave,2016-08-29 00:00:00+00:00,forecast,2.275188,2.275188,0.910136,0.95,0.494545,4.05583,-0.169806,...,0.0,1.361039,0.191287,1.852033e-10,-0.449381,-0.296179,0.232779,0.502022,0.903427,2016-08-29
166,1avee30st_stmarkspl1ave,2016-09-05 00:00:00+00:00,forecast,2.33618,2.33618,0.910136,0.95,0.555537,4.116822,-0.1698,...,0.0,1.151944,0.191326,1.85171e-10,-0.312399,-0.296148,0.232774,0.635056,0.903427,2016-09-05
