In [38]:
# Imports using Sklearn make shortcut functions
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import MinMaxScaler, OrdinalEncoder, OneHotEncoder
from sklearn.pipeline import make_pipeline
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, RandomizedSearchCV, TimeSeriesSplit
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from google.cloud import bigquery
from datetime import datetime, timedelta

In [39]:
# Print current time
now = datetime.now()
current_time = now.strftime("%Y-%m-%d %H:%M:%S")

In [40]:
# Imports data
data = pd.read_csv('../processed_data/1988_to_2024_combined_clean_polling_and_results.csv')

In [41]:
data.rename(columns={'OTH_PERCENTAGE': 'OTH_ACT'}, inplace=True)

In [42]:
# Convert date columns to datetime
data['enddate'] = pd.to_datetime(data['enddate'])
data['next_elec_date'] = pd.to_datetime(data['next_elec_date'])
data['startdate'] = pd.to_datetime(data['startdate'])

In [43]:
# Handle numerical transformer
num_columns_selector = ['samplesize', 'months_to_elec_weight']
num_transformer = MinMaxScaler()

In [44]:
# Handle categorical transformer
cat_columns_selector = ['rating']
cat_transformer = make_pipeline(OrdinalEncoder(categories = [['F','F+','E-','E','E+','D-','D','D+','C-','C','C+','B-','B','B+','A-']]),MinMaxScaler())

In [45]:
# One-hot encoder for party_in_power
ohe = OneHotEncoder()

In [46]:
# Build the preprocessing pipeline
preproc_pipeline = make_column_transformer(
    (num_transformer, num_columns_selector),
    (cat_transformer, cat_columns_selector),
    (ohe,['party_in_power']),
    remainder='passthrough',
    verbose_feature_names_out=False
)

In [47]:
data['next_elec_date'].value_counts()

next_elec_date
2015-05-07    1930
2024-07-04    1362
2010-05-06     584
1992-04-09     453
2019-12-12     440
1997-05-01     338
2017-06-08     254
2001-06-07     213
2005-05-05      98
Name: count, dtype: int64

In [48]:
# Define election dates
election_date_2019 = datetime.strptime('2010-05-06', '%Y-%m-%d')
cutoff_date = election_date_2019 - timedelta(days=84)
prediction_date = election_date_2019 - timedelta(days=24)

In [49]:
# Split data
data_train = data[data['startdate'] < cutoff_date]
data_test_1 = data[(data['startdate'] >= cutoff_date) & (data['startdate'] < prediction_date)]
data_test = data_test_1[data_test_1['next_elec_date'] == election_date_2019]


In [50]:
# Fit transform preprocessing pipeline to data_train
data_train_processed = preproc_pipeline.fit_transform(data_train)


In [51]:
# Transform preprocessing pipeline to data_test
data_test_processed = preproc_pipeline.transform(data_test)

In [52]:
# Check feature names make sense for data_train
data_train_processed = pd.DataFrame(
    data_train_processed, columns=preproc_pipeline.get_feature_names_out()
)

# Check feature names make sense for data_test
data_test_processed = pd.DataFrame(
    data_test_processed, columns=preproc_pipeline.get_feature_names_out()
)

In [53]:
# Define our X by dropping irrelevant and y columns
X_train = data_train_processed.drop(columns=['startdate', 'enddate', 'pollster', 'Unnamed: 0', 'next_elec_date', 'days_to_elec', 'months_to_elec', 'party_in_power_Labour', 'LAB_ACT', 'CON_ACT', 'LIB_ACT', 'GRE_ACT', 'BRX_ACT', 'SNP_ACT', 'UKI_ACT', 'PLC_ACT', 'OTH_ACT'])
X_test = data_test_processed.drop(columns=['startdate', 'enddate', 'pollster', 'Unnamed: 0', 'next_elec_date', 'days_to_elec', 'months_to_elec', 'party_in_power_Labour', 'LAB_ACT', 'CON_ACT', 'LIB_ACT', 'GRE_ACT', 'BRX_ACT', 'SNP_ACT', 'UKI_ACT', 'PLC_ACT', 'OTH_ACT'])

In [54]:
# Build our target matrix
y_train = data_train_processed[['next_elec_date', 'LAB_ACT', 'CON_ACT', 'LIB_ACT', 'GRE_ACT', 'BRX_ACT', 'SNP_ACT', 'UKI_ACT', 'PLC_ACT', 'OTH_ACT']]
y_test = data_test_processed[['next_elec_date', 'LAB_ACT', 'CON_ACT', 'LIB_ACT', 'GRE_ACT', 'BRX_ACT', 'SNP_ACT', 'UKI_ACT', 'PLC_ACT', 'OTH_ACT']]

In [55]:
# Drops y_train actuals where the actual is the actual result we are trying to predict, replaces with NaNs
y_train.loc[y_train['next_elec_date'] == '2010-05-06',
         ['LAB_ACT', 'CON_ACT', 'LIB_ACT', 'GRE_ACT', 'BRX_ACT', 'SNP_ACT', 'UKI_ACT', 'PLC_ACT', 'OTH_ACT']] = np.nan

In [56]:
X_test.tail(10)

Unnamed: 0,samplesize,months_to_elec_weight,rating,party_in_power_Conservative,poll_length,CON_FC,LAB_FC,LIB_FC,BRX_FC,GRE_FC,OTH_FC,PLC_FC,SNP_FC,UKI_FC
76,0.206507,0.831387,1.0,0.0,1,0.4,0.32,0.18,,,0.1,,,
77,0.116423,0.831387,0.5,0.0,2,0.37,0.31,0.2,,,0.11,,,
78,0.111996,0.831387,0.928571,0.0,1,0.37,0.3,0.2,,,0.13,,,
79,0.211819,0.831387,1.0,0.0,1,0.37,0.31,0.2,,,0.12,,,
80,0.333776,0.831387,0.0,0.0,1,0.38,0.28,0.22,,,0.12,,,
81,0.293714,0.831387,1.0,0.0,3,0.39,0.31,0.17,,,0.13,,,
82,0.111554,0.831387,0.928571,0.0,1,0.36,0.31,0.19,,,0.14,,,
83,0.22023,0.831387,1.0,0.0,1,0.39,0.33,0.2,,,0.08,,,
84,0.22687,0.831387,0.571429,0.0,5,0.36,0.27,0.23,,,0.14,,,
85,0.313856,0.831387,0.428571,0.0,6,0.36,0.33,0.22,,,0.09,,,


In [57]:
# Calculates average median value of X_test
averages = X_test.mean()
averages

samplesize                     0.234916
months_to_elec_weight          0.749554
rating                         0.771595
party_in_power_Conservative         0.0
poll_length                    1.848837
CON_FC                         0.378372
LAB_FC                         0.307791
LIB_FC                          0.18907
BRX_FC                              NaN
GRE_FC                              NaN
OTH_FC                         0.124302
PLC_FC                              NaN
SNP_FC                              NaN
UKI_FC                              NaN
dtype: object

In [58]:
# Creates imputation values for y_train to impute over actuals we are trying to predict
imputation_values = {
    'CON_ACT': averages['CON_FC'],
    'LAB_ACT': averages['LAB_FC'],
    'LIB_ACT': averages['LIB_FC'],
    'BRX_ACT': 0, # doesnt exist in 2015
    'GRE_ACT': averages['GRE_FC'],
    'OTH_ACT': averages['OTH_FC'],
    'PLC_ACT':    0.006,
    'SNP_ACT': 0.04, # imputes value due to low forecasts
    'UKI_ACT': averages['UKI_FC']
}

In [59]:
# Applies imputation values to y_train
y_train = y_train.fillna(value=imputation_values)


  y_train = y_train.fillna(value=imputation_values)


In [60]:
# Instantiate the model for each party
models = {}
parties = ['CON', 'LAB', 'LIB', 'BRX', 'GRE', 'SNP', 'UKI', 'PLC', 'OTH']
for party in parties:
    models[party] = XGBRegressor(
        learning_rate=0.3, n_estimators=300, max_depth=3, subsample=0.7,
        objective='reg:squarederror', nthread=-1, enable_categorical=True
    )

In [61]:
# Train models
X_train_matrix = np.array(X_train)
for party in parties:
    models[party].fit(X_train_matrix, y_train[f'{party}_ACT'])

XGBoostError: [21:53:32] /workspace/src/data/data.cc:507: Check failed: valid: Label contains NaN, infinity or a value too large.
Stack trace:
  [bt] (0) /home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x3588ca) [0x7f80ff77a8ca]
  [bt] (1) /home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x38a21d) [0x7f80ff7ac21d]
  [bt] (2) /home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x38ab51) [0x7f80ff7acb51]
  [bt] (3) /home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(XGDMatrixSetInfoFromInterface+0xb0) [0x7f80ff5803a0]
  [bt] (4) /lib/x86_64-linux-gnu/libffi.so.8(+0x7e2e) [0x7f81b435fe2e]
  [bt] (5) /lib/x86_64-linux-gnu/libffi.so.8(+0x4493) [0x7f81b435c493]
  [bt] (6) /home/chris/.pyenv/versions/3.10.6/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x12e05) [0x7f81b3402e05]
  [bt] (7) /home/chris/.pyenv/versions/3.10.6/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0xc8d0) [0x7f81b33fc8d0]
  [bt] (8) /home/chris/.pyenv/versions/3.10.6/lib/libpython3.10.so.1.0(_PyObject_MakeTpCall+0x8c) [0x7f81b4eb635c]



In [62]:
# Evaluate model performance with cross validation
cv_scores = {}
for party in parties:
    cv_scores[party] = cross_val_score(models[party], X_train_matrix, y_train[f'{party}_ACT']).mean()

ValueError: 
All the 5 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/core.py", line 730, in inner_f
    return func(**kwargs)
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/sklearn.py", line 1055, in fit
    train_dmatrix, evals = _wrap_evaluation_matrices(
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/sklearn.py", line 521, in _wrap_evaluation_matrices
    train_dmatrix = create_dmatrix(
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/sklearn.py", line 958, in _create_dmatrix
    return QuantileDMatrix(
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/core.py", line 730, in inner_f
    return func(**kwargs)
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/core.py", line 1529, in __init__
    self._init(
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/core.py", line 1588, in _init
    it.reraise()
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/core.py", line 576, in reraise
    raise exc  # pylint: disable=raising-bad-type
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/core.py", line 557, in _handle_exception
    return fn()
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/core.py", line 641, in <lambda>
    return self._handle_exception(lambda: self.next(input_data), 0)
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/data.py", line 1280, in next
    input_data(**self.kwargs)
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/core.py", line 730, in inner_f
    return func(**kwargs)
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/core.py", line 633, in input_data
    self.proxy.set_info(
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/core.py", line 730, in inner_f
    return func(**kwargs)
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/core.py", line 932, in set_info
    self.set_label(label)
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/core.py", line 1070, in set_label
    dispatch_meta_backend(self, label, "label", "float")
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/data.py", line 1225, in dispatch_meta_backend
    _meta_from_pandas_series(data, name, dtype, handle)
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/data.py", line 545, in _meta_from_pandas_series
    _meta_from_numpy(data, name, dtype, handle)
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/data.py", line 1159, in _meta_from_numpy
    _check_call(_LIB.XGDMatrixSetInfoFromInterface(handle, c_str(field), interface_str))
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/core.py", line 282, in _check_call
    raise XGBoostError(py_str(_LIB.XGBGetLastError()))
xgboost.core.XGBoostError: [21:53:34] /workspace/src/data/data.cc:507: Check failed: valid: Label contains NaN, infinity or a value too large.
Stack trace:
  [bt] (0) /home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x3588ca) [0x7f80ff77a8ca]
  [bt] (1) /home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x38a21d) [0x7f80ff7ac21d]
  [bt] (2) /home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x38ab51) [0x7f80ff7acb51]
  [bt] (3) /home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(XGDMatrixSetInfoFromInterface+0xb0) [0x7f80ff5803a0]
  [bt] (4) /lib/x86_64-linux-gnu/libffi.so.8(+0x7e2e) [0x7f81b435fe2e]
  [bt] (5) /lib/x86_64-linux-gnu/libffi.so.8(+0x4493) [0x7f81b435c493]
  [bt] (6) /home/chris/.pyenv/versions/3.10.6/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x12e05) [0x7f81b3402e05]
  [bt] (7) /home/chris/.pyenv/versions/3.10.6/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0xc8d0) [0x7f81b33fc8d0]
  [bt] (8) /home/chris/.pyenv/versions/3.10.6/lib/libpython3.10.so.1.0(_PyObject_MakeTpCall+0x8c) [0x7f81b4eb635c]



--------------------------------------------------------------------------------
4 fits failed with the following error:
Traceback (most recent call last):
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/core.py", line 730, in inner_f
    return func(**kwargs)
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/sklearn.py", line 1055, in fit
    train_dmatrix, evals = _wrap_evaluation_matrices(
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/sklearn.py", line 521, in _wrap_evaluation_matrices
    train_dmatrix = create_dmatrix(
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/sklearn.py", line 958, in _create_dmatrix
    return QuantileDMatrix(
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/core.py", line 730, in inner_f
    return func(**kwargs)
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/core.py", line 1529, in __init__
    self._init(
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/core.py", line 1588, in _init
    it.reraise()
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/core.py", line 576, in reraise
    raise exc  # pylint: disable=raising-bad-type
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/core.py", line 557, in _handle_exception
    return fn()
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/core.py", line 641, in <lambda>
    return self._handle_exception(lambda: self.next(input_data), 0)
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/data.py", line 1280, in next
    input_data(**self.kwargs)
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/core.py", line 730, in inner_f
    return func(**kwargs)
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/core.py", line 633, in input_data
    self.proxy.set_info(
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/core.py", line 730, in inner_f
    return func(**kwargs)
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/core.py", line 932, in set_info
    self.set_label(label)
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/core.py", line 1070, in set_label
    dispatch_meta_backend(self, label, "label", "float")
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/data.py", line 1225, in dispatch_meta_backend
    _meta_from_pandas_series(data, name, dtype, handle)
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/data.py", line 545, in _meta_from_pandas_series
    _meta_from_numpy(data, name, dtype, handle)
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/data.py", line 1159, in _meta_from_numpy
    _check_call(_LIB.XGDMatrixSetInfoFromInterface(handle, c_str(field), interface_str))
  File "/home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/core.py", line 282, in _check_call
    raise XGBoostError(py_str(_LIB.XGBGetLastError()))
xgboost.core.XGBoostError: [21:53:35] /workspace/src/data/data.cc:507: Check failed: valid: Label contains NaN, infinity or a value too large.
Stack trace:
  [bt] (0) /home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x3588ca) [0x7f80ff77a8ca]
  [bt] (1) /home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x38a21d) [0x7f80ff7ac21d]
  [bt] (2) /home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x38ab51) [0x7f80ff7acb51]
  [bt] (3) /home/chris/.pyenv/versions/3.10.6/envs/election_predictor/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(XGDMatrixSetInfoFromInterface+0xb0) [0x7f80ff5803a0]
  [bt] (4) /lib/x86_64-linux-gnu/libffi.so.8(+0x7e2e) [0x7f81b435fe2e]
  [bt] (5) /lib/x86_64-linux-gnu/libffi.so.8(+0x4493) [0x7f81b435c493]
  [bt] (6) /home/chris/.pyenv/versions/3.10.6/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x12e05) [0x7f81b3402e05]
  [bt] (7) /home/chris/.pyenv/versions/3.10.6/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0xc8d0) [0x7f81b33fc8d0]
  [bt] (8) /home/chris/.pyenv/versions/3.10.6/lib/libpython3.10.so.1.0(_PyObject_MakeTpCall+0x8c) [0x7f81b4eb635c]




In [66]:
actuals_2010 = {
    "CON": 0.362,  # Conservative Party
    "LAB": 0.292,  # Labour Party
    "LIB": 0.235,  # Liberal Democrats
    "BRX": 0.000,  # Brexit Party (did not exist in 2010)
    "GRE": 0.009,  # Green Party
    "SNP": 0.019,  # Scottish National Party
    "UKI": 0.030,  # UK Independence Party
    "PLC": 0.005,  # Plaid Cymru
    "OTH": 0.048   # Other parties
}

In [67]:
# Evaluate predictions
X_test_matrix = np.array(X_test)
mean_predictions = {}
for party in parties:
    mean_predictions[party] = models[party].predict(X_test_matrix).mean()


NotFittedError: need to call fit or load_model beforehand

In [71]:
# Print mean predictions
for party in parties:
    print(f"2010 mean prediction for {party}: {mean_predictions[party]}")
    print(f"2010 actual result for {party}  : {actuals_2010[party]}")


2010 mean prediction for CON: 0.3777395188808441
2010 actual result for CON  : 0.362
2010 mean prediction for LAB: 0.3036001920700073
2010 actual result for LAB  : 0.292
2010 mean prediction for LIB: 0.1919867992401123
2010 actual result for LIB  : 0.235
2010 mean prediction for BRX: 0.0
2010 actual result for BRX  : 0.0


KeyError: 'GRE'

In [72]:
# Evaluate last five polls
last_predictions = {}
for party in parties:
    last_predictions[party] = models[party].predict(X_test_matrix)[-5:]

NotFittedError: need to call fit or load_model beforehand

In [73]:
# Print mean of predictions
for party in parties:
    print(f"2010 last prediction for {party}: {last_predictions[party].mean()}")
    print(f"2010 actual result for {party}  : {actuals_2010[party]}")


2010 last prediction for CON: 0.37153130769729614
2010 actual result for CON  : 0.362
2010 last prediction for LAB: 0.30539435148239136
2010 actual result for LAB  : 0.292
2010 last prediction for LIB: 0.19715282320976257
2010 actual result for LIB  : 0.235
2010 last prediction for BRX: 0.0
2010 actual result for BRX  : 0.0


KeyError: 'GRE'