In [1]:
# CELL 1: Imports and Configuration
# ==============================================================================
import pandas as pd
import sqlite3
import joblib
import os
import warnings

# Silence every warning for the rest of the session so cell output stays clean.
# NOTE(review): this blanket filter also hides warnings raised by the libraries
# used below (deprecations included); consider narrowing it by category.
warnings.filterwarnings('ignore')

In [2]:
# --- Configuration ---
# Every path is relative to the directory the notebook is run from.

# SQLite database queried and written by this notebook.
DB_PATH = '../database/ELECTIONS.db'

# Fitted feature preprocessor and target label encoder (joblib files).
LABEL_ENCODER_PATH = '../database/label_encoder_y.joblib'
PREPROCESSOR_PATH = '../database/preprocessor_X.joblib'

# Name of the best model (must match the file saved in step 1),
# resolved inside the models directory.
BEST_MODEL_FILENAME = 'random_forest_predictor.joblib'
MODELS_DIR = '../models'
MODEL_PATH = os.path.join(MODELS_DIR, BEST_MODEL_FILENAME)

In [3]:
# CELL 2: Load Model and Transformers
# ==============================================================================
# Loads the trained model, the fitted feature preprocessor and the label encoder.
# NOTE: joblib files are pickle-based, so loading one can execute arbitrary code.
# Only load artefacts produced by this project's own notebooks.
print("Loading model and transformers...")
try:
    model = joblib.load(MODEL_PATH)
    preprocessor_X = joblib.load(PREPROCESSOR_PATH)
    label_encoder_y = joblib.load(LABEL_ENCODER_PATH)
except FileNotFoundError as e:
    # Any of the three files can be the missing one: report the path that
    # actually failed instead of always blaming the model file.
    missing_path = e.filename or MODEL_PATH
    print(f"❌ Error: Required file not found at {missing_path}.")
    print("Please run the 'model_training.ipynb' notebook first to generate the model file.")
    # Re-raise rather than calling exit(): in a notebook exit() shuts the kernel
    # down, whereas an exception stops "Run All" at this cell with a traceback.
    raise
except Exception as e:
    print(f"An error occurred while loading files: {e}")
    raise
else:
    print(f"✅ Model '{BEST_MODEL_FILENAME}' and transformers loaded successfully.")

Loading model and transformers...
✅ Model 'random_forest_predictor.joblib' and transformers loaded successfully.


In [4]:
# CELL 3: Load 2024 Data to be Used for 2027 Prediction
# ==============================================================================
# Reads one row per department: the 2024 averages of the socio-economic
# indicators, which stand in for the (unknown) 2027 values.
print("\nLoading 2024 data as a proxy for 2027...")
try:
    con = sqlite3.connect(DB_PATH)
    try:
        # Same query as the one used for preprocessing, to guarantee consistency
        query = """
    SELECT
        DEPARTMENT_CODE,
        ROUND(AVG(POVERTY_RATE), 2) as avg_poverty_rate,
        ROUND(AVG(UNEMPLOYMENT_RATE), 2) as avg_unemployment_rate,
        ROUND(AVG(IMMIGRATION_RATE), 2) as avg_immigration_rate,
        ROUND(AVG(NUMBER_OF_VICTIMS), 0) as avg_number_of_victims
    FROM ELECTIONS_ALL
    WHERE YEAR = 2024
    GROUP BY DEPARTMENT_CODE
    """
        prediction_input_df = pd.read_sql_query(query, con)
    finally:
        # Always release the connection, even when the query fails
        con.close()
    # Fail fast here rather than with an obscure error in the prediction step
    if prediction_input_df.empty:
        raise ValueError("No rows found in ELECTIONS_ALL for YEAR = 2024.")
    print(f"Loaded {len(prediction_input_df)} rows from year 2024 to serve as prediction base.")
except Exception as e:
    print(f"Error loading data: {e}")
    # Re-raise rather than calling exit(): in a notebook exit() shuts the kernel
    # down, whereas an exception stops "Run All" at this cell with a traceback.
    raise


Loading 2024 data as a proxy for 2027...
Loaded 94 rows from year 2024 to serve as prediction base.


In [5]:
# Keep a copy of the department codes for the final results table
department_codes = prediction_input_df['DEPARTMENT_CODE'].copy()
# DEPARTMENT_CODE stays among the features: the prediction step passes the full
# frame to the preprocessor, which has to encode that column. Dropping it here
# contradicted that step.
features_to_predict = prediction_input_df.copy()

In [7]:
# CELL 4: Preprocess Data and Make Predictions
# ==============================================================================
# The preprocessor expects data with the same structure as the training data.
# That includes the 'DEPARTMENT_CODE' column, which it has to encode, so the
# column must NOT be dropped here.
features_to_predict = prediction_input_df

# Set the department codes aside for use in the final results table.
department_codes = features_to_predict['DEPARTMENT_CODE'].copy()

print("\nPreprocessing data and making predictions for 2027...")

# Apply the already-learned transformation (transform only, no re-fitting)
X_processed = preprocessor_X.transform(features_to_predict)

# Predict the encoded results (e.g. 0, 1, 2...)
predictions_encoded = model.predict(X_processed)

# Decode the predictions back into party names
predictions_decoded = label_encoder_y.inverse_transform(predictions_encoded)

print("✅ Predictions generated.")


Preprocessing data and making predictions for 2027...
✅ Predictions generated.


In [8]:
# CELL 5: Create the 2027 Predictions DataFrame
# ==============================================================================
# One row per department: the prediction year, the department code and the
# decoded predicted winner.
predictions_2027_df = pd.DataFrame(
    dict(
        YEAR=2027,
        DEPARTMENT_CODE=department_codes,
        WINNER=predictions_decoded,
    )
)

print("\n--- Predicted Winners for 2027 ---")
print(predictions_2027_df.head(5))


--- Predicted Winners for 2027 ---
   YEAR DEPARTMENT_CODE    WINNER
0  2027              01    GAUCHE
1  2027              02  E.DROITE
2  2027              03    GAUCHE
3  2027              04    GAUCHE
4  2027              05    CENTRE


In [9]:
# CELL 6: Load Historical Data
# ==============================================================================
# Reads the distinct (YEAR, DEPARTMENT_CODE, WINNER) combinations recorded in
# ELECTIONS_ALL, ignoring rows without a winner.
print("\nLoading historical election results...")
try:
    con = sqlite3.connect(DB_PATH)
    try:
        # Take the unique results per year and department
        query = """
    SELECT
        YEAR,
        DEPARTMENT_CODE,
        WINNER
    FROM ELECTIONS_ALL
    WHERE WINNER IS NOT NULL
    GROUP BY YEAR, DEPARTMENT_CODE, WINNER
    """
        historical_results_df = pd.read_sql_query(query, con)
    finally:
        # Always release the connection, even when the query fails
        con.close()
    print(f"✅ Loaded {len(historical_results_df)} historical result rows.")
except Exception as e:
    print(f"Error loading historical data: {e}")
    # Re-raise rather than calling exit(): in a notebook exit() shuts the kernel
    # down, whereas an exception stops "Run All" at this cell with a traceback.
    raise


Loading historical election results...
✅ Loaded 281 historical result rows.


In [10]:
# CELL 7: Combine Historical Data with 2027 Predictions and Save
# ==============================================================================
print("\nCombining historical data with 2027 predictions...")

# Stack the historical rows first, then the new predictions, and renumber the
# index so the result has one continuous index.
final_bi_table = pd.concat(
    [historical_results_df, predictions_2027_df],
    axis=0,
    ignore_index=True,
)

print(f"Final table for BI contains {len(final_bi_table)} rows, from year {final_bi_table['YEAR'].min()} to {final_bi_table['YEAR'].max()}.")


Combining historical data with 2027 predictions...
Final table for BI contains 375 rows, from year 2017 to 2027.


In [11]:
# --- Save to the database ---
# Writes the combined table to 'ELECTION_RESULTS_FOR_BI'. A failure is reported
# with a message; it is not re-raised.
try:
    con = sqlite3.connect(DB_PATH)
    try:
        # if_exists='replace' overwrites the table when it already exists,
        # which makes this cell safe to re-run.
        final_bi_table.to_sql('ELECTION_RESULTS_FOR_BI', con, if_exists='replace', index=False)
    finally:
        # Always release the connection, even when the write fails
        con.close()
    print("\n✅ Successfully created/updated the 'ELECTION_RESULTS_FOR_BI' table.")
    print("You can now connect this table to Power BI or any other BI tool.")
except Exception as e:
    print(f"\n❌ Error saving final table to database: {e}")


✅ Successfully created/updated the 'ELECTION_RESULTS_FOR_BI' table.
You can now connect this table to Power BI or any other BI tool.
