This notebook builds a predictive model for each municipality.

In [1]:
#importer libraries
from sklearn.metrics import mean_squared_error
import os
import tqdm as tqdm
import re
import pickle
from pathlib import Path
import pandas as pd
import numpy as np
# from sklearn.pipeline import make_pipeline
from sklearn.pipeline import Pipeline, make_pipeline

from sklearn.model_selection import GridSearchCV, learning_curve, KFold, train_test_split
# from sklearn.model_selection import learning_curve
# from sklearn.model_selection import KFold, train_test_split

from sklearn.compose import make_column_transformer
from sklearn.compose import ColumnTransformer

from sklearn.linear_model import ElasticNet
from sklearn.linear_model import Lasso

from sklearn.preprocessing import StandardScaler, PolynomialFeatures, OneHotEncoder

from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score
from sklearn.exceptions import ConvergenceWarning


import matplotlib.pyplot as plt
import warnings

# Silence the two warning categories that would otherwise flood the cell output
# during the grid search. NOTE(review): ignoring ConvergenceWarning also hides
# fits that stopped before converging -- re-enable it when diagnosing a model.
for _ignored_category in (ConvergenceWarning, FutureWarning):
    warnings.filterwarnings("ignore", category=_ignored_category)

In [2]:
# a function to add features to data.
def add_features_to_data(df):
    """Enrich aggregated sales data with municipality-level features.

    Reads six feature tables from ``../Feature_data/`` (relative to the current
    working directory) and left-joins them onto ``df``: income, cash-benefit
    recipients, Gini index, municipality areas, unemployment and population.
    All tables are joined on ``muni_code`` and ``year`` except the area table,
    which is joined on ``muni_code`` only.

    After merging, the frame must contain the columns ``count``,
    ``housing_type``, ``avg_sqm_price``, ``unemployed``, ``kont_recip_tot``,
    ``pop`` and ``km2``.

    Parameters
    ----------
    df : pandas.DataFrame
        Aggregated sales data with at least ``muni_code``, ``year`` and
        ``avg_sqm_price``. The caller's frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Rows with any missing value removed (including rows whose
        ``avg_sqm_price`` cannot be parsed as a number), sorted by ``year`` and
        ``count``. ``muni_code``, ``year`` and ``housing_type`` are categorical;
        ``unemployed`` and ``kont_recip_tot`` are divided by ``pop``;
        ``pop_den`` is ``pop / km2``. Columns starting with ``Unnamed`` and the
        helper columns ``count``, ``km2`` and ``pop`` are dropped.
    """
    feature_dir = Path("../Feature_data/")

    indk = pd.read_csv(feature_dir / "indkp101.csv")
    konth = pd.read_csv(feature_dir / "kontanth.csv")  # converted to per capita below
    gini = pd.read_csv(feature_dir / "gini_index.csv")
    areas = pd.read_csv(feature_dir / "muni_areas.csv")
    # NOTE(review): the original author flagged this table as suspect
    # ("something is wrong") -- verify the population data.
    pop = pd.read_csv(feature_dir / "population_data.csv")
    unenp = pd.read_csv(feature_dir / "unemployment_data.csv")

    muni_year = ["muni_code", "year"]
    merge_opts = {"how": "left", "suffixes": ("_left", "_right")}

    # Coerce the target to numeric BEFORE dropna(): values that cannot be parsed
    # become NaN here and are then removed together with the other incomplete
    # rows, instead of surviving as NaN targets in the returned frame.
    df = df.assign(avg_sqm_price=pd.to_numeric(df["avg_sqm_price"], errors="coerce"))

    df = (
        df
        .merge(indk, on=muni_year, **merge_opts)
        .merge(konth, on=muni_year, **merge_opts)
        .merge(gini, on=muni_year, **merge_opts)
        .merge(areas, on=["muni_code"], **merge_opts)
        .merge(unenp, on=muni_year, **merge_opts)
        .merge(pop, on=muni_year, **merge_opts)
        .sort_values(["year", "count"])
        .dropna()
        .assign(
            muni_code=lambda x: x["muni_code"].astype("category"),
            year=lambda x: x["year"].astype("category"),
            housing_type=lambda x: x["housing_type"].astype("category"),
            unemployed=lambda x: x["unemployed"] / x["pop"],
            kont_recip_tot=lambda x: x["kont_recip_tot"] / x["pop"],
            pop_den=lambda x: x["pop"] / x["km2"],
        )
    )

    # Drop leftover CSV index columns ("Unnamed: 0", ...) and the helper columns
    # that were only needed for sorting and for the per-capita/density features.
    unnamed_cols = [col for col in df.columns if col.startswith("Unnamed")]
    df = df.drop(columns=unnamed_cols).drop(columns=["count", "km2", "pop"])

    return df



Next, a function for fitting a model is defined. It takes a dataset of cleaned Boliga data and enriches it with the selected features before fitting.

In [3]:
def make_a_model(data, last_train_year=2015):
    """Fit a cross-validated Lasso regression on one municipality's sales data.

    The data is enriched with ``add_features_to_data``, the target
    ``avg_sqm_price`` is renamed to ``y`` and standardised, and the rows are
    split by year into a training and a test set. A pipeline of
    (scale -> polynomial expansion) for numeric features and one-hot encoding
    for categorical features, followed by ``Lasso``, is tuned with a 5-fold
    grid search on the training set.

    Parameters
    ----------
    data : pandas.DataFrame
        Aggregated sales data accepted by ``add_features_to_data``.
    last_train_year : int, default 2015
        Rows with ``year <= last_train_year`` are used for training, later
        years for testing.

    Returns
    -------
    tuple
        ``(all_pred, grid_search, best_parameters, rmse, r2, coefs,
        learning_curve_data)`` where

        * ``all_pred`` -- DataFrame with ``muni_code``, ``year``,
          ``housing_type``, ``y`` and ``y_pred`` for every row. Both ``y`` and
          ``y_pred`` are in standardised target units (not inverse-transformed).
        * ``grid_search`` -- the fitted ``GridSearchCV`` object.
        * ``best_parameters`` -- ``grid_search.best_params_``.
        * ``rmse``, ``r2`` -- test-set metrics on the original price scale.
        * ``coefs`` -- ``zip`` of (coefficient, feature name); a one-shot
          iterator, so consume it only once.
        * ``learning_curve_data`` -- DataFrame with mean ``Train`` / ``Test``
          MSE per ``sample size`` from a 10-fold learning curve.
    """
    data = add_features_to_data(data).rename(columns={'avg_sqm_price': "y"})

    # Standardise the target.
    # NOTE(review): the scaler is fitted on all years, i.e. including the test
    # period, so test-set target statistics leak into training. Kept as-is to
    # preserve existing results -- consider fitting on the training rows only.
    y_scaler = StandardScaler()
    data['y'] = y_scaler.fit_transform(data['y'].values.reshape(-1, 1)).flatten()

    # Temporal split: `year` is categorical, so cast before comparing.
    year_as_int = data['year'].astype(int)
    data_train = data[year_as_int <= last_train_year]
    data_test = data[year_as_int > last_train_year]

    # Identifier columns and the target are not used as model inputs.
    non_feature_columns = ['y', 'year', "muni_code"]
    X_train = data_train.drop(columns=non_feature_columns)
    y_train = data_train['y']
    X_test = data_test.drop(columns=non_feature_columns)
    y_test = data_test['y']
    X = data.drop(columns=non_feature_columns)

    # Numeric and categorical features are identified
    numeric_features = X.select_dtypes(include=["number"]).columns.tolist()
    categorical_features = X.select_dtypes(include=["category"]).columns.tolist()
    # Categories are taken from the full data set so that the encoder also
    # knows levels that only occur in the test years.
    known_categories = [X[col].unique().tolist() for col in categorical_features]

    # Numeric features: scale (without centring), then polynomial expansion.
    numeric_transformer = Pipeline(steps=[
        ('scaler', StandardScaler(with_mean=False)),
        ('poly', PolynomialFeatures(degree=2))
        ])

    # Categorical features: one-hot encoding with the known categories.
    categorical_transformer = OneHotEncoder(categories=known_categories)

    preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

    # Preprocessing followed by a Lasso regressor. The step is named
    # 'classifier' for historical reasons; the name is part of the parameter
    # grid keys below and of the saved parameter names, so it is kept.
    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('classifier', Lasso())
    ])

    # Parameter grid for the grid search
    param_grid = {
        'preprocessor__num__poly__degree': [1, 2, 3],
        'classifier__alpha': np.logspace(-4, 4, 12),
        'classifier__max_iter': [2000]
    }
    # 5-fold cross-validated grid search, scored by (negative) MSE
    grid_search = GridSearchCV(pipeline, param_grid, cv=5, scoring='neg_mean_squared_error')
    grid_search.fit(X_train, y_train)

    best_parameters = grid_search.best_params_
    best_pipeline = grid_search.best_estimator_

    # Test metrics are computed on the original price scale.
    y_pred = y_scaler.inverse_transform(best_pipeline.predict(X_test).reshape(-1, 1)).flatten()
    y_test_inv = y_scaler.inverse_transform(y_test.to_numpy().reshape(-1, 1)).flatten()

    # RMSE as sqrt(MSE): the `squared=False` argument of mean_squared_error was
    # deprecated in scikit-learn 1.4 and removed in 1.6.
    rmse = np.sqrt(mean_squared_error(y_test_inv, y_pred))
    r2 = r2_score(y_test_inv, y_pred)
    coefficients = best_pipeline.named_steps['classifier'].coef_

    # Feature names in the order the ColumnTransformer emits them:
    # polynomial terms of the numeric features first, then the one-hot columns.
    fitted_preprocessor = best_pipeline.named_steps['preprocessor']
    poly_transformer = fitted_preprocessor.named_transformers_['num'].named_steps['poly']
    numeric_polynomial_feature_names = poly_transformer.get_feature_names_out(
        input_features=numeric_features)
    categorical_feature_names = fitted_preprocessor.named_transformers_['cat'].get_feature_names_out(
        input_features=categorical_features)
    polynomial_feature_names = np.concatenate(
        [numeric_polynomial_feature_names, categorical_feature_names])

    # Pair each coefficient with its feature name
    coefs = zip(coefficients, polynomial_feature_names)

    # Data for a learning curve (10-fold CV on the training set)
    train_sizes, train_scores, test_scores = learning_curve(estimator=best_pipeline,
                   X=X_train,
                   y=y_train,
                   train_sizes=np.arange(0.05, 1.05, .05),
                   scoring='neg_mean_squared_error',
                   cv=10)

    learning_curve_data = pd.DataFrame({'Train': -train_scores.mean(axis=1),
                     'Test': -test_scores.mean(axis=1),
                     'sample size': train_sizes})

    # Predictions for every row (train and test), in standardised target units.
    y_pred_alle = best_pipeline.predict(X).reshape(-1, 1).flatten()
    all_pred = data[['muni_code', 'year', 'housing_type', 'y']].assign(y_pred=y_pred_alle)

    # Finally return fitted models, parameters, metrics, coefficients and data for a learning curve
    return (all_pred, grid_search, best_parameters, rmse, r2, coefs, learning_curve_data)

    

In [4]:
# Getting aggregated data stored as csv's
fp = Path("../Boliga data/agg_data/")
files = list(fp.glob('*.csv'))


def _combine_results(frames, empty_columns, newest_first=True, ignore_index=True):
    """Concatenate per-municipality frames once, after the loop.

    Returns an empty frame with ``empty_columns`` when nothing was collected.
    ``newest_first=True`` puts the last processed municipality first, which is
    the row order the saved result files have always had.
    """
    if not frames:
        return pd.DataFrame(columns=empty_columns)
    ordered = frames[::-1] if newest_first else frames
    return pd.concat(ordered, ignore_index=ignore_index)


# Per-municipality results are collected in lists and combined once at the end;
# growing a DataFrame with pd.concat inside the loop copies all previous rows
# on every iteration.
metric_frames = []
model_frames = []
curve_frames = []
coef_frames = []
param_frames = []
prediction_frames = []

# running the loop for modelling
for filename in tqdm.tqdm(sorted(files)):
    print(filename)
    # The municipality code is the trailing number of the file name.
    code_match = re.search(r'(\d+)\.csv$', str(filename))
    if code_match is None:
        # A stray CSV without a trailing number would otherwise abort the run.
        warnings.warn(f"Skipping {filename}: no municipality code in the file name")
        continue
    muni_code = code_match.group(1)
    data = pd.read_csv(filename)  # reading data
    all_pred, grid_search, best_parameters, rmse, r2, coefs, learning_curve_data = make_a_model(data)

    # saving pickled models
    # NOTE: pickles must only ever be loaded from trusted sources.
    rick = pickle.dumps(grid_search)
    model_frames.append(pd.DataFrame({'muni_code': [muni_code],
                                      'pickled_model': [rick]}))

    # saving metrics
    metric_frames.append(pd.DataFrame([(muni_code, rmse, r2)],
                                      columns=['muni_code', 'rmse', 'r2']))

    # saving parameters
    param_row = pd.DataFrame(list(best_parameters.items()), columns=['Parameter', 'Value'])
    param_row['muni_code'] = muni_code
    param_frames.append(param_row)

    # saving coefficients
    coef_row = pd.DataFrame(coefs, columns=['value', 'parameter'])
    coef_row['muni_code'] = muni_code
    coef_frames.append(coef_row)

    # saving learning curve data
    learning_curve_data['muni_code'] = muni_code
    curve_frames.append(learning_curve_data)

    # saving yearly predictions
    prediction_frames.append(all_pred)

# Combine the collected results into the frames used by the saving cell.
metrics = _combine_results(metric_frames, ['muni_code', 'rmse', 'r2'])
fitted_models = _combine_results(model_frames, ['muni_code', 'pickled_model'])
learning_curves = _combine_results(curve_frames, ['muni_code', "Train", "Test", "sample size"])
coefficients = _combine_results(coef_frames, ['muni_code', 'value', 'parameter'])
parameters = _combine_results(param_frames, ['muni_code', 'Parameter', 'Value'])
# Predictions keep file order and their original row index.
all_predictions = _combine_results(prediction_frames,
                                   ['muni_code', 'year', 'housing_type', 'y', 'y_pred'],
                                   newest_first=False, ignore_index=False)




  0%|                                                                                           | 0/98 [00:00<?, ?it/s]

..\Boliga data\agg_data\agg_sales_1992_2022_101.csv


  1%|▊                                                                                  | 1/98 [00:09<14:44,  9.12s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_147.csv


  2%|█▋                                                                                 | 2/98 [00:17<13:47,  8.62s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_151.csv


  3%|██▌                                                                                | 3/98 [00:27<14:38,  9.25s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_153.csv


  4%|███▍                                                                               | 4/98 [00:36<14:32,  9.28s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_155.csv


  5%|████▏                                                                              | 5/98 [00:46<14:49,  9.56s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_157.csv


  6%|█████                                                                              | 6/98 [00:55<14:19,  9.34s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_159.csv


  7%|█████▉                                                                             | 7/98 [01:04<13:57,  9.20s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_161.csv


  8%|██████▊                                                                            | 8/98 [01:12<13:19,  8.88s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_163.csv


  9%|███████▌                                                                           | 9/98 [01:21<13:10,  8.88s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_165.csv


 10%|████████▎                                                                         | 10/98 [01:30<12:48,  8.74s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_167.csv


 11%|█████████▏                                                                        | 11/98 [01:38<12:37,  8.71s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_169.csv


 12%|██████████                                                                        | 12/98 [01:47<12:19,  8.60s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_173.csv


 13%|██████████▉                                                                       | 13/98 [01:55<12:10,  8.59s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_175.csv


 14%|███████████▋                                                                      | 14/98 [02:05<12:35,  8.99s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_183.csv


 15%|████████████▌                                                                     | 15/98 [02:14<12:15,  8.86s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_185.csv


 16%|█████████████▍                                                                    | 16/98 [02:23<12:27,  9.11s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_187.csv


 17%|██████████████▏                                                                   | 17/98 [02:32<12:05,  8.96s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_190.csv


 18%|███████████████                                                                   | 18/98 [02:42<12:33,  9.42s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_201.csv


 19%|███████████████▉                                                                  | 19/98 [02:52<12:37,  9.59s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_210.csv


 20%|████████████████▋                                                                 | 20/98 [03:02<12:38,  9.72s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_217.csv


 21%|█████████████████▌                                                                | 21/98 [03:10<11:35,  9.03s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_219.csv


 22%|██████████████████▍                                                               | 22/98 [03:17<10:33,  8.34s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_223.csv


 23%|███████████████████▏                                                              | 23/98 [03:24<10:00,  8.01s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_230.csv


 24%|████████████████████                                                              | 24/98 [03:31<09:39,  7.83s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_240.csv


 26%|████████████████████▉                                                             | 25/98 [03:39<09:27,  7.78s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_250.csv


 27%|█████████████████████▊                                                            | 26/98 [03:47<09:29,  7.91s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_253.csv


 28%|██████████████████████▌                                                           | 27/98 [03:55<09:11,  7.76s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_259.csv


 29%|███████████████████████▍                                                          | 28/98 [04:03<09:09,  7.84s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_260.csv


 30%|████████████████████████▎                                                         | 29/98 [04:12<09:34,  8.32s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_265.csv


 31%|█████████████████████████                                                         | 30/98 [04:20<09:14,  8.16s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_269.csv


 32%|█████████████████████████▉                                                        | 31/98 [04:27<08:47,  7.87s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_270.csv


 33%|██████████████████████████▊                                                       | 32/98 [04:35<08:42,  7.91s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_306.csv


 34%|███████████████████████████▌                                                      | 33/98 [04:43<08:42,  8.05s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_316.csv


 35%|████████████████████████████▍                                                     | 34/98 [04:51<08:34,  8.04s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_320.csv


 36%|█████████████████████████████▎                                                    | 35/98 [05:01<08:53,  8.46s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_326.csv


 37%|██████████████████████████████                                                    | 36/98 [05:09<08:47,  8.51s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_329.csv


 38%|██████████████████████████████▉                                                   | 37/98 [05:24<10:34, 10.40s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_330.csv


 39%|███████████████████████████████▊                                                  | 38/98 [05:33<09:57,  9.95s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_336.csv


 40%|████████████████████████████████▋                                                 | 39/98 [05:43<09:36,  9.77s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_340.csv


 41%|█████████████████████████████████▍                                                | 40/98 [05:53<09:32,  9.86s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_350.csv


 42%|██████████████████████████████████▎                                               | 41/98 [06:03<09:32, 10.04s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_360.csv


 43%|███████████████████████████████████▏                                              | 42/98 [06:12<09:02,  9.69s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_370.csv


 44%|███████████████████████████████████▉                                              | 43/98 [06:20<08:31,  9.30s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_376.csv


 45%|████████████████████████████████████▊                                             | 44/98 [06:30<08:21,  9.29s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_390.csv


 46%|█████████████████████████████████████▋                                            | 45/98 [06:38<08:01,  9.08s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_400.csv


 47%|██████████████████████████████████████▍                                           | 46/98 [06:47<07:53,  9.11s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_410.csv


 48%|███████████████████████████████████████▎                                          | 47/98 [06:56<07:33,  8.88s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_420.csv


 49%|████████████████████████████████████████▏                                         | 48/98 [07:05<07:24,  8.90s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_430.csv


 50%|█████████████████████████████████████████                                         | 49/98 [07:13<07:11,  8.81s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_440.csv


 51%|█████████████████████████████████████████▊                                        | 50/98 [07:21<06:55,  8.65s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_450.csv


 52%|██████████████████████████████████████████▋                                       | 51/98 [07:29<06:34,  8.40s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_461.csv


 53%|███████████████████████████████████████████▌                                      | 52/98 [07:40<06:51,  8.94s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_479.csv


 54%|████████████████████████████████████████████▎                                     | 53/98 [07:48<06:41,  8.92s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_480.csv


 55%|█████████████████████████████████████████████▏                                    | 54/98 [07:57<06:30,  8.87s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_482.csv


 56%|██████████████████████████████████████████████                                    | 55/98 [08:06<06:16,  8.76s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_492.csv


 57%|██████████████████████████████████████████████▊                                   | 56/98 [08:14<06:00,  8.58s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_510.csv


 58%|███████████████████████████████████████████████▋                                  | 57/98 [08:24<06:12,  9.08s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_530.csv


 59%|████████████████████████████████████████████████▌                                 | 58/98 [08:35<06:20,  9.52s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_540.csv


 60%|█████████████████████████████████████████████████▎                                | 59/98 [08:43<06:01,  9.27s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_550.csv


 61%|██████████████████████████████████████████████████▏                               | 60/98 [08:52<05:40,  8.97s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_561.csv


 62%|███████████████████████████████████████████████████                               | 61/98 [09:00<05:28,  8.87s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_563.csv


 63%|███████████████████████████████████████████████████▉                              | 62/98 [09:09<05:19,  8.88s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_573.csv


 64%|████████████████████████████████████████████████████▋                             | 63/98 [09:18<05:09,  8.84s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_575.csv


 65%|█████████████████████████████████████████████████████▌                            | 64/98 [09:28<05:09,  9.10s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_580.csv


 66%|██████████████████████████████████████████████████████▍                           | 65/98 [09:35<04:43,  8.58s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_607.csv


 67%|███████████████████████████████████████████████████████▏                          | 66/98 [09:50<05:36, 10.52s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_615.csv


 68%|████████████████████████████████████████████████████████                          | 67/98 [10:00<05:19, 10.32s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_621.csv


 69%|████████████████████████████████████████████████████████▉                         | 68/98 [10:08<04:47,  9.59s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_630.csv


 70%|█████████████████████████████████████████████████████████▋                        | 69/98 [10:16<04:27,  9.23s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_657.csv


 71%|██████████████████████████████████████████████████████████▌                       | 70/98 [10:24<04:09,  8.91s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_661.csv


 72%|███████████████████████████████████████████████████████████▍                      | 71/98 [10:32<03:53,  8.66s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_665.csv


 73%|████████████████████████████████████████████████████████████▏                     | 72/98 [10:39<03:33,  8.20s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_671.csv


 74%|█████████████████████████████████████████████████████████████                     | 73/98 [10:47<03:20,  8.01s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_706.csv


 76%|█████████████████████████████████████████████████████████████▉                    | 74/98 [10:54<03:07,  7.83s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_707.csv


 77%|██████████████████████████████████████████████████████████████▊                   | 75/98 [11:03<03:07,  8.15s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_710.csv


 78%|███████████████████████████████████████████████████████████████▌                  | 76/98 [11:11<02:57,  8.06s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_727.csv


 79%|████████████████████████████████████████████████████████████████▍                 | 77/98 [11:19<02:50,  8.13s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_730.csv


 80%|█████████████████████████████████████████████████████████████████▎                | 78/98 [11:26<02:33,  7.66s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_740.csv


 81%|██████████████████████████████████████████████████████████████████                | 79/98 [11:32<02:16,  7.18s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_741.csv


 82%|██████████████████████████████████████████████████████████████████▉               | 80/98 [11:38<02:03,  6.87s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_746.csv


 83%|███████████████████████████████████████████████████████████████████▊              | 81/98 [11:45<01:53,  6.69s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_751.csv


 84%|████████████████████████████████████████████████████████████████████▌             | 82/98 [11:51<01:44,  6.52s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_756.csv


 85%|█████████████████████████████████████████████████████████████████████▍            | 83/98 [11:57<01:35,  6.40s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_760.csv


 86%|██████████████████████████████████████████████████████████████████████▎           | 84/98 [12:03<01:28,  6.33s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_766.csv


 87%|███████████████████████████████████████████████████████████████████████           | 85/98 [12:09<01:21,  6.29s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_773.csv


 88%|███████████████████████████████████████████████████████████████████████▉          | 86/98 [12:15<01:15,  6.26s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_779.csv


 89%|████████████████████████████████████████████████████████████████████████▊         | 87/98 [12:22<01:11,  6.49s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_787.csv


 90%|█████████████████████████████████████████████████████████████████████████▋        | 88/98 [12:28<01:03,  6.39s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_791.csv


 91%|██████████████████████████████████████████████████████████████████████████▍       | 89/98 [12:34<00:55,  6.18s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_810.csv


 92%|███████████████████████████████████████████████████████████████████████████▎      | 90/98 [12:40<00:48,  6.02s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_813.csv


 93%|████████████████████████████████████████████████████████████████████████████▏     | 91/98 [12:46<00:42,  6.06s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_820.csv


 94%|████████████████████████████████████████████████████████████████████████████▉     | 92/98 [12:52<00:35,  5.90s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_825.csv


 95%|█████████████████████████████████████████████████████████████████████████████▊    | 93/98 [12:57<00:28,  5.73s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_840.csv


 96%|██████████████████████████████████████████████████████████████████████████████▋   | 94/98 [13:03<00:22,  5.72s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_846.csv


 97%|███████████████████████████████████████████████████████████████████████████████▍  | 95/98 [13:08<00:17,  5.67s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_849.csv


 98%|████████████████████████████████████████████████████████████████████████████████▎ | 96/98 [13:15<00:12,  6.06s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_851.csv


 99%|█████████████████████████████████████████████████████████████████████████████████▏| 97/98 [13:21<00:06,  6.07s/it]

..\Boliga data\agg_data\agg_sales_1992_2022_860.csv


100%|██████████████████████████████████████████████████████████████████████████████████| 98/98 [13:29<00:00,  8.26s/it]


Saving results from the fitted models

In [8]:

# Write all result frames to the fitted_models/ directory as CSV files.
fp = Path("fitted_models/")
# to_csv does not create missing parent directories, so make sure the output
# directory exists before writing.
fp.mkdir(parents=True, exist_ok=True)

coefficients.to_csv(fp/"all_coefficients_lasso_year.csv")
metrics.to_csv(fp/"all_metrics_lasso_year.csv")
learning_curves.to_csv(fp/"all_learning_curves_lasso_year.csv")
# NOTE(review): `pickled_model` holds raw bytes from pickle.dumps; CSV stores
# their text repr (b'...'), which must be converted back to bytes before
# unpickling. Consider writing one binary .pkl file per municipality instead.
fitted_models.to_csv(fp/"all_fitted_models_lasso_year.csv")
parameters.to_csv(fp/"all_pred_parameters_lasso_year.csv")
all_predictions.to_csv(fp/"all_pred_lasso_year.csv")