In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import make_scorer, accuracy_score, confusion_matrix
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import ElasticNet, Lasso, Ridge, LinearRegression

In [2]:
# Define sMAPE
def smape(y_true, y_pred):
    """
    Computes the symmetric mean absolute percentage error between the true and predicted values.
    
    Parameters:
        y_true (array-like): true values of the target variable.
        y_pred (array-like): predicted values of the target variable.
    
    Returns:
        smape (float): symmetric mean absolute percentage error between y_true and y_pred.
    """
    return 100/len(y_true) * np.sum(2 * np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)))


In [3]:
# Load the merged_df_clean dataset
merged_df_clean = pd.read_csv('merged_df_clean.csv')

In [4]:
# Drop the active column from merged_df_clean dataframe
merged_df_clean = merged_df_clean.drop('active', axis=1)

In [None]:
# List of regressors to evaluate
# regressors = [LGBMRegressor(), XGBRegressor(), RandomForestRegressor(), ElasticNet(), Lasso(), Ridge(), LinearRegression()]

In [None]:
# Create a new dataframe to store the results
# results_df = pd.DataFrame(columns=['cfips', 'model_name', 'smape'])

In [None]:
#
### DO NOT RUN!!!
#
# loop through each cfips
for cfips in merged_df_clean['cfips'].unique():

    # filter the data for the current cfips
    cfips_data = merged_df_clean[merged_df_clean['cfips'] == cfips]

    # perform train-test split
    X_train, X_test, y_train, y_test = train_test_split(cfips_data[['pct_bb', 'pct_college', 'median_hh_inc']], cfips_data['microbusiness_density'], test_size=0.2, random_state=92)

    # initialize the models
    lgbm_model = LGBMRegressor()
    xgb_model = XGBRegressor()
    rf_model = RandomForestRegressor()
    en_model = ElasticNet()
    lasso_model = Lasso()
    ridge_model = Ridge()
    lr_model = LinearRegression()

    # perform cross-validation on each model
    models = [('LGBM', LGBMRegressor(random_state=92)),
          ('XGB', XGBRegressor(random_state=92)),
          ('RandomForest', RandomForestRegressor(n_estimators=10, random_state=92)),
          ('ElasticNet', ElasticNet(random_state=92)),
          ('Lasso', Lasso(random_state=92)),
          ('Ridge', Ridge(random_state=92)),
          ('LinearRegression', LinearRegression())]
    smape_results = []
    for name, model in models:
        kf = KFold(n_splits=4, shuffle=True, random_state=92)
        smape_scores = []
        for train_index, val_index in kf.split(X_train):
            X_train_kf, X_val_kf = X_train.iloc[train_index], X_train.iloc[val_index]
            y_train_kf, y_val_kf = y_train.iloc[train_index], y_train.iloc[val_index]
            model.fit(X_train_kf, y_train_kf)
            y_pred_kf = model.predict(X_val_kf)
            smape_score = smape(y_val_kf, y_pred_kf)
            smape_scores.append(smape_score)
        mean_smape = np.mean(smape_scores)
        smape_results.append((name, mean_smape))

    # select the best model based on the mean smape
    best_model = min(smape_results, key=lambda x: x[1])

    # evaluate the best model on the test set
    model = next(filter(lambda x: x[0] == best_model[0], models))[1]
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    smape_score = smape(y_test, y_pred)

    # save the results to the dataframe
    results_df = pd.concat([results_df, pd.DataFrame({'cfips': cfips, 'best_model': best_model[0], 'best_smape': best_model[1]}, index=[0])], ignore_index=True)


In [6]:
# create an empty dataframe to store the results
results_df = pd.DataFrame(columns=['cfips', 'best_model', 'best_smape'])

# loop through each cfips
for cfips in merged_df_clean['cfips'].unique():

    # filter the data for the current cfips
    cfips_data = merged_df_clean[merged_df_clean['cfips'] == cfips]
    
    # skip CFIPS with less than 2 samples
    if len(cfips_data) < 2:
        continue
    
    # perform train-test split
    X_train, X_test, y_train, y_test = train_test_split(cfips_data[['pct_bb', 'pct_college', 'median_hh_inc']], cfips_data['microbusiness_density'], test_size=0.2, random_state=92)

    # skip CFIPS with not enough samples for KFold cross-validation
    if len(X_train) < 7:
        # train and evaluate each model on the entire training set
        lgbm_model = LGBMRegressor()
        xgb_model = XGBRegressor()
        rf_model = RandomForestRegressor()
        en_model = ElasticNet()
        lasso_model = Lasso()
        ridge_model = Ridge()
        lr_model = LinearRegression()

        models = [('LGBM', lgbm_model), ('XGB', xgb_model), ('RandomForest', rf_model), ('ElasticNet', en_model), ('Lasso', lasso_model), ('Ridge', ridge_model), ('LinearRegression', lr_model)]
        smape_results = []
        for name, model in models:
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
            smape_score = smape(y_test, y_pred)
            smape_results.append((name, smape_score))

        # select the best model based on the test smape
        best_model = min(smape_results, key=lambda x: x[1])

    else:
        # perform cross-validation on each model
        lgbm_model = LGBMRegressor()
        xgb_model = XGBRegressor()
        rf_model = RandomForestRegressor()
        en_model = ElasticNet()
        lasso_model = Lasso()
        ridge_model = Ridge()
        lr_model = LinearRegression()

        models = [('LGBM', lgbm_model), ('XGB', xgb_model), ('RandomForest', rf_model), ('ElasticNet', en_model), ('Lasso', lasso_model), ('Ridge', ridge_model), ('LinearRegression', lr_model)]
        smape_results = []
        for name, model in models:
            kf = KFold(n_splits=7, shuffle=True, random_state=92)
            smape_scores = []
            for train_index, val_index in kf.split(X_train):
                X_train_kf, X_val_kf = X_train.iloc[train_index], X_train.iloc[val_index]
                y_train_kf, y_val_kf = y_train.iloc[train_index], y_train.iloc[val_index]
                model.fit(X_train_kf, y_train_kf)
                y_pred_kf = model.predict(X_val_kf)
                smape_score = smape(y_val_kf, y_pred_kf)
                smape_scores.append(smape_score)
            mean_smape = np.mean(smape_scores)
            smape_results.append((name, mean_smape))

        # select the best model based on the mean sm
    # select the best model based on the mean smape
    best_model = min(smape_results, key=lambda x: x[1])

    # evaluate the best model on the test set
    model = next(filter(lambda x: x[0] == best_model[0], models))[1]
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    smape_score = smape(y_test, y_pred)

    # save the results to the dataframe
    results_df = pd.concat([results_df, pd.DataFrame({'cfips': cfips, 'best_model': best_model[0], 'best_smape': best_model[1]}, index=[0])], ignore_index=True)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


In [11]:
results_df

Unnamed: 0,cfips,best_model,best_smape
0,1001,Ridge,1.968795
1,1003,XGB,2.745682
2,1005,RandomForest,2.756395
3,1007,RandomForest,2.393472
4,1009,Ridge,1.664058
...,...,...,...
2647,56035,RandomForest,0.504146
2648,56037,Ridge,3.263048
2649,56041,LinearRegression,3.647097
2650,56043,XGB,2.513421


In [8]:
results_df.describe()

Unnamed: 0,best_smape
count,2652.0
mean,2.921476
std,2.747265
min,1.808078e-15
25%,1.421761
50%,2.19053
75%,3.451699
max,43.16624


In [9]:
merged_df_clean.describe()

Unnamed: 0.1,Unnamed: 0,cfips,microbusiness_density,year,month,pct_bb,pct_college,pct_foreign_born,pct_it_workers,median_hh_inc,region_code,state_code
count,97098.0,97098.0,97098.0,97098.0,97098.0,97098.0,97098.0,97098.0,97098.0,97098.0,97098.0,97098.0
mean,76394.671178,30762.709634,2.839416,2020.694731,6.684463,73.860246,13.126922,3.038964,1.192352,50142.137253,4.584708,26.666904
std,42453.150663,14788.64059,1.705118,0.989301,3.390741,7.805318,4.292398,2.395798,0.685869,9877.042305,1.435578,13.791514
min,1.0,1001.0,0.143384,2019.0,1.0,51.6,2.2,0.0,0.0,22292.0,1.0,1.0
25%,40497.25,19109.0,1.603922,2020.0,4.0,68.9,9.9,1.3,0.7,42965.0,4.0,15.0
50%,75954.5,29201.0,2.365911,2021.0,7.0,74.8,12.6,2.3,1.1,49850.0,5.0,25.0
75%,112136.75,45041.0,3.648241,2022.0,10.0,79.6,15.9,4.2,1.6,56533.0,5.0,40.0
max,150807.0,56045.0,8.837772,2022.0,12.0,92.8,26.2,10.7,3.2,78145.0,8.0,50.0


In [10]:
merged_df_clean.columns

Index(['Unnamed: 0', 'row_id', 'cfips', 'county', 'state',
       'first_day_of_month', 'microbusiness_density', 'year_month', 'year',
       'month', 'pct_bb', 'pct_college', 'pct_foreign_born', 'pct_it_workers',
       'median_hh_inc', 'region', 'region_code', 'state_code'],
      dtype='object')

In [12]:
# Define the features and target variable
features = ['pct_bb', 'pct_college', 'pct_foreign_born', 'median_hh_inc', 'pct_it_workers', 'cfips', 'region_code', 'state_code']
target = 'microbusiness_density'

# Create a Decision Tree regressor model
tree = DecisionTreeRegressor(max_depth=2)

# Fit the model to the data
tree.fit(merged_df_clean[features], merged_df_clean[target])

# Predict the target variable for each row in the data
y_pred = tree.predict(merged_df_clean[features])

# Add the predicted target variable to the data
merged_df_clean['y_pred'] = y_pred

# Partition the data into smaller dataframes based on the predicted target variable
dfs_dt = []
for val, df in merged_df_clean.groupby('y_pred'):
    dfs_dt.append(df.drop(columns=['y_pred']))


In [13]:
dfs_dt

[       Unnamed: 0            row_id  cfips          county    state  \
 78             97   1005_2019-08-01   1005  barbour county  alabama   
 79             98   1005_2019-09-01   1005  barbour county  alabama   
 80             99   1005_2019-10-01   1005  barbour county  alabama   
 81            100   1005_2019-11-01   1005  barbour county  alabama   
 82            101   1005_2019-12-01   1005  barbour county  alabama   
 ...           ...               ...    ...             ...      ...   
 97005      150697  56041_2021-08-01  56041    uinta county  wyoming   
 97006      150698  56041_2021-09-01  56041    uinta county  wyoming   
 97007      150699  56041_2021-10-01  56041    uinta county  wyoming   
 97008      150700  56041_2021-11-01  56041    uinta county  wyoming   
 97009      150701  56041_2021-12-01  56041    uinta county  wyoming   
 
       first_day_of_month  microbusiness_density year_month  year  month  \
 78            2019-08-01               1.073138    2019-0

to partition the data based on the continuous target variable without converting it to a categorical variable, you can use regression trees instead of classification trees. RandomForestRegressor is the regression equivalent of RandomForestClassifier in scikit-learn.

In [None]:
# Define the features and target variable
features = ['pct_bb', 'pct_college', 'pct_foreign_born', 'median_hh_inc', 'pct_it_workers', 'cfips', 'region_code', 'state_code']
target = 'microbusiness_density'

# Create a Random Forest regressor model with 10 trees
rf = RandomForestRegressor(n_estimators=10, random_state=92)

# Fit the model to the data
rf.fit(merged_df_clean[features], merged_df_clean[target])

# Predict the target variable for each row in the data
y_pred = rf.predict(merged_df_clean[features])

# Add the predicted target variable to the data
merged_df_clean['y_pred'] = y_pred

# Partition the data into smaller dataframes based on the predicted target variable
dfs_rf = []
for val, df in merged_df_clean.groupby(pd.cut(merged_df_clean['y_pred'], bins=10)):
    dfs_rf.append(df.drop(columns=['y_pred']))


In [None]:
dfs_rf

Here, we create a RandomForestRegressor model with 10 trees, and fit it to the data using the 'microbusiness_density' variable. We use the model to predict the target variable for each row in the data, and add the predicted values as a new column in the dataframe.

Then, we partition the data into smaller dataframes based on the predicted target variable using the groupby() method in pandas, and pd.cut() function to divide the range of predicted values into 10 bins. We drop the y_pred column from each dataframe before appending it to the list.

In [None]:
# Define the thresholds for the binary classification problem
thresholds = [merged_df_clean[target].quantile(0.33), merged_df_clean[target].quantile(0.67)]

# Loop over each partition
for i, df in enumerate(dfs_dt):
    print(f"Partition {i}")
    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(df[features], df[target], test_size=0.2, random_state=92)

    # Train a random forest classifier
    rf = RandomForestRegressor(n_estimators=10, random_state=92)
    rf.fit(X_train, y_train)

    # Predict the target variable for the test set
    y_pred = rf.predict(X_test)

    # Classify the predictions using the thresholds
    y_pred_class = np.where(y_pred >= thresholds[0], 1, 0)

    # Calculate the accuracy, type 1 and type 2 errors, and confusion matrix
    if np.sum(y_pred_class) == 0:
        tn, fp, fn, tp = 0, 0, np.sum(y_test), 0
    else:
        tn, fp, fn, tp = confusion_matrix(y_test >= thresholds[0], y_pred_class).ravel()
    tpr = tp / (tp + fn)
    fpr = fp / (fp + tn)
    print(f"Accuracy: {accuracy_score(y_test >= thresholds[0], y_pred_class)}")
    print(f"TPR: {tpr}, FPR: {fpr}")
    print(f"Type 1 error: {fp / (tn + fp)}")
    print(f"Type 2 error: {fn / (fn + tp)}")
    print(f"Confusion matrix:\n{confusion_matrix(y_test >= thresholds[0], y_pred_class)}\n")


In [17]:
# Define the thresholds for the binary classification problem
thresholds = [merged_df_clean[target].quantile(0.33), merged_df_clean[target].quantile(0.67)]

# Define a list of models to evaluate
models = [('LGBM', LGBMRegressor(random_state=92)),
          ('XGB', XGBRegressor(random_state=92)),
          ('RandomForest', RandomForestRegressor(n_estimators=10, random_state=92)),
          ('ElasticNet', ElasticNet(random_state=92)),
          ('Lasso', Lasso(random_state=92)),
          ('Ridge', Ridge(random_state=92)),
          ('LinearRegression', LinearRegression())]

# Loop over each partition
for i, df in enumerate(dfs_dt):
    print(f"Partition {i}")
    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(df[features], df[target], test_size=0.2, random_state=92)
    
    # Loop over each model
    for model_name, model in models:
        # Train the model
        model.fit(X_train, y_train)
        
        # Predict the target variable for the test set
        y_pred = model.predict(X_test)
        
        # Classify the predictions using the thresholds
        y_pred_class = np.where(y_pred >= thresholds[0], 1, 0)

        # Calculate the evaluation metrics
        if np.sum(y_pred_class) == 0:
            tn, fp, fn, tp = 0, 0, np.sum(y_test), 0
        else:
            tn, fp, fn, tp = confusion_matrix(y_test >= thresholds[0], y_pred_class).ravel()
        tpr = tp / (tp + fn)
        fpr = fp / (fp + tn)
        
        # Calculate SMAPE
        smape_score = smape(y_test, y_pred)
        
        
        print(f"Model: {model_name}")
        print(f"Accuracy: {accuracy_score(y_test >= thresholds[0], y_pred_class)}")
        print(f"SMAPE: {smape_score}")
        print(f"TPR: {tpr}, FPR: {fpr}")
        print(f"Type 1 error: {fp / (tn + fp)}")
        print(f"Type 2 error: {fn / (fn + tp)}")
        print(f"Confusion matrix:\n{confusion_matrix(y_test >= thresholds[0], y_pred_class)}\n")
        
        

Partition 0
Model: LGBM
Accuracy: 0.8438102583879676
SMAPE: 20.306180301144938
TPR: 0.8440860215053764, FPR: 0.15639810426540285
Type 1 error: 0.15639810426540285
Type 2 error: 0.15591397849462366
Confusion matrix:
[[3738  693]
 [ 522 2826]]

Model: XGB
Accuracy: 0.9096284869520503
SMAPE: 11.75940928233866
TPR: 0.9124850657108722, FPR: 0.09252990295644324
Type 1 error: 0.09252990295644324
Type 2 error: 0.08751493428912784
Confusion matrix:
[[4021  410]
 [ 293 3055]]

Model: RandomForest
Accuracy: 0.9727471397351845
SMAPE: 3.6139251529715
TPR: 0.9716248506571087, FPR: 0.026404874746106973
Type 1 error: 0.026404874746106973
Type 2 error: 0.028375149342891277
Confusion matrix:
[[4314  117]
 [  95 3253]]

Model: ElasticNet
Accuracy: 0.6400565625401723
SMAPE: 34.618915319382225
TPR: 0.6833930704898447, FPR: 0.3926878808395396
Type 1 error: 0.3926878808395396
Type 2 error: 0.31660692951015534
Confusion matrix:
[[2691 1740]
 [1060 2288]]

Model: Lasso
Accuracy: 0.6342717572952822
SMAPE: 34.93

We first define a list of models to evaluate, which includes the following algorithms: LGBM, XGB, RandomForest, ElasticNet, Lasso, Ridge, and LinearRegression. We then loop over each partition, and for each partition, we loop over each model, train the model on the training set, and make predictions on the test set. We then classify the predictions using the predefined thresholds, calculate the evaluation metrics, and print the results.

In [16]:
merged_df_clean['cfips'].nunique()

2655

In [15]:
# Create an empty dataframe to store the results
results_list = []

# Define the thresholds for the binary classification problem
thresholds = [merged_df_clean[target].quantile(0.30), merged_df_clean[target].quantile(0.70)]

# Define a list of models to evaluate
models = [('LGBM', LGBMRegressor(random_state=92)),
          ('XGB', XGBRegressor(random_state=92)),
          ('RandomForest', RandomForestRegressor(n_estimators=10, random_state=92)),
          ('ElasticNet', ElasticNet(random_state=92)),
          ('Lasso', Lasso(random_state=92)),
          ('Ridge', Ridge(random_state=92)),
          ('LinearRegression', LinearRegression())]

# Loop over each partition
for i, df in enumerate(dfs_dt):
#    print(f"Partition {i}")
    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(df[features], df[target], test_size=0.2, random_state=92)
    
    # Loop over each model
    for model_name, model in models:
        # Train the model
        model.fit(X_train, y_train)
        
        # Predict the target variable for the test set
        y_pred = model.predict(X_test)
        
        # Classify the predictions using the thresholds
        y_pred_class = np.where(y_pred >= thresholds[0], 1, 0)

        # Calculate the evaluation metrics
        if np.sum(y_pred_class) == 0:
            tn, fp, fn, tp = 0, 0, np.sum(y_test), 0
        else:
            tn, fp, fn, tp = confusion_matrix(y_test >= thresholds[0], y_pred_class).ravel()
        tpr = tp / (tp + fn)
        fpr = fp / (fp + tn)
        type1_err = fp / (tn + fp)
        type2_err = fn / (fn + tp)
        
        # Calculate SMAPE
        smape_score = smape(y_test, y_pred)
        
        # Add the results to the list
        results_list.append({'Partition': i,
                             'Model': model_name,
                             'Accuracy': accuracy_score(y_test >= thresholds[0], y_pred_class),
                             'TPR': tpr,
                             'FPR': fpr,
                             'Type 1 Error': type1_err,
                             'Type 2 Error': type2_err,
                             'SMAPE': smape_score})

# Create a dataframe from the results list
results_df = pd.concat([pd.DataFrame.from_records([r]) for r in results_list], ignore_index=True)

# Print the results dataframe
results_df


Unnamed: 0,Partition,Model,Accuracy,TPR,FPR,Type 1 Error,Type 2 Error,SMAPE
0,0,LGBM,0.830055,0.870595,0.207041,0.207041,0.129405,20.30618
1,0,XGB,0.904101,0.914716,0.105613,0.105613,0.085284,11.759409
2,0,RandomForest,0.971333,0.972559,0.029788,0.029788,0.027441,3.613925
3,0,ElasticNet,0.640828,0.777778,0.48449,0.48449,0.222222,34.618915
4,0,Lasso,0.630672,0.762712,0.490153,0.490153,0.237288,34.938067
5,0,Ridge,0.684664,0.795265,0.416544,0.416544,0.204735,32.323899
6,0,LinearRegression,0.684664,0.795265,0.416544,0.416544,0.204735,32.3239
7,1,LGBM,0.852172,0.987378,0.629358,0.629358,0.012622,17.410391
8,1,XGB,0.93926,0.986347,0.22844,0.22844,0.013653,8.004332
9,1,RandomForest,0.984111,0.990469,0.038532,0.038532,0.009531,3.253937


In [18]:
# Test the models on the whole dataset

# Define the thresholds for the binary classification problem
thresholds = [merged_df_clean[target].quantile(0.30), merged_df_clean[target].quantile(0.70)]

# Define a list of models to evaluate
models = [('LGBM', LGBMRegressor(random_state=92)),
          ('XGB', XGBRegressor(random_state=92)),
          ('RandomForest', RandomForestRegressor(n_estimators=10, random_state=92)),
          ('ElasticNet', ElasticNet(random_state=92)),
          ('Lasso', Lasso(random_state=92)),
          ('Ridge', Ridge(random_state=92)),
          ('LinearRegression', LinearRegression())]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(merged_df_clean[features], merged_df_clean[target], test_size=0.2, random_state=92)

# Loop over each model
for model_name, model in models:
    # Train the model
    model.fit(X_train, y_train)

    # Predict the target variable for the test set
    y_pred = model.predict(X_test)

    # Classify the predictions using the thresholds
    y_pred_class = np.where(y_pred >= thresholds[0], 1, 0)

    # Calculate the evaluation metrics
    if np.sum(y_pred_class) == 0:
        tn, fp, fn, tp = 0, 0, np.sum(y_test), 0
    else:
        tn, fp, fn, tp = confusion_matrix(y_test >= thresholds[0], y_pred_class).ravel()
    tpr = tp / (tp + fn)
    fpr = fp / (fp + tn)

    # Calculate SMAPE
    smape_score = smape(y_test, y_pred)


    print(f"Model: {model_name}")
    print(f"Accuracy: {accuracy_score(y_test >= thresholds[0], y_pred_class)}")
    print(f"SMAPE: {smape_score}")
    print(f"TPR: {tpr}, FPR: {fpr}")
    print(f"Type 1 error: {fp / (tn + fp)}")
    print(f"Type 2 error: {fn / (fn + tp)}")
    print(f"Confusion matrix:\n{confusion_matrix(y_test >= thresholds[0], y_pred_class)}\n")

Model: LGBM
Accuracy: 0.8215756951596292
SMAPE: 24.461656782882617
TPR: 0.9474959612277868, FPR: 0.47397449155463633
Type 1 error: 0.47397449155463633
Type 2 error: 0.05250403877221325
Confusion matrix:
[[ 3052  2750]
 [  715 12903]]

Model: XGB
Accuracy: 0.8730690010298661
SMAPE: 17.17386845964759
TPR: 0.9554266412101631, FPR: 0.32023440193036884
Type 1 error: 0.32023440193036884
Type 2 error: 0.04457335878983698
Confusion matrix:
[[ 3944  1858]
 [  607 13011]]

Model: RandomForest
Accuracy: 0.9803295571575695
SMAPE: 3.190121728977069
TPR: 0.9872962255837862, FPR: 0.03602206135815236
Type 1 error: 0.03602206135815236
Type 2 error: 0.012703774416213835
Confusion matrix:
[[ 5593   209]
 [  173 13445]]

Model: ElasticNet
Accuracy: 0.7735839340885685
SMAPE: 34.92263770110042
TPR: 0.9478631223380819, FPR: 0.6354705274043433
Type 1 error: 0.6354705274043433
Type 2 error: 0.052136877661918046
Confusion matrix:
[[ 2115  3687]
 [  710 12908]]

Model: Lasso
Accuracy: 0.7646240988671472
SMAPE: 3