## Import packages used

In [13]:
import pandas as pd
import midasmlpy.date_functions as datef # used to handle different frequencies of data and to create lags
import midasmlpy.sparse_group_lasso as sgl # used to run the sparse group lasso and related functions

## Load data

Load data from excel

In [14]:
import os
# load data from xlsx files and create a dataframe
Predictors = pd.read_excel(os.path.abspath('predictors-monthly.xlsx')).to_numpy()
Target = pd.read_excel(os.path.abspath('recessions-quarterly.xlsx')).to_numpy()

Split data into dates and data tables

In [15]:
# Y data and X and Y dates can also be defined as they are the same for all iterations
Y_date = Target[:,0]
Y = Target[:,1]
X_date = Predictors[:,0]
X = Predictors[:,1:]

## Transform data using functions from data_functions

Define variables ued in transformation

In [16]:
# Lag variables
x_lags = 3
y_lags = 0
horizon = 0

# Legendre matrix
degree = 4 # 3 degrees + polynomial 0

Call data transformation function

In [17]:
transformed_data = datef.data_transform(Y, Y_date, X, X_date, x_lags, y_lags, horizon, degree=degree, standardize=True)

## sgLasso binomial

In [18]:
from sklearn.model_selection import train_test_split

X = transformed_data['X_tilde']
y = transformed_data['Y']

# # Split x and y into a 80/20 train test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

family = 'binomial'

# Run the sparse group lasso
model = sgl.best_model(x=X_train, y=y_train, group_size=degree, family=family, nlam=100, pmax=122, intr=False, k_folds=5, disp_flag=True, alpha_values=11, alpha=None)

The performance at different values of alpha are:
{np.float64(1.0): np.float64(0.50489), np.float64(0.9): np.float64(0.50489), np.float64(0.8): np.float64(0.50163), np.float64(0.7): np.float64(0.5183), np.float64(0.6): np.float64(0.50924), np.float64(0.5): np.float64(0.51848), np.float64(0.3999999999999999): np.float64(0.51413), np.float64(0.29999999999999993): np.float64(0.50996), np.float64(0.19999999999999996): np.float64(0.5), np.float64(0.09999999999999998): np.float64(0.50163), np.float64(0.0): np.float64(0.50163)}


In [19]:
model_alpha_specified = sgl.best_model(x=X_train, y=y_train, group_size=degree, family=family, nlam=100, pmax=122, intr=False, k_folds=5, disp_flag=True, alpha_values=None, alpha=(0.175,0.15,0.125,0.075,0.05,0.025))

The performance at different values of alpha are:
{np.float64(0.175): np.float64(0.50163), np.float64(0.15): np.float64(0.50163), np.float64(0.125): np.float64(0.50163), np.float64(0.075): np.float64(0.50163), np.float64(0.05): np.float64(0.50163), np.float64(0.025): np.float64(0.50163)}


In [20]:
df = pd.DataFrame(model_alpha_specified['beta']).set_index(pd.DataFrame(model_alpha_specified['beta']).index / 4).loc[model_alpha_specified['beta']!=0]
print(df)

         0    1    2    3    4    5    6    7    8    9   ...        90  \
1.00    0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ... -0.044630   
1.00    0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ... -0.044630   
1.00    0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ... -0.044630   
1.00    0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ... -0.044630   
1.00    0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ... -0.044630   
...     ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...       ...   
106.75  0.0 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0  ... -0.001485   
106.75  0.0 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0  ... -0.001485   
106.75  0.0 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0  ... -0.001485   
106.75  0.0 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0  ... -0.001485   
106.75  0.0 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0  ... -0.001485   

              91        92        93        94        95        96        97  \
1.00   -0.045380 -0

In [21]:
preds = sgl.predict_binomial(x=X_test, b0=model_alpha_specified['b0'], beta=model_alpha_specified['beta'])
print(preds)

[[0 0 0 ... 0 0 0]
 [0 1 1 ... 1 1 1]
 [0 1 1 ... 1 1 1]
 ...
 [0 1 1 ... 1 1 1]
 [0 0 0 ... 0 0 0]
 [0 1 1 ... 1 1 1]]


In [22]:
model_alpha_specified

{'best_alsparse': np.float64(0.175),
 'best_performance': np.float64(0.5016304347826087),
 'b0': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 'beta': array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 'best_lambda': np.float64(0.0169907919957961),
 'best_beta': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 

In [23]:
sgl.evaluate_binomials(X_test, y_test, model_alpha_specified['b0'], model_alpha_specified['beta'], eval = 'auc', threshold=0.5)

[np.float64(0.5),
 np.float64(0.41532258064516125),
 np.float64(0.41532258064516125),
 np.float64(0.41532258064516125),
 np.float64(0.41532258064516125),
 np.float64(0.4475806451612903),
 np.float64(0.4637096774193549),
 np.float64(0.4637096774193549),
 np.float64(0.4637096774193549),
 np.float64(0.41532258064516125),
 np.float64(0.6330645161290323),
 np.float64(0.6330645161290323),
 np.float64(0.6169354838709677),
 np.float64(0.6008064516129032),
 np.float64(0.6008064516129032),
 np.float64(0.6008064516129032),
 np.float64(0.5846774193548387),
 np.float64(0.5846774193548387),
 np.float64(0.5846774193548387),
 np.float64(0.5846774193548387),
 np.float64(0.5846774193548387),
 np.float64(0.6008064516129032),
 np.float64(0.6008064516129032),
 np.float64(0.6008064516129032),
 np.float64(0.5846774193548387),
 np.float64(0.5846774193548387),
 np.float64(0.5846774193548387),
 np.float64(0.5846774193548387),
 np.float64(0.5846774193548387),
 np.float64(0.6008064516129032),
 np.float64(0.616935