## Import packages used

In [1]:
import os
from pathlib import Path

import numpy as np
import pandas as pd

import midasmlpy.date_functions as datef # used to handle different frequencies of data and to create lags
import midasmlpy.sparse_group_lasso as sgl # used to run the sparse group lasso and related functions

## Load data

Load data from Excel

In [2]:
# Read the monthly predictors and the quarterly recession series from the Excel
# workbooks and convert each sheet to a NumPy array.
# The data folder is defined once: it defaults to the original location and can be
# overridden with the MIDASML_DATA_DIR environment variable, so the notebook is not
# tied to a single machine's directory layout.
DATA_DIR = Path(os.environ.get('MIDASML_DATA_DIR', '/Users/m.egelundmuller/Documents/GitHub/midasmlpy/user_guide'))
Predictors = pd.read_excel(DATA_DIR / 'predictors-monthly.xlsx').to_numpy()
Target = pd.read_excel(DATA_DIR / 'recessions-quarterly.xlsx').to_numpy()

Split data into dates and data tables

In [3]:
# In both arrays the first column is taken as the dates; the target keeps its
# second column as Y, the predictors keep every remaining column as X.
Y_date, Y = Target[:, 0], Target[:, 1]
X_date, X = Predictors[:, 0], Predictors[:, 1:]

## Transform data using functions from date_functions

Define variables used in transformation

In [4]:
# Legendre matrix: polynomial 0 plus 3 further degrees
legendre_degree = 4

# Lag settings handed to the data transformation (x lags, y lags, horizon)
x_lags, y_lags, horizon = 3, 0, 0

Call data transformation function

In [5]:
# Run the transformation on the raw series with the lag and Legendre settings;
# the result is indexed by key below (e.g. 'X_tilde' and 'Y').
transformed_data = datef.data_transform(
    Y, Y_date,
    X, X_date,
    x_lags, y_lags, horizon,
    legendre_degree=legendre_degree,
    standardize=True,
)

## sgLasso

In [6]:
# Regressors and target produced by the transformation step
x, y = transformed_data['X_tilde'], transformed_data['Y']

# 80/20 split by row order: the first 80% of rows are used for fitting,
# the remaining rows are held out
train_size = int(0.8*x.shape[0])
x_train, y_train = x[:train_size], y[:train_size]
x_test, y_test = x[train_size:], y[train_size:]

# Fit the sparse group lasso on the training rows, one group per block of
# `legendre_degree` columns; no explicit alphas are given, only a count of 11
model = sgl.best_model(
    x=x_train,
    y=y_train,
    group_size=legendre_degree,
    nlam=100,
    pmax=122,
    intr=False,
    k_folds=5,
    disp_flag=True,
    alpha_values=11,
    alpha=None,
)

The performance at different values of alpha are:
{1.0: 0.51442, 0.9: 0.51442, 0.8: 0.51442, 0.7: 0.51442, 0.6: 0.52257, 0.5: 0.56304, 0.3999999999999999: 0.59239, 0.29999999999999993: 0.62536, 0.19999999999999996: 0.62971, 0.09999999999999998: 0.60453, 0.0: 0.60018}


In [7]:
# Second fit on the same training data, this time with an explicit tuple of
# alpha candidates instead of a number of alpha values
model_alphaspecified = sgl.best_model(
    x=x_train,
    y=y_train,
    group_size=legendre_degree,
    nlam=100,
    pmax=122,
    intr=False,
    k_folds=5,
    disp_flag=True,
    alpha_values=None,
    alpha=(0.175, 0.15, 0.125, 0.075, 0.05, 0.025),
)

The performance at different values of alpha are:
{0.175: 0.63388, 0.15: 0.62953, 0.125: 0.60453, 0.075: 0.60453, 0.05: 0.60036, 0.025: 0.60018}


In [8]:
# Show the non-zero coefficients of the model fitted with explicit alphas.
# The coefficient position is divided by `legendre_degree` (the group_size passed to
# sgl.best_model) rather than a hard-coded 4, so the integer part of the index is the
# 0-based group number and the fractional part is the position inside that group,
# and the labels stay correct if the degree is changed.
beta_hat = model_alphaspecified['beta']
beta_table = pd.DataFrame(beta_hat)
beta_table = beta_table.set_index(beta_table.index / legendre_degree)
beta_table.loc[beta_hat != 0]

Unnamed: 0,0
1.0,-0.010222
1.25,-0.004834
1.5,-0.006995
1.75,-0.020859
14.25,0.000594
14.5,0.002163
14.75,-0.000944
19.0,-0.010817
19.75,0.053351
36.0,-0.206776
