## Import packages used

In [2]:
import numpy as np
import pandas as pd
from datetime import datetime
import midasmlpy.date_functions as datef # used to handle different frequencies of data and to create lags
import midasmlpy.sparse_group_lasso as sgl # used to run the sparse group lasso and related functions
from sklearn.model_selection import train_test_split

## Load data

Load data from Excel files

In [3]:
# Read the predictor and target workbooks and convert each one to a NumPy array.
# NOTE(review): both paths are absolute and machine-specific — consider a configurable data directory.
predictors_frame = pd.read_excel('/Users/m.egelundmuller/Documents/GitHub/midasmlpy/user_guide/predictors-monthly.xlsx')
Predictors = predictors_frame.to_numpy()
target_frame = pd.read_excel('/Users/m.egelundmuller/Documents/GitHub/midasmlpy/user_guide/gdp-quarterly.xlsx')
Target = target_frame.to_numpy()

Split data into dates and data tables

In [4]:
# In both arrays column 0 is taken as the date column.
# Target: dates plus the single value column.
Y_date, Y = Target[:, 0], Target[:, 1]
# Predictors: dates plus the series stored in columns 1 through 11.
X_date, X = Predictors[:, 0], Predictors[:, 1:12]

## Transform data using functions from date_functions

Define variables used in the transformation

In [5]:
# Lag settings handed to the data transformation: lags of x, lags of y, and the horizon
x_lags, y_lags, horizon = 3, 0, 0

# Degree used for the Legendre matrix
# NOTE(review): the previous note here read "3 degrees + polynomial 0", which does not
# match the value 2 — confirm the intended degree.
legendre_degree = 2

Call data transformation function

In [6]:
# Transform the raw target/predictor series; the result is indexed by key
# ('X_tilde' and 'Y' are read from it in the next cell).
transformed_data = datef.data_transform(
    Y, Y_date, X, X_date,
    x_lags, y_lags, horizon,
    legendre_degree=legendre_degree,
    standardize=True,
)

In [7]:
# Pull the transformed regressors and the target out of the transformation result
x, y = transformed_data['X_tilde'], transformed_data['Y']

# Unshuffled 80/20 split: the first 80% of the rows (rounded down) go to the
# training set, the remaining rows to the test set
train_size = int(x.shape[0] * 0.8)
x_train, y_train = x[:train_size], y[:train_size]
x_test, y_test = x[train_size:], y[train_size:]

## sgLasso

In [8]:
import sparsegllog_compiled
from scipy.sparse.linalg import svds

In [9]:
# --- Settings for the sparse group lasso solver ---
# NOTE(review): group_size is set equal to legendre_degree — confirm this matches the
# number of columns data_transform produces per predictor.
group_size = legendre_degree
alsparse = 0.95
pmax = 22
intr = True
nlam = None
ulam = None

# Resolve the lambda inputs: a user-supplied ulam determines nlam; otherwise nlam
# defaults to 100 and ulam is filled with ones.
if ulam is not None:
    ulam = np.array(ulam)
    nlam = len(ulam)  # Override nlam based on the length of ulam if ulam is provided
elif nlam is None:
    nlam = 100  # Default value if neither ulam nor nlam is provided

if ulam is None:
    ulam = np.ones(nlam)  # Default ulam if not provided

nobs, nvars = x.shape[0], x.shape[1]  # Number of observations and features

# The group index arrays below only line up (len(ix) == len(iy) == bn) when the columns
# split evenly into groups, so fail early with a clear message otherwise.
if nvars % group_size != 0:
    raise ValueError(
        f"Number of columns in x ({nvars}) is not a multiple of group_size ({group_size})."
    )

eps = 0.1  # Convergence threshold
maxit = int(3e8)  # Maximum number of iterations (kept as an int: it is compared with the integer pass count)
bn = nvars // group_size  # Number of groups as an integer
bs = np.full(bn, group_size, dtype=int)  # Elements in groups
ix = np.arange(0, nvars, group_size)  # 0-based position of the first column of each group in x
iy = np.arange(group_size - 1, nvars, group_size)  # 0-based position of the last column of each group in x
gam = 0.25 * sgl.calc_gamma(x, ix, iy, bn)  # Calculate gamma values for each group of features (columns)
pf, pfl1 = np.sqrt(bs), np.ones(nvars)  # Penalty factors for L2 and L1 penalties
dfmax = bn + 1  # Maximum number of groups
flmin = 0.01 if nobs < nvars else 1e-04
lb, ub = np.full(bn, -np.inf), np.full(bn, np.inf)  # Lower and upper bounds for the coefficients


In [10]:
# Run the compiled sparse group lasso routine.
# ix and iy are shifted by one because Fortran is 1-indexed while Python is 0-indexed.
nalam, b0, beta, activeGroup, nbeta, alam, npass, jerr, mse = sparsegllog_compiled.sparse_four(
    x=x,
    y=y,
    bn=bn,
    bs=bs,
    ix=ix + 1,
    iy=iy + 1,
    gam=gam,
    nobs=nobs,
    nvars=nvars,
    pf=pf,
    pfl1=pfl1,
    dfmax=dfmax,
    pmax=pmax,
    nlam=nlam,
    flmin=flmin,
    ulam=ulam,
    eps=eps,
    maxit=maxit,
    intr=intr,
    lb=lb,
    ub=ub,
    alsparse=alsparse,
)

# Any non-zero error flag from the routine is treated as a failed estimation.
if jerr != 0:
    raise ValueError("Error in the sparse group lasso estimation.")

# Reaching the iteration cap exactly is treated as a failure to converge.
if npass == maxit:
    raise ValueError("Failed to converge in the sparse group lasso estimation.")

In [None]:
# Display the pass count returned by the solver (the value checked against maxit)
npass

375