# Crypto currency prediction
Members: ...

> Acknowledgement: we used some code from [Proposal for a meaningful LB + Strict LGBM](https://www.kaggle.com/julian3833/proposal-for-a-meaningful-lb-strict-lgbm). We follow the definition of "Strict" from this author.

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime
import tensorflow as tf
import gresearch_crypto
import gc

## Loading Data and preprocessing
We follow the "strict" criteria from [here](https://www.kaggle.com/julian3833/proposal-for-a-meaningful-lb-strict-lgbm). Therefore our score is valid.

In [None]:
# Load the training data under the "strict" time cutoff.
def read_csv_strict(file_name='../input/g-research-crypto-forecasting/train.csv'):
    """Read a CSV and keep only the rows dated before 2021-06-13 00:00:00.

    A 'datetime' column is derived from the 'timestamp' column (interpreted
    as seconds since the epoch) and added to the returned frame.
    """
    frame = pd.read_csv(file_name)
    frame['datetime'] = pd.to_datetime(frame['timestamp'], unit='s')
    before_cutoff = frame['datetime'] < '2021-06-13 00:00:00'
    return frame[before_cutoff]

# Training rows that pass the strict time cutoff applied by read_csv_strict().
data_df = read_csv_strict()
data_folder = '../input/g-research-crypto-forecasting/'
# Asset metadata indexed by Asset_ID; get_score() reads its 'Weight' column.
asset_details_df = pd.read_csv(data_folder + 'asset_details.csv').set_index('Asset_ID')

## (Maybe) useful functions

In [None]:
def train_test_split(df, datetime):
    """Split `df` on its 'datetime' column.

    Returns a pair: the rows strictly before `datetime`, and the rows at or
    after `datetime`.
    """
    earlier = df[df['datetime'] < datetime]
    later = df[df['datetime'] >= datetime]
    return earlier, later

DEFAULT_FEATURES = ['Count', 'Open', 'High', 'Low', 'Close', 'Volume', 'VWAP']
def get_Xy(df, asset_id, features=DEFAULT_FEATURES, na_treatment='drop'):
    """
    Build the feature matrix and target vector for a single asset.

    Rows are restricted to `asset_id`, and +/-inf values are turned into NaN.
    With na_treatment='zero' every NaN becomes 0.; any other value (the
    default is 'drop') removes rows containing a NaN in any column.
    The filtering works on new frames; the frame passed in is not modified.

    Returns (X, y): the `features` columns and the 'Target' column.
    """
    asset_rows = df[df["Asset_ID"] == asset_id].replace([np.inf, -np.inf], np.nan)
    if na_treatment == 'zero':
        cleaned = asset_rows.replace(np.nan, 0.)
    else:
        cleaned = asset_rows.dropna(how='any')

    return cleaned[features], cleaned['Target']

def get_corr(pred, y):
    """Return the Pearson correlation coefficient between `pred` and `y`.

    np.correlate (used previously) computes an unnormalised sliding dot
    product, not a correlation coefficient; np.corrcoef gives the value in
    [-1, 1] that the benchmark code in this notebook also uses.
    """
    return np.corrcoef(pred, y)[0, 1]

def get_score(preds, ys):
    """Weighted average of the per-asset Pearson correlations.

    `preds` and `ys` map an asset id to its predictions / true targets.
    Each asset's correlation is weighted by the 'Weight' column of the
    module-level `asset_details_df` (indexed by Asset_ID).

    Each correlation is stored as a scalar so that `corrs` and `weights` are
    both 1-D; with (n, 1) x (n,) shapes the product would broadcast to an
    (n, n) matrix and the weighting would cancel out.
    """
    corrs, weights = [], []
    for asset, pred in preds.items():
        corr = np.corrcoef(pred, ys[asset])[0, 1]
        # A constant series has no defined correlation; count it as 0,
        # as Benchmark.benchmark does.
        if np.isnan(corr):
            corr = 0.
        corrs.append(corr)
        weights.append(asset_details_df.loc[asset, 'Weight'])
    corrs = np.array(corrs)
    weights = np.array(weights)
    return (corrs * weights).sum() / weights.sum()

## Base class of all models
All models shall derive from this class and override the methods `train()` and `predict()`. The inputs of these methods should be pandas DataFrames.

In [None]:
class CryptoModel:
    """Common interface of the models in this notebook.

    Both methods are no-ops here; subclasses are expected to override them.
    """

    def train(self, df):
        """Fit the model on the DataFrame `df`. The base version does nothing."""
        return None

    def predict(self, df_test, df_pred):
        """Fill predictions for `df_test` into `df_pred`. The base version does nothing."""
        return None

## Baseline \#1: End-to-end Fully Connected Neural Network
Turns out NN is prone to overfitting. Results are all negative.

In [None]:
# Baseline fully-connected NN on local data only
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from tqdm import tqdm

class FCNetwork(nn.Module):
    """A plain stack of Linear layers with an activation between them.

    `sizes` lists the layer widths, so len(sizes) - 1 Linear layers are
    created. An activation follows every Linear layer except the last one,
    unless `activation_out` is true, in which case the last layer gets one
    too. With fewer than two sizes the network is a single Identity.
    """

    def __init__(self, sizes, activation=nn.ReLU, activation_out=False):
        super().__init__()
        modules = []
        last = len(sizes) - 2
        for i, (n_in, n_out) in enumerate(zip(sizes[:-1], sizes[1:])):
            modules.append(nn.Linear(n_in, n_out))
            if activation_out or i != last:
                modules.append(activation())
        if not modules:
            modules.append(nn.Identity())
        self.layers = nn.ModuleList(modules)

    def forward(self, x):
        """Apply every layer in order and return the final output."""
        out = x
        for module in self.layers:
            out = module(out)
        return out

class FCModel(nn.Module, CryptoModel):
    """Fully connected baseline: one shared "public" trunk plus one "private" head per asset.

    NOTE(review): `train` here replaces nn.Module.train(mode). PyTorch's
    `eval()` calls `self.train(False)`, so avoid calling `eval()` on this
    class. The name is kept because CryptoModel requires it.
    """

    def __init__(self, sizes_public, sizes_private, lr=1e-4, activation=nn.ReLU, dtype=torch.float, device='cpu',
                 n_assets=14):
        """
        sizes_public / sizes_private: layer widths of the shared trunk and of each per-asset head.
        lr: Adam learning rate.
        n_assets: number of per-asset heads; Asset_ID is used as the head index.
        """
        super().__init__()
        self.device = device
        self.dtype = dtype
        self.public_network = FCNetwork(sizes_public, activation=activation, activation_out=True).to(device)
        self.n_public_layers = len(sizes_public) - 1
        self.private_networks = nn.ModuleList([FCNetwork(sizes_private, activation=activation) for _ in range(n_assets)]).to(device)
        self.n_private_layers = len(sizes_private) - 1
        params = list(self.public_network.parameters()) + list(self.private_networks.parameters())
        self.optimizer = optim.Adam(params=params, lr=lr)
        self.features = ['Count', 'Open', 'High', 'Low', 'Close', 'Volume', 'VWAP']
        print('Initialized.')

    def forward(self, x, asset):
        """Run `x` through the shared trunk and the head of `asset`.

        Only the trailing size-1 output dimension is squeezed, so a batch
        holding a single sample keeps its batch dimension.
        """
        h = self.public_network(x)
        pn = self.private_networks[asset]
        return pn(h).squeeze(-1)

    def train(self, df, loss_fn=nn.MSELoss(), epoch=10, batch_size=1024):
        """Train on `df` for `epoch` passes, visiting the assets one after another."""
        assets = df['Asset_ID'].drop_duplicates().to_list()

        dataloaders = {}
        print('Begin moving.')
        for asset in assets:
            # get_Xy builds new frames, so the caller's df is left untouched.
            X, y = get_Xy(df, asset, features=self.features)
            X = torch.tensor(X.to_numpy(), device=self.device, dtype=self.dtype)
            y = torch.tensor(y.to_numpy(), device=self.device, dtype=self.dtype)
            dataloaders[asset] = DataLoader(TensorDataset(X, y), batch_size=batch_size)
        print('End moving.')

        for i in range(epoch):
            for asset, dataloader in dataloaders.items():
                loop = tqdm(dataloader, total=len(dataloader))
                for x, target in loop:
                    pred = self.forward(x, asset)
                    loss = loss_fn(pred, target)
                    self.optimizer.zero_grad()
                    loss.backward()
                    self.optimizer.step()
                    loop.set_description('Epoch [{}/{}]'.format(i+1, epoch))
                    loop.set_postfix(loss = loss.item())

    def test(self, X, y, asset=0):
        """Return the Pearson correlation between the predictions for `X` and `y`.

        `asset` selects the per-asset head (default 0).
        """
        X = torch.as_tensor(X, device=self.device, dtype=self.dtype)
        with torch.no_grad():
            pred = self.forward(X, asset).cpu().numpy()
        return np.corrcoef(pred, y)[0, 1]

    def predict(self, df_test, df_pred):
        """Write one prediction per row of `df_test` into `df_pred['Target']`, matched on 'row_id'."""
        with torch.no_grad():
            for _, row in df_test.iterrows():
                idx = row['row_id']
                # iterrows yields mixed-dtype rows; convert to floats before building the tensor.
                x_test = torch.tensor(row[self.features].to_numpy(dtype=float), device=self.device, dtype=self.dtype)
                # Asset_ID may arrive as a float; ModuleList needs an int index.
                y_pred = self.forward(x_test, int(row['Asset_ID'])).item()
                df_pred.loc[df_pred['row_id'] == idx, 'Target'] = y_pred
        return df_pred

    def finalize(self):
        """Move the model to the CPU."""
        self.to('cpu')
        self.device = 'cpu'

In [None]:
# Disabled example: train the FC baseline on the rows of Asset_ID == 1.
# It is wrapped in a string literal, so running this cell does nothing.
"""
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
fc_model = FCModel([7, 16], [16, 1], device=device)
fc_model.train(data_df[data_df['Asset_ID'] == 1], epoch=3)
fc_model.finalize()
"""

## Baseline \#2: Light GBM with Shadow-Features
Using 2 features from [G-Research - Starter [0.361 LB]](https://www.kaggle.com/danofer/g-research-starter-0-361-lb).

`LB=0.018`.

In [None]:
from lightgbm import LGBMRegressor
from tqdm import tqdm
class BasicLGBM(CryptoModel):
    """One LightGBM regressor per asset on the raw candle columns plus upper/lower shadows."""

    def __init__(self):
        # Indexed by Asset_ID; each slot is filled by train().
        self.models = [None] * 14
        self.features = ['Count', 'Open', 'High', 'Low', 'Close', 'Volume', 'VWAP', 'Lower_Shadow', 'Upper_Shadow']

    def train(self, df):
        """Fit an LGBMRegressor(n_estimators=10) for every asset present in `df`."""
        # _extend works on a copy, so the caller's frame is not modified.
        df = self._extend(df)
        assets = df['Asset_ID'].drop_duplicates().to_list()

        for asset in assets:
            X, y = get_Xy(df, asset, features=self.features)
            model = LGBMRegressor(n_estimators=10)
            model.fit(X, y)
            self.models[asset] = model

    def predict(self, df_test, df_pred):
        """Predict row by row and write the results into `df_pred['Target']`.

        Rows of `df_pred` are matched through `row_id == <index label of the df_test row>`.
        """
        df_test = self._extend(df_test)
        total = len(df_test.index)

        row_generator = df_test.iterrows()
        row_generator = tqdm(row_generator, total=total) # Enable progress bar, comment this line to disable it.
        for j, row in row_generator:
            asset = row['Asset_ID']
            x_test = row[self.features]
            model = self.models[int(asset)]
            y_pred = model.predict([x_test])[0]
            mask = df_pred['row_id'] == j
            df_pred.loc[mask, 'Target'] = y_pred
        return df_pred

    def _extend(self, df):
        """Return a copy of `df` with the 'Upper_Shadow' and 'Lower_Shadow' columns added."""
        df = df.copy()
        df['Upper_Shadow'] = self._upper_shadow(df)
        df['Lower_Shadow'] = self._lower_shadow(df)
        return df

    def _upper_shadow(self, df):
        """High minus the larger of Close and Open."""
        return df['High'] - np.maximum(df['Close'], df['Open'])

    def _lower_shadow(self, df):
        """The smaller of Close and Open, minus Low."""
        return np.minimum(df['Close'], df['Open']) - df['Low']

## LGBM Model v1.0
This model includes the features `Upper_Shadow`, `Lower_Shadow`, `Avg_Vol`, `Rel_Upper`, `Upper_VWAP`, and `Upper_Vol` (the `Liquidity` feature is currently commented out in the code). So far it does not consider correlation between groups.

`LB=0.0402`

In [None]:
from lightgbm import LGBMRegressor
from tqdm import tqdm
class LGBMv1(CryptoModel):
    """Per-asset LightGBM on the raw candle columns plus hand-made ratio features."""

    def __init__(self):
        # Indexed by Asset_ID; each slot is filled by train().
        self.models = [None] * 14
        # Only columns that _extend() actually creates are listed here;
        # asking get_Xy for a column that does not exist raises a KeyError.
        self.features = ['Count', 'Open', 'High', 'Low', 'Close', 'Volume', 'VWAP', 'Lower_Shadow', 'Upper_Shadow',
                         'Avg_Vol', 'Rel_Upper', 'Upper_VWAP', 'Upper_Vol']

    def train(self, df):
        """Fit an LGBMRegressor(n_estimators=10) for every asset present in `df`."""
        # Copy first: _extend adds its columns in place.
        df = df.copy()
        df = self._extend(df)
        assets = df['Asset_ID'].drop_duplicates().to_list()

        for asset in assets:
            X, y = get_Xy(df, asset, features=self.features)
            model = LGBMRegressor(n_estimators=10)
            model.fit(X, y)
            self.models[asset] = model

    def predict(self, df_test, df_pred):
        """Predict row by row and write the results into `df_pred['Target']`.

        Rows of `df_pred` are matched through `row_id == <index label of the df_test row>`.
        """
        df_test = df_test.copy()
        df_test = self._extend(df_test)
        total = len(df_test.index)

        row_generator = df_test.iterrows()
        row_generator = tqdm(row_generator, total=total) # Enable progress bar, comment this line to disable it.
        for j, row in row_generator:
            asset = row['Asset_ID']
            x_test = row[self.features]
            model = self.models[int(asset)]
            y_pred = model.predict([x_test])[0]
            mask = df_pred['row_id'] == j
            df_pred.loc[mask, 'Target'] = y_pred
        return df_pred

    def _extend(self, df):
        """Add the engineered feature columns to `df` in place and return it.

        Every ratio maps inf/NaN (e.g. from a zero denominator) to 0.
        """
        df['Upper_Shadow'] = df['High'] - np.maximum(df['Close'], df['Open'])
        df['Lower_Shadow'] = np.minimum(df['Close'], df['Open']) - df['Low']
        # Disabled feature, kept for reference:
        #df['Liquidity'] = ((2 * (df['High'] - df['Low']) - np.absolute(df['Open'] - df['Close']))/ df['Volume']).replace([np.inf, -np.inf, np.nan], 0.)
        df['Avg_Vol'] = (df['Volume'] / df['Count']).replace([np.inf, -np.inf, np.nan], 0.)
        df['Rel_Upper'] = ((df['High'] - df['VWAP']) / (df['High'] - df['Low'])).replace([np.inf, -np.inf, np.nan], 0.)
        df['Upper_VWAP'] = ((df['High'] - df['VWAP']) / df['VWAP']).replace([np.inf, -np.inf, np.nan], 0.)
        df['Upper_Vol'] = ((df['High'] - df['VWAP']) / df['Volume']).replace([np.inf, -np.inf, np.nan], 0.)
        return df

In [None]:

# Train the v1.0 model on the strictly filtered training data.
model_lgbm = LGBMv1()
model_lgbm.train(data_df)


In [None]:
# NOTE: Benchmark is defined further down, in the "Substitute Evaluation"
# section; that cell has to be executed before this one.
benchmark_debug = Benchmark()
benchmark_debug.benchmark(model_lgbm)

In [None]:
# Disabled snippet: print the five most important features of every fitted model.
# It is wrapped in a string literal, so running this cell does nothing.
'''
for m in model_lgbm.models:
    z = list(zip(list(m.feature_name_), list(m.feature_importances_)))
    z.sort(key=lambda x:x[1], reverse=True)
    print(z[:5])
'''

## LGBM Model v1.1
Adds group correlation.

`LB=0.0447`

In [None]:
class LGBMv1_1(LGBMv1):
    """LGBMv1 extended with per-timestamp group averages of selected features."""

    def __init__(self):
        super().__init__()
        self.features.extend(
            ['Upper_Rel', 'Lower_Rel', 'Upper_Rel_GA', 'Lower_Rel_GA', 'Avg_Vol_GA', 'Rel_Upper_GA']
        )

    def train(self, df):
        """Fit an LGBMRegressor(n_estimators=10) per asset on the extended features."""
        frame = self._extend_correlation(self._extend(df.copy()))

        for asset_id in frame['Asset_ID'].unique().tolist():
            X, y = get_Xy(frame, asset_id, features=self.features)
            regressor = LGBMRegressor(n_estimators=10)
            regressor.fit(X, y)
            self.models[asset_id] = regressor

    def predict(self, df_test, df_pred):
        """Predict row by row; a df_pred row is matched by row_id == index label of the df_test row."""
        frame = self._extend_correlation(self._extend(df_test.copy()))

        # Progress bar over the rows; iterate frame.iterrows() directly to disable it.
        rows = tqdm(frame.iterrows(), total=len(frame.index))
        for label, record in rows:
            regressor = self.models[int(record['Asset_ID'])]
            prediction = regressor.predict([record[self.features]])[0]
            df_pred.loc[df_pred['row_id'] == label, 'Target'] = prediction
        return df_pred

    def _extend_correlation(self, df):
        """Add the relative shadows and the '*_GA' group means to `df` in place; return it."""
        bad_values = [np.inf, -np.inf, np.nan]
        df['Upper_Rel'] = (df['Upper_Shadow'] / df['Open']).replace(bad_values, 0.)
        df['Lower_Rel'] = (df['Lower_Shadow'] / df['Open']).replace(bad_values, 0.)
        # '<name>_GA' is the mean of <name> over all rows sharing a timestamp.
        for name in ('Upper_Rel', 'Lower_Rel', 'Avg_Vol', 'Rel_Upper'):
            df[name + '_GA'] = df.groupby(['timestamp'])[name].transform('mean')
        return df
        

In [None]:

#model_lgbm = LGBMv1_1()
#model_lgbm.train(data_df)


## LGBM Model v1.2

Adds the quantile features `R_quantile` and `Rel_Upper_quantile`.

In [None]:
from lightgbm import LGBMRegressor
from tqdm import tqdm
class LGBMv1_2(CryptoModel):
    """Per-asset LightGBM on ratio features plus per-timestamp min-max positions."""

    def __init__(self):
        # Indexed by Asset_ID; each slot is filled by train().
        self.models = [None] * 14
        self.features = ['Count', 'Volume', 'Lower_Shadow', 'Upper_Shadow','Avg_Vol', 'Rel_Upper', 'Upper_VWAP', 'Upper_Vol', 'R_quantile', 'Rel_Upper_quantile']

    def train(self, df):
        """Fit an LGBMRegressor(n_estimators=10) for every asset present in `df`."""
        # Copy first: _extend adds its columns in place.
        df = df.copy()
        df = self._extend(df)
        assets = df['Asset_ID'].drop_duplicates().to_list()

        for asset in assets:
            X, y = get_Xy(df, asset, features=self.features)
            model = LGBMRegressor(n_estimators=10)
            model.fit(X, y)
            self.models[asset] = model

    def predict(self, df_test, df_pred):
        """Predict row by row and write the results into `df_pred['Target']`.

        Rows of `df_pred` are matched through `row_id == <index label of the df_test row>`.
        """
        df_test = df_test.copy()
        df_test = self._extend(df_test)
        total = len(df_test.index)

        row_generator = df_test.iterrows()
        row_generator = tqdm(row_generator, total=total) # Enable progress bar, comment this line to disable it.
        for j, row in row_generator:
            asset = row['Asset_ID']
            x_test = row[self.features]
            model = self.models[int(asset)]
            y_pred = model.predict([x_test])[0]
            mask = df_pred['row_id'] == j
            df_pred.loc[mask, 'Target'] = y_pred
        return df_pred

    def _extend(self, df):
        """Add the engineered feature columns to `df` in place and return it.

        Ratio features map inf/NaN to 0. The two '*_quantile' features are
        the min-max position of a value among the rows sharing its
        timestamp; when max == min (0/0) they fall back to 0.5.
        """
        df['Upper_Shadow'] = df['High'] - np.maximum(df['Close'], df['Open'])
        df['Lower_Shadow'] = np.minimum(df['Close'], df['Open']) - df['Low']
        # Disabled feature, kept for reference:
        #df['Liquidity'] = ((2 * (df['High'] - df['Low']) - np.absolute(df['Open'] - df['Close']))/ df['Volume']).replace([np.inf, -np.inf, np.nan], 0.)
        df['Avg_Vol'] = (df['Volume'] / df['Count']).replace([np.inf, -np.inf, np.nan], 0.)
        df['Rel_Upper'] = ((df['High'] - df['VWAP']) / (df['High'] - df['Low'])).replace([np.inf, -np.inf, np.nan], 0.)
        df['Upper_VWAP'] = ((df['High'] - df['VWAP']) / df['VWAP']).replace([np.inf, -np.inf, np.nan], 0.)
        df['Upper_Vol'] = ((df['High'] - df['VWAP']) / df['Volume']).replace([np.inf, -np.inf, np.nan], 0.)

        df['Return_1min'] = (df['Close']/df['Open'] - 1.0).replace([np.inf, -np.inf, np.nan], 0.)
        df['R_max'] = df.groupby(['timestamp'])['Return_1min'].transform('max')
        df['R_min'] = df.groupby(['timestamp'])['Return_1min'].transform('min')
        # The whole quotient is parenthesised so that .replace() acts on the
        # result of the division, not only on the denominator.
        df['R_quantile'] = ((df['Return_1min'] - df['R_min'])/(df['R_max'] - df['R_min'])).replace([np.inf, -np.inf, np.nan], 0.5)

        df['Rel_Upper_max'] = df.groupby(['timestamp'])['Rel_Upper'].transform('max')
        df['Rel_Upper_min'] = df.groupby(['timestamp'])['Rel_Upper'].transform('min')
        df['Rel_Upper_quantile'] = ((df['Rel_Upper'] - df['Rel_Upper_min'])/(df['Rel_Upper_max'] - df['Rel_Upper_min'])).replace([np.inf, -np.inf, np.nan], 0.5)

        return df
In [None]:
# Train the v1.2 model; this rebinds model_lgbm, which is later registered for submission.
model_lgbm = LGBMv1_2()
model_lgbm.train(data_df)

## Substitute Evaluation
The evaluation process for submissions is slow and opaque, so this section implements a local benchmark for the models.

In [None]:
class Benchmark:
    """Local substitute for the submission scoring.

    Holds a test frame, a prediction frame and the true targets, lets a
    model fill in the predictions group by group, and scores them with the
    weighted mean of the per-asset Pearson correlations.
    """

    def __init__(self, debug=False):
        """
        debug=True  : use example_test.csv / example_sample_submission.csv.
        debug=False : use the first 100000 rows of supplemental_train.csv
                      dated on or after 2021-06-13 00:00:00, as one group.
        """
        data_folder = '../input/g-research-crypto-forecasting/'
        self.test_features = ['timestamp', 'Asset_ID', 'Count', 'Open', 'High', 'Low', 'Close', 'Volume', 'VWAP', 'group_num']
        self.asset_details_df = pd.read_csv(data_folder + 'asset_details.csv')
        if debug:
            self.df_test = pd.read_csv(data_folder + 'example_test.csv')
            self.df_pred = pd.read_csv(data_folder + 'example_sample_submission.csv')
            self.target = self.df_test.copy()
            self.target['Target'] = self.df_pred['Target']
        else:
            df = pd.read_csv(data_folder + 'supplemental_train.csv')
            df['datetime'] = pd.to_datetime(df['timestamp'], unit='s')
            df = df[df['datetime'] >= '2021-06-13 00:00:00'].head(100000)
            # drop=True: the old index is not needed and would otherwise be kept as an 'index' column.
            df = df.replace([np.inf, -np.inf], np.nan).dropna(how='any').reset_index(drop=True)
            df['group_num'] = 0
            df['row_id'] = df.index
            # row_id is included so models that read df_test['row_id']
            # (e.g. FCModel.predict) can be benchmarked too.
            self.df_test = df[self.test_features + ['row_id']].copy()
            self.df_pred = df[['group_num', 'row_id', 'Target']].copy()
            self.df_pred['Target'] = 0.
            self.target = self.df_test.copy()
            self.target['Target'] = df['Target']

    def benchmark(self, model):
        """Run `model.predict` on every group and return the weighted correlation score.

        The predictions are written into self.df_pred. Assets whose
        correlation is NaN (e.g. constant predictions) contribute 0.
        """
        df_test, df_pred, target, detail = self.df_test, self.df_pred, self.target, self.asset_details_df
        for group in df_test['group_num'].drop_duplicates().to_list():
            test_mask = df_test['group_num'] == group
            pred_mask = df_pred['group_num'] == group
            # Pass explicit copies so the model may write into its df_pred freely.
            y = model.predict(df_test[test_mask].copy(), df_pred[pred_mask].copy())
            df_pred.loc[pred_mask, 'Target'] = y['Target']

        corrs, weights = [], []
        for asset in df_test['Asset_ID'].drop_duplicates().to_list():
            asset_target = target.loc[target['Asset_ID'] == asset, 'Target']
            # Select by index label (not by position), so this also works
            # when the frames do not carry a 0..n-1 index.
            asset_pred = df_pred.loc[asset_target.index, 'Target']
            corr = np.corrcoef(asset_target.to_numpy(), asset_pred.to_numpy())[0, 1]
            if np.isnan(corr):
                corr = 0.
            corrs.append(corr)
            weights.append(detail.loc[detail['Asset_ID'] == asset, 'Weight'].iloc[0])
        corrs = np.array(corrs)
        weights = np.array(weights)
        print(corrs)
        print(weights)
        return (corrs * weights).sum() / weights.sum()

In [None]:

# Score the current model_lgbm on the non-debug benchmark data (debug defaults to False).
benchmark_debug = Benchmark()
benchmark_debug.benchmark(model_lgbm)


# Ridge regression

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime
from sklearn import linear_model
import matplotlib.pyplot as plt

In [None]:
# Strict loader for the training data.
def read_csv_strict(file_name='../input/g-research-crypto-forecasting/train.csv'):
    """Load `file_name` and return only the rows earlier than 2021-06-13 00:00:00.

    The 'timestamp' column (seconds since the epoch) is converted into a new
    'datetime' column, which is used for the cutoff and kept in the result.
    """
    raw = pd.read_csv(file_name)
    raw['datetime'] = pd.to_datetime(raw['timestamp'], unit='s')
    return raw.loc[raw['datetime'] < '2021-06-13 00:00:00']

# Reload the strictly filtered training data and the asset metadata
# (these statements repeat the loading cell near the top of the notebook).
data_df = read_csv_strict()
data_folder = '../input/g-research-crypto-forecasting/'
asset_details_df = pd.read_csv(data_folder + 'asset_details.csv').set_index('Asset_ID')

In [None]:
class RidgeRegression():
    def __init__(self):
        self.models = [None] * 14
        self.features = ['Count', 'Volume', 'Lower_Shadow', 'Upper_Shadow','Avg_Vol', 'Rel_Upper', 'Upper_VWAP', 'Upper_Vol', 'R_quantile', 'Rel_Upper_quantile']
        
    def _extend(self,df):
        df = df.copy()
        
        df['Upper_Shadow'] = df['High'] - np.maximum(df['Close'], df['Open'])
        df['Lower_Shadow'] = np.minimum(df['Close'], df['Open']) - df['Low']
        #df['Liquidity'] = ((2 * (df['High'] - df['Low']) - np.absolute(df['Open'] - df['Close']))/ df['Volume']).replace([np.inf, -np.inf, np.nan], 0.)
        df['Avg_Vol'] = (df['Volume'] / df['Count']).replace([np.inf, -np.inf, np.nan], 0.)
        df['Rel_Upper'] = ((df['High'] - df['VWAP']) / (df['High'] - df['Low'])).replace([np.inf, -np.inf, np.nan], 0.)
        df['Upper_VWAP'] = ((df['High'] - df['VWAP']) / df['VWAP']).replace([np.inf, -np.inf, np.nan], 0.)
        df['Upper_Vol'] = ((df['High'] - df['VWAP']) / df['Volume']).replace([np.inf, -np.inf, np.nan], 0.)
        
        
        df['Return_1min'] = (df['Close']/df['Open'] - 1.0).replace([np.inf, -np.inf, np.nan], 0.)
        df['R_max'] = df.groupby(['timestamp'])['Return_1min'].transform('max')
        df['R_min'] = df.groupby(['timestamp'])['Return_1min'].transform('min')
        df['R_quantile'] = (df['Return_1min'] - df['R_min'])/(df['R_max'] - df['R_min'])
        
        df['Rel_Upper_max'] = df.groupby(['timestamp'])['Rel_Upper'].transform('max')
        df['Rel_Upper_min'] = df.groupby(['timestamp'])['Rel_Upper'].transform('min')
        df['Rel_Upper_quantile'] = (df['Rel_Upper'] - df['Rel_Upper_min'])/(df['Rel_Upper_max'] - df['Rel_Upper_min']).replace([np.inf, -np.inf, np.nan], 0.5)
        return df
    
    def train(self, df):
        df = df.copy()
        df = self._extend(df)
        assets = df['Asset_ID'].drop_duplicates().to_list()
        
        for asset in assets:
            X, y = get_Xy(df, asset, features=self.features)
            alphas = np.linspace(0.0001,0.1)
            model = linear_model.RidgeCV(alphas = alphas,store_cv_values=True)
            model.fit(X, y)
            self.models[asset] = model
            print(f'{asset} finished.')
            
    def predict(self, df_test, df_pred):
        df_test = df_test.copy()
        df_test = self._extend(df_test)
        total = len(df_test.index)
        
        row_generator = df_test.iterrows()
        row_generator = tqdm(row_generator, total=total) # Enable progress bar, comment this line to disable it.
        for j , row in row_generator:
            asset = row['Asset_ID']
            x_test = row[self.features]
            model = self.models[int(asset)]
            y_pred = model.predict([x_test])[0]
            mask = df_pred['row_id'] == j
            df_pred.loc[mask, 'Target'] = y_pred
        return df_pred

In [None]:
# Fit the per-asset ridge models on the strictly filtered training data.
model_RR = RidgeRegression()
model_RR.train(data_df)

In [None]:
# Score the ridge models on the non-debug benchmark data (debug defaults to False).
benchmark_debug = Benchmark()
benchmark_debug.benchmark(model_RR)

In [None]:
# Keep only the rows of asset 1 that have no missing values, for the
# single-feature ridge experiment in the following cells.
# (The frame is `data_df`; no variable named `data` exists in this notebook.)
data_df = data_df[data_df['Asset_ID'] == 1].dropna()

In [None]:
# Single feature: the Close/Open ratio, as a column vector.
x_data = np.array(data_df['Close']/data_df['Open']).reshape(-1,1)
y_data = data_df['Target']

# Fit with the whole alpha grid so that cv_values_ holds one column per
# candidate alpha; the plotting cell pairs alphas_to_test with those columns.
alphas_to_test = np.linspace(0.0001,0.1)
model = linear_model.RidgeCV(alphas = alphas_to_test,store_cv_values=True)

model.fit(x_data,y_data)
print(model.cv_values_[0])

In [None]:
# Plot the column-wise mean of the stored cross-validation values against the alpha grid.
plt.plot(alphas_to_test,model.cv_values_.mean(axis = 0))

In [None]:
# Quick look at the predictions for the first five samples.
model.predict(x_data[:5])


## Submission
**Important: the last cell can be run only once, due to the API requirement.**

However, it is important that this section runs without any error, to ensure a successful submission.

In [None]:
# Register the model here.
# model_submission is the object whose predict() is called in the submission loop.
model_submission = model_lgbm

In [None]:
import gresearch_crypto

# Create the competition environment and the iterator over its test batches.
env = gresearch_crypto.make_env()
iter_test = env.iter_test()

In [None]:
# For every (test frame, prediction frame) pair: let the registered model fill
# in the predictions, then hand them back to the environment.
for i, (df_test, df_pred) in enumerate(iter_test):
    df_pred = model_submission.predict(df_test, df_pred)
    env.predict(df_pred)

In [None]:
# debugging cell