In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import torch

In [2]:
# Names of the 300 anonymised feature columns: f_0 ... f_299.
features = [f'f_{i}' for i in range(300)]

# Column -> dtype map handed to read_csv (both as `usecols` and as `dtype`).
# Key order is kept as: time_id, investment_id, target, then the features.
data_types_dict = {
    'time_id': 'int32',
    'investment_id': 'int16',
    'target': 'float16',
    **{name: 'float16' for name in features},
}

In [3]:
# Because `chunksize` is given, read_csv returns an iterator over DataFrames
# (not a DataFrame); a later cell pulls the first chunk out of it.
# Only the columns listed in data_types_dict are read, with those dtypes.
# NOTE(review): index_col=0 presumably makes time_id the index — confirm.
data = pd.read_csv(
    '../input/ubiquant-market-prediction/train.csv',
    usecols=data_types_dict.keys(),
    dtype=data_types_dict,
    index_col=0,
    chunksize=3141410,
)

In [4]:
# Take only the first chunk from the reader and rebind `data` to that DataFrame;
# the reader object itself is no longer referenced after this line.
data = next(iter(data))

  mask |= (ar1 == a)


In [5]:
"""
time_id = 270 #40
X_train = data.loc[:time_id-1,]
X_test = data.loc[time_id:,]
print(X_train.shape[0], X_test.shape[0])

y_train = X_train['target']
X_train.drop(['target'], inplace=True, axis=1)

y_test = X_test['target']
X_test.drop(['target'], inplace=True, axis=1)
print(y_train.shape[0], y_test.shape[0])
print(y_test.shape[0]/y_train.shape[0])
"""

"\ntime_id = 270 #40\nX_train = data.loc[:time_id-1,]\nX_test = data.loc[time_id:,]\nprint(X_train.shape[0], X_test.shape[0])\n\ny_train = X_train['target']\nX_train.drop(['target'], inplace=True, axis=1)\n\ny_test = X_test['target']\nX_test.drop(['target'], inplace=True, axis=1)\nprint(y_train.shape[0], y_test.shape[0])\nprint(y_test.shape[0]/y_train.shape[0])\n"

In [6]:
# Split the label column off the frame; `data` keeps every other column.
# (Alternative kept for reference: also drop 'investment_id'.)
# Not idempotent: running this cell twice raises KeyError on 'target'.
target = data['target']
data.drop(columns=['target'], inplace=True)

In [7]:
from scipy.stats import pearsonr


def est_score(model, X=None, y=None):
    """
    Pearson correlation between a model's predictions and the true targets.

    :param model: any object exposing ``predict(X)``
    :param X: inputs to score on; when omitted the notebook-level ``X_test`` is used
    :param y: true targets; when omitted the notebook-level ``y_test`` is used
    :return: correlation coefficient (first element of ``pearsonr``'s result)
    """
    # The helper used to read only the globals X_test / y_test, which no live
    # cell defines. Explicit arguments make it usable on any hold-out set while
    # est_score(model) keeps working whenever those globals exist.
    if X is None:
        X = X_test
    if y is None:
        y = y_test
    y_pred = model.predict(X)
    return pearsonr(y_pred, y)[0]

In [8]:
#from sklearn.model_selection import train_test_split
#X_train, X_test, y_train, y_test = train_test_split(data, data['target'], test_size=0.1, stratify=data.index)

#id_train = X_train['investment_id']
#id_test = X_test['investment_id']

#X_train.drop(['target', 'investment_id'], inplace=True, axis=1)
#X_test.drop(['target', 'investment_id'], inplace=True, axis=1)

In [9]:
#id_train.isna().sum()

In [10]:
#del data

In [11]:
#from sklearn.linear_model import LinearRegression
#reg_model = LinearRegression(copy_X=False).fit(X_train, y_train)

In [12]:
#est_score(reg_model)

In [13]:
# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def move_to(data, device):
    """
    Transfer an object (or a nested list/tuple of objects) to a device.

    :param data: anything with a ``.to(device, non_blocking=...)`` method, or a
                 list/tuple of such objects (nesting allowed)
    :param device: target torch device
    :return: the moved object; list and tuple inputs both come back as lists
    """
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)

    moved = []
    for item in data:
        moved.append(move_to(item, device))
    return moved

In [14]:
def batch_loader(X, y, batch_size = 4096, equal=False):
    """
    Yield consecutive (X, y) mini-batches in row order.

    :param X: DataFrame, sliced positionally with ``.iloc``
    :param y: targets aligned with X row by row, sliced with ``y[start:stop]``
    :param batch_size: maximum number of rows per batch (must be positive)
    :param equal: when True only full-size batches are produced and a shorter
                  trailing remainder is skipped; when False (default) the
                  remainder is yielded as a final, smaller batch
    :return: generator of ``(X_batch, y_batch)`` pairs
    """
    # The previous `i + batch_size >= n` test stopped one batch early: it
    # dropped the last batch even when it was full-size, and `equal` was ignored.
    n_rows = y.shape[0]
    for start in range(0, n_rows, batch_size):
        stop = start + batch_size
        if equal and stop > n_rows:
            break
        yield X.iloc[start:stop, :], y[start:stop]

In [15]:
def pearson_loss(x, y):
    """
    Negative Pearson correlation between two tensors, usable as a loss.

    :param x: predictions
    :param y: targets, same shape as ``x``
    :return: scalar tensor equal to ``-corr(x, y)``; the result is NaN when
             either input has zero variance (division by zero)
    """
    x_centered = x - x.mean()
    y_centered = y - y.mean()
    covariance = (x_centered * y_centered).sum()
    x_norm = torch.sqrt((x_centered * x_centered).sum())
    y_norm = torch.sqrt((y_centered * y_centered).sum())
    return -covariance / (x_norm * y_norm)

In [16]:
def reduce_matching(a):
    """
    Re-label the values in the first row of ``a`` with small integer codes.

    Each distinct value gets a code in ``0..k-1``. Codes follow the iteration
    order of a Python ``set`` built from this call's values, so the mapping is
    local to the call: the same raw value can get a different code in another
    batch.

    :param a: 2-D tensor; only ``a[0]`` is read
    :return: float tensor of shape (len(a[0]), 1) holding the codes, on ``device``
    """
    raw = a[0].tolist()
    code_of = {value: code for code, value in enumerate(set(raw))}
    codes = [code_of[value] for value in raw]
    return move_to(torch.FloatTensor(codes).view(-1, 1), device)

In [17]:
def standard(a):
    """
    Standardise a tensor in place: subtract its mean, divide by its std.

    :param a: floating-point tensor; it is modified in place
    :return: the same tensor object ``a``
    """
    a -= a.mean()
    # Dividing by a zero std (all values equal) or an undefined one (a single
    # element) used to turn the whole tensor into NaN. In those cases the
    # centred values are returned as they are.
    if a.numel() > 1:
        spread = a.std()
        if spread > 0:
            a /= spread
    return a

In [18]:
class DNN_Model(torch.nn.Module):
    """
    Two-branch MLP producing one output per row.

    Column 0 of the input is treated as an id column (presumably
    investment_id — confirm against the caller) and goes through a small id
    branch; the other 300 columns go through a feature branch. Both outputs
    are concatenated and passed through a joint head.
    """

    def __init__(self, deep_size):
        """
        :param deep_size: width of the two feature-branch layers
        """
        super().__init__()
        nn = torch.nn
        self.deep_size = deep_size
        self.index = 0
        self.model_name = 'DNN_Model_0'

        # Feature branch (linear_1, linear_2) followed by the joint head.
        self.linear_1 = nn.Linear(300, deep_size)
        self.linear_2 = nn.Linear(deep_size, deep_size)
        self.linear_3 = nn.Linear(deep_size + 64, 128)
        self.linear_4 = nn.Linear(128, 128)
        self.linear_5 = nn.Linear(128, 64)
        self.linear_6 = nn.Linear(64, 1)

        # Id branch: 1 -> 16 -> 32 -> 64.
        self.id_linear_1 = nn.Linear(1, 16)
        self.id_linear_2 = nn.Linear(16, 32)
        self.id_linear_3 = nn.Linear(32, 64)

        # `dout` is registered but not applied anywhere in tensor_forward.
        self.dout = nn.Dropout(p=0.5)
        self.act = nn.SiLU()

    def tensor_forward(self, X_tensor):
        """
        Forward pass on a tensor.

        :param X_tensor: 2-D float tensor with 301 columns (id + 300 features)
        :return: tensor of shape (rows, 1)
        """
        batch = move_to(X_tensor, device)
        raw_ids = batch[:, 0].view(1, -1)
        feats = batch[:, 1:]

        # Ids are re-coded within this batch and standardised before the branch.
        id_code = standard(reduce_matching(raw_ids))
        for layer in (self.id_linear_1, self.id_linear_2, self.id_linear_3):
            id_code = self.act(layer(id_code))

        for layer in (self.linear_1, self.linear_2):
            feats = self.act(layer(feats))

        joint = torch.cat([feats, id_code], dim=1)
        for layer in (self.linear_3, self.linear_4, self.linear_5):
            joint = self.act(layer(joint))
        return self.linear_6(joint)

    def forward(self, X):
        """
        Forward pass on a DataFrame-like object exposing ``.values``.

        :param X: frame whose values form the (rows, 301) input matrix
        :return: tensor of shape (rows, 1)
        """
        return self.tensor_forward(torch.FloatTensor(X.values))

In [19]:
class DNN_Model_1(torch.nn.Module):
    """
    Plain deep MLP over the 300 feature columns (column 0 is discarded).

    The stack is a single ``torch.nn.Sequential`` of 31 modules; the positions
    are kept stable because they are the state_dict keys.
    """

    def __init__(self, deep_size):
        """
        :param deep_size: width of every hidden layer
        """
        super().__init__()
        nn = torch.nn
        self.deep_size = deep_size
        self.index = 1

        layers = []
        in_width = 300
        # Five groups of Linear-SiLU-Linear-SiLU-Dropout; only the very first
        # Linear takes the 300 raw features.
        for _ in range(5):
            layers += [
                nn.Linear(in_width, deep_size),
                nn.SiLU(),
                nn.Linear(deep_size, deep_size),
                nn.SiLU(),
                nn.Dropout(p=0.3),
            ]
            in_width = deep_size

        # One shorter group, then the output head.
        layers += [
            nn.Linear(deep_size, deep_size),
            nn.SiLU(),
            nn.Dropout(p=0.3),
        ]
        layers += [
            nn.Linear(deep_size, deep_size),
            nn.SiLU(),
            nn.Linear(deep_size, 1),
        ]
        self.model = nn.Sequential(*layers)

        self.model_name = 'DNN_Model_1'

    def tensor_forward(self, X_tensor):
        """
        Forward pass on a tensor.

        :param X_tensor: 2-D float tensor with 301 columns; column 0 is dropped
        :return: tensor of shape (rows, 1)
        """
        on_device = move_to(X_tensor, device)
        return self.model(on_device[:, 1:])

    def forward(self, X):
        """
        Forward pass on a DataFrame-like object exposing ``.values``.

        :param X: frame whose values form the (rows, 301) input matrix
        :return: tensor of shape (rows, 1)
        """
        return self.tensor_forward(torch.FloatTensor(X.values))

In [20]:
class DNN_Model_2(torch.nn.Module):
    """
    Larger two-branch MLP with dropout and one residual connection.

    Column 0 of the input is treated as an id column (presumably
    investment_id — confirm against the caller); the other 300 columns are the
    features. Both branches end at width 64 / ``deep_size`` and are
    concatenated before the joint head.
    """

    def __init__(self, deep_size):
        """
        :param deep_size: output width of the feature branch
        """
        super().__init__()
        nn = torch.nn
        self.deep_size = deep_size
        self.index = 2
        self.model_name = 'DNN_Model_2'

        # Feature branch: 300 -> deep_size -> 350 -> deep_size.
        self.linear_1 = nn.Linear(300, deep_size)
        self.linear_2 = nn.Linear(deep_size, 350)
        self.linear_3 = nn.Linear(350, deep_size)
        # Joint head over [features | ids].
        self.linear_4 = nn.Linear(deep_size + 64, 256)
        self.linear_5 = nn.Linear(256, 256)
        self.linear_6 = nn.Linear(256, 128)
        self.linear_7 = nn.Linear(128, 128)
        self.linear_8 = nn.Linear(128, 64)
        self.linear_9 = nn.Linear(64, 1)

        # Id branch: 1 -> 32 -> 64 -> 100 -> 64 -> 64.
        self.id_linear_1 = nn.Linear(1, 32)
        self.id_linear_2 = nn.Linear(32, 64)
        self.id_linear_3 = nn.Linear(64, 100)
        self.id_linear_4 = nn.Linear(100, 64)
        self.id_linear_5 = nn.Linear(64, 64)

        self.dout = nn.Dropout(p=0.4)
        self.act = nn.SiLU()

    def tensor_forward(self, X_tensor):
        """
        Forward pass on a tensor.

        :param X_tensor: 2-D float tensor with 301 columns (id + 300 features)
        :return: tensor of shape (rows, 1)
        """
        batch = move_to(X_tensor, device)
        raw_ids = batch[:, 0].view(1, -1)
        feats = batch[:, 1:]

        # Id branch first (its dropout call precedes the feature-branch one).
        id_code = standard(reduce_matching(raw_ids))
        for layer in (self.id_linear_1, self.id_linear_2, self.id_linear_3):
            id_code = self.act(layer(id_code))
        id_code = self.dout(id_code)
        for layer in (self.id_linear_4, self.id_linear_5):
            id_code = self.act(layer(id_code))

        feats = self.act(self.linear_1(feats))
        feats = self.act(self.linear_2(feats))
        feats = self.dout(feats)
        feats = self.act(self.linear_3(feats))

        joint = torch.cat([feats, id_code], dim=1)
        joint = self.act(self.linear_4(joint))
        joint = self.act(self.linear_5(joint))
        joint = self.dout(joint)

        # Residual: the linear_6 activation is added back after linear_7.
        skip = self.act(self.linear_6(joint))
        joint = self.act(self.linear_7(skip))
        joint = self.act(self.linear_8(joint + skip))
        return self.linear_9(joint)

    def forward(self, X):
        """
        Forward pass on a DataFrame-like object exposing ``.values``.

        :param X: frame whose values form the (rows, 301) input matrix
        :return: tensor of shape (rows, 1)
        """
        return self.tensor_forward(torch.FloatTensor(X.values))

In [21]:
class DNN_Model_3(torch.nn.Module):
    """
    Residual MLP over the 300 feature columns (column 0 is discarded).

    After an input projection there are four groups of three equally wide
    Linear layers (groups 2-5) and a two-layer head. The last activation of
    groups 2, 3 and 4 (``res_2``, ``res_3``, ``res_4``) is added into the
    inputs of later groups.
    """

    def __init__(self, deep_size):
        """
        :param deep_size: width of every hidden layer
        """
        super().__init__()
        self.deep_size = deep_size
        self.index = 3
        self.linear1 = torch.nn.Linear(300, deep_size)

        self.linear2_1 = torch.nn.Linear(deep_size, deep_size)
        self.linear2_2 = torch.nn.Linear(deep_size, deep_size)
        self.linear2_3 = torch.nn.Linear(deep_size, deep_size)

        self.linear3_1 = torch.nn.Linear(deep_size, deep_size)
        self.linear3_2 = torch.nn.Linear(deep_size, deep_size)
        self.linear3_3 = torch.nn.Linear(deep_size, deep_size)

        self.linear4_1 = torch.nn.Linear(deep_size, deep_size)
        self.linear4_2 = torch.nn.Linear(deep_size, deep_size)
        self.linear4_3 = torch.nn.Linear(deep_size, deep_size)

        self.linear5_1 = torch.nn.Linear(deep_size, deep_size)
        self.linear5_2 = torch.nn.Linear(deep_size, deep_size)
        self.linear5_3 = torch.nn.Linear(deep_size, deep_size)

        self.linear6_1 = torch.nn.Linear(deep_size, deep_size)
        self.linear6_2 = torch.nn.Linear(deep_size, 1)

        self.act = torch.nn.SiLU()
        self.dout = torch.nn.Dropout(p=0.5)

        self.model_name = 'DNN_Model_3'

    def tensor_forward(self, X_tensor):
        """
        Forward pass on a tensor.

        :param X_tensor: 2-D float tensor with 301 columns; column 0 is dropped
        :return: tensor of shape (rows, 1)
        """
        X_tensor = move_to(X_tensor, device)
        X_tensor = X_tensor[:, 1:]
        X_tensor = self.act(self.linear1(X_tensor))

        # Group 2
        X_tensor = self.act(self.linear2_1(X_tensor))
        X_tensor = self.act(self.linear2_2(X_tensor))
        X_tensor = self.dout(X_tensor)
        res_2 = self.act(self.linear2_3(X_tensor))

        # Group 3
        X_tensor = self.act(self.linear3_1(res_2))
        X_tensor = self.act(self.linear3_2(X_tensor))
        X_tensor = self.dout(X_tensor)
        res_3 = self.act(self.linear3_3(X_tensor))

        # Group 4
        X_tensor = self.act(self.linear4_1(res_2 + res_3))
        X_tensor = self.act(self.linear4_2(X_tensor))
        X_tensor = self.dout(X_tensor)
        res_4 = self.act(self.linear4_3(X_tensor))
        # A leftover `res_4 = X_tensor` used to overwrite the line above, so
        # linear4_3 was evaluated but never influenced the output and received
        # no gradients. Groups 2 and 3 already had that override disabled;
        # group 4 now matches them. Checkpoints trained before this change
        # carry untrained linear4_3 weights and should be refitted.

        # Group 5
        X_tensor = self.act(self.linear5_1(res_3 + res_4))
        X_tensor = self.act(self.linear5_2(X_tensor))
        X_tensor = self.dout(X_tensor)
        X_tensor = self.act(self.linear5_3(X_tensor))

        # Head
        X_tensor = self.act(self.linear6_1(X_tensor + res_4))
        X_tensor = self.linear6_2(X_tensor)

        return X_tensor

    def forward(self, X):
        """
        Forward pass on a DataFrame-like object exposing ``.values``.

        :param X: frame whose values form the (rows, 301) input matrix
        :return: tensor of shape (rows, 1)
        """
        tensor_X = torch.FloatTensor(X.values)
        return self.tensor_forward(tensor_X)

In [22]:
class DNN_Model_4(torch.nn.Module):
    """
    MLP over the 300 feature columns (column 0 is discarded) that widens to
    ``2 * deep_size`` in the middle and narrows back before the output.

    The stack is a single ``torch.nn.Sequential`` of 20 modules; the positions
    are kept stable because they are the state_dict keys.
    """

    def __init__(self, deep_size):
        """
        :param deep_size: base hidden width; the middle layers use twice this
        """
        super().__init__()
        nn = torch.nn
        self.deep_size = deep_size
        self.index = 4
        wide = 2 * deep_size

        layers = [
            nn.Linear(300, deep_size), nn.SiLU(),
            nn.Linear(deep_size, deep_size), nn.SiLU(),
            nn.Linear(deep_size, wide), nn.SiLU(),
            # NOTE(review): no activation follows this Linear before the
            # Dropout, unlike the other hidden layers — confirm it is intended.
            # Inserting one would shift the Sequential indices (state_dict keys).
            nn.Linear(wide, wide),
            nn.Dropout(p=0.4),
            nn.Linear(wide, wide), nn.SiLU(),
            nn.Linear(wide, wide), nn.SiLU(),
            nn.Linear(wide, deep_size), nn.SiLU(),
            nn.Dropout(p=0.4),
            nn.Linear(deep_size, deep_size), nn.SiLU(),
            nn.Linear(deep_size, deep_size), nn.SiLU(),
            nn.Linear(deep_size, 1),
        ]
        self.model = nn.Sequential(*layers)

        self.model_name = 'DNN_Model_4'

    def tensor_forward(self, X_tensor):
        """
        Forward pass on a tensor.

        :param X_tensor: 2-D float tensor with 301 columns; column 0 is dropped
        :return: tensor of shape (rows, 1)
        """
        on_device = move_to(X_tensor, device)
        return self.model(on_device[:, 1:])

    def forward(self, X):
        """
        Forward pass on a DataFrame-like object exposing ``.values``.

        :param X: frame whose values form the (rows, 301) input matrix
        :return: tensor of shape (rows, 1)
        """
        return self.tensor_forward(torch.FloatTensor(X.values))

In [23]:
# Registry of network classes. StackedDNN.save() writes the `index` attribute of
# its first net to config.txt and StackedDNN.load() uses that number to index
# into this list, so each class must sit at the position equal to its `index`.
dnn_models = [DNN_Model, DNN_Model_1, DNN_Model_2, DNN_Model_3, DNN_Model_4]

In [24]:
# TODO: re-implement subsampling (and also try bootstrap)
# TODO: write a couple more models, ideally as different from each other as possible
# TODO: from a large pool of models, pick the top performers (evaluated on the full dataset!)
# TODO: work out how to stack networks inside one model (e.g. using a correlation matrix) to raise the score
# TODO: work on the boosting models and try combining them

In [25]:
def corr_based_coefs(a):
    """
    Blending weights that favour the least redundant ensemble members.

    For each row ``a[i]`` the Pearson correlation with the mean of all other
    rows is computed; the weight is ``1 - corr``, normalised to sum to 1.

    :param a: 2-D array of shape (n_models, n_samples) with one prediction
              vector per row
    :return: 1-D array of n_models weights; ``np.array([1])`` when fewer than
             two rows are given
    """
    n_models = a.shape[0]
    if n_models < 2:
        return np.array([1])

    preds_sum = np.sum(a, axis=0)
    corrs = np.array([
        pearsonr((preds_sum - a[i]) / (n_models - 1), a[i])[0]
        for i in range(n_models)
    ])
    weights = 1 - corrs
    total = np.sum(weights)
    # Perfectly correlated members give an all-zero weight vector, and a
    # constant prediction vector gives a NaN correlation. Normalising either
    # would produce NaN weights, so fall back to a plain average instead.
    if not np.isfinite(total) or total <= 1e-12:
        return np.full(n_models, 1.0 / n_models)
    return weights / total

In [26]:
class StackedDNN:
    """
    Ensemble of independently trained networks that share one architecture.

    Each member is created with a randomly drawn ``deep_size``, trained with
    the Pearson loss, and the members' predictions are blended with the
    weights returned by ``corr_based_coefs``.
    """

    def __init__(self, n, model, min_deep=30, max_deep=40):
        """
        :param n: number of networks in the ensemble (at least 1)
        :param model: network class, called as ``model(deep_size)``; instances
                      must expose ``model_name``, ``index``, ``deep_size``,
                      ``tensor_forward`` and ``forward``
        :param min_deep: lowest ``deep_size`` that may be drawn (inclusive)
        :param max_deep: upper bound for ``deep_size`` (exclusive)
        :raises ValueError: if ``n`` is smaller than 1
        """
        if n < 1:
            raise ValueError('StackedDNN needs at least one network')
        self.nets = [move_to(model(np.random.randint(min_deep, max_deep)), device) for _ in range(n)]
        self.model_name = self.nets[0].model_name

    def fit(self, X, y, iters, batch_size=4096, subset=1, float_subset=True, silent=True, logs=True, plot=False, val=False, X_val=None, y_val=None):
        """
        Train every network of the ensemble, one after another.

        :param X: training frame (pandas), sliced positionally into batches
        :param y: training targets (pandas), aligned with X row by row
        :param iters: number of epochs per network
        :param batch_size: rows per batch; a shorter trailing batch is used too
        :param subset: fraction of each batch that is sampled (with
                       replacement) for training
        :param float_subset: if True, each network draws its own fraction
                             uniformly from ``[subset, 1]``; if False every
                             network uses exactly ``subset``
        :param silent: if False, print per-epoch losses
        :param logs: if True (and ``silent``), print one line per fitted net
        :param plot: if True, draw the loss curves of each network
        :param val: if True, evaluate on ``X_val`` / ``y_val`` after each epoch
        :param X_val: validation frame, required when ``val`` is True
        :param y_val: validation targets, required when ``val`` is True
        :raises ValueError: if the sampled subset or the data leave no batch
                            with at least two rows
        """
        n_rows = y.shape[0]

        if val:
            # The validation tensors never change, so build them once.
            tensor_train_val = move_to(torch.FloatTensor(X_val.values), device)
            tensor_y_val = move_to(torch.FloatTensor(y_val.values), device)

        if logs and silent:
            print(f'Net {self.nets[0].model_name}')

        for i, net in enumerate(self.nets):
            scores_train = []
            scores_val = []

            opt = torch.optim.Adam(net.parameters(), lr=5e-4)

            # The per-network fraction used to be drawn and then overwritten on
            # the very next line, which made `float_subset` a no-op.
            frac = np.random.uniform(subset, 1) if float_subset else subset
            # Row positions inside a batch, drawn once per network and reused
            # for all of its batches.
            subset_ind = np.random.choice(batch_size, int(batch_size * frac))
            if subset_ind.size < 2:
                raise ValueError('subset leaves fewer than two rows per batch')

            if not silent:
                print(f'>>> Net {i + 1}/{len(self.nets)}')
                print(f'Deep size: {net.deep_size}')
                print()

            loss = None
            val_loss = None
            for epoch in range(iters):
                net.train()
                # range() also visits the last batch; the old
                # `offset + batch_size < n` test skipped it, even when it was full.
                for start in range(0, n_rows, batch_size):
                    stop = min(start + batch_size, n_rows)
                    size = stop - start
                    rows = subset_ind if size == batch_size else subset_ind[subset_ind < size]
                    if rows.size < 2:
                        # The correlation of fewer than two points is undefined.
                        continue
                    net.zero_grad()
                    X_tensor = move_to(torch.FloatTensor(X.iloc[start:stop, :].values), device)
                    y_tensor = move_to(torch.FloatTensor(y.iloc[start:stop].values), device)
                    X_tensor = X_tensor[rows, :]
                    y_tensor = y_tensor[rows]

                    loss = pearson_loss(net.tensor_forward(X_tensor).view(-1), y_tensor)
                    loss.backward()
                    opt.step()

                if loss is None:
                    raise ValueError('no batch with at least two rows to train on')

                if val:
                    net.eval()
                    # No autograd graph is needed for evaluation.
                    with torch.no_grad():
                        val_loss = pearson_loss(net.tensor_forward(tensor_train_val).view(-1), tensor_y_val)
                    scores_val.append(float(val_loss))
                scores_train.append(float(loss.detach()))

                if not silent:
                    print(f'epoch {epoch} finished with {loss} loss')
                    if val:
                        print(f'val loss {val_loss}')

            if logs and silent:
                print(f'Net {i+1}/{len(self.nets)} fitted')

            if not silent:
                if val:
                    print(f'Finished with \n train loss: {loss}\n val loss: {val_loss}')
                else:
                    print(f'Finished with {loss} loss')
            if plot:
                epochs = list(range(iters))
                if val:
                    sns.lineplot(x=epochs, y=scores_train, label='train')
                    sns.lineplot(x=epochs, y=scores_val, label='val')
                else:
                    sns.lineplot(x=epochs, y=scores_train)
                # plt.plot() with no arguments draws nothing; show the figure.
                plt.show()

    def predict(self, X):
        """
        Blend the predictions of all networks.

        :param X: frame with the same column layout as the one given to
                  ``fit``. The model classes in this file drop column 0 and
                  feed the remaining 300 columns to their first layer, so X
                  needs the id column in front of the features.
        :return: 1-D numpy array with one blended prediction per row of X
        """
        y_pred = np.empty([len(self.nets), X.shape[0]])
        with torch.no_grad():
            for i, net in enumerate(self.nets):
                net.eval()
                y_pred[i] = net.forward(X).view(-1).cpu().numpy()
        return corr_based_coefs(y_pred).dot(y_pred)

    def save(self, path='.'):
        """
        Write the ensemble to ``<path>/<model_name>/``.

        ``config.txt`` holds the architecture index on its first line followed
        by one ``deep_size`` per network; each state dict goes to ``model:<i>``.

        :param path: parent directory (defaults to the working directory)
        """
        import os
        target_dir = os.path.join(path, self.model_name)
        # exist_ok replaces the former bare `except: pass`, which also hid
        # real failures such as missing permissions.
        os.makedirs(target_dir, exist_ok=True)
        with open(os.path.join(target_dir, 'config.txt'), 'w') as f:
            f.write(str(self.nets[0].index) + '\n')
            for i, net in enumerate(self.nets):
                torch.save(net.state_dict(), os.path.join(target_dir, f'model:{i}'))
                f.write(str(net.deep_size) + '\n')

    def load(self, path='../input/ubiquantdevelop'):
        """
        Replace the networks with ones restored from ``<path>/<model_name>/``.

        As many networks are loaded as this instance currently holds.

        :param path: parent directory written by ``save``
        :raises ValueError: if config.txt lists fewer networks than needed
        """
        import os
        source_dir = os.path.join(path, self.model_name)
        with open(os.path.join(source_dir, 'config.txt'), 'r') as f:
            dnn_model = dnn_models[int(f.readline())]

            for i in range(len(self.nets)):
                line = f.readline().strip()
                if not line:
                    raise ValueError(f'config.txt describes only {i} networks, {len(self.nets)} expected')
                net = dnn_model(int(line))
                # NOTE: torch.load unpickles the file; only load trusted checkpoints.
                state = torch.load(os.path.join(source_dir, f'model:{i}'), map_location=torch.device('cpu'))
                net.load_state_dict(state)
                self.nets[i] = move_to(net, device)

In [27]:
#0   0.16049979767507658
#1   0.16195405253013023
#2   0.16213173833206326
#3   0.16256979869460275
#4

# TODO: write a notebook studying how the required number of epochs changes with the size of the sampled subset,
# i.e. a subsample reaches its best quality after n iterations — what happens as the sample grows?
# A smaller regression dataset would be better suited for this;
# several datasets could be used.

In [28]:
#nn_model = StackedDNN(18, DNN_Model, min_deep=280, max_deep=301)
#nn_model.fit(data, target, 10, subset=0.85) -->.1512

In [29]:
# Ensemble of 15 DNN_Model nets, deep_size drawn from [280, 301), 10 epochs each.
nn_model = StackedDNN(n=15, model=DNN_Model, min_deep=280, max_deep=301)
nn_model.fit(X=data, y=target, iters=10, subset=0.85)
nn_model.save()

Net DNN_Model_0
Net 1/15 fitted
Net 2/15 fitted
Net 3/15 fitted
Net 4/15 fitted
Net 5/15 fitted
Net 6/15 fitted
Net 7/15 fitted
Net 8/15 fitted
Net 9/15 fitted
Net 10/15 fitted
Net 11/15 fitted
Net 12/15 fitted
Net 13/15 fitted
Net 14/15 fitted
Net 15/15 fitted


In [30]:
# Ensemble of 8 DNN_Model_1 nets, deep_size fixed at 40 (range [40, 41)), 8 epochs each.
nn_model_1 = StackedDNN(n=8, model=DNN_Model_1, min_deep=40, max_deep=41)
nn_model_1.fit(X=data, y=target, iters=8, subset=0.85)
nn_model_1.save()

Net DNN_Model_1
Net 1/8 fitted
Net 2/8 fitted
Net 3/8 fitted
Net 4/8 fitted
Net 5/8 fitted
Net 6/8 fitted
Net 7/8 fitted
Net 8/8 fitted


In [31]:
# Ensemble of 15 DNN_Model_2 nets, deep_size drawn from [220, 250), 10 epochs each.
nn_model_2 = StackedDNN(n=15, model=DNN_Model_2, min_deep=220, max_deep=250)
nn_model_2.fit(X=data, y=target, iters=10, subset=0.85)
nn_model_2.save()

Net DNN_Model_2
Net 1/15 fitted
Net 2/15 fitted
Net 3/15 fitted
Net 4/15 fitted
Net 5/15 fitted
Net 6/15 fitted
Net 7/15 fitted
Net 8/15 fitted
Net 9/15 fitted
Net 10/15 fitted
Net 11/15 fitted
Net 12/15 fitted
Net 13/15 fitted
Net 14/15 fitted
Net 15/15 fitted


In [32]:
# Ensemble of 15 DNN_Model_3 nets, deep_size drawn from [90, 110), 9 epochs each.
nn_model_3 = StackedDNN(15, DNN_Model_3, min_deep=90, max_deep=110)
nn_model_3.fit(data, target, 9, subset=0.85)
# This cell used to call nn_model.save(), which re-saved the first ensemble and
# left nn_model_3 without a checkpoint.
nn_model_3.save()

Net DNN_Model_3
Net 1/15 fitted
Net 2/15 fitted
Net 3/15 fitted
Net 4/15 fitted
Net 5/15 fitted
Net 6/15 fitted
Net 7/15 fitted
Net 8/15 fitted
Net 9/15 fitted
Net 10/15 fitted
Net 11/15 fitted
Net 12/15 fitted
Net 13/15 fitted
Net 14/15 fitted
Net 15/15 fitted


In [33]:
# Ensemble of 20 DNN_Model_4 nets, deep_size drawn from [90, 110), 9 epochs each.
nn_model_4 = StackedDNN(20, DNN_Model_4, min_deep=90, max_deep=110)
nn_model_4.fit(data, target, 9, subset=0.85)
# This cell used to call nn_model.save(), which re-saved the first ensemble and
# left nn_model_4 without a checkpoint.
nn_model_4.save()

Net DNN_Model_4
Net 1/20 fitted
Net 2/20 fitted
Net 3/20 fitted
Net 4/20 fitted
Net 5/20 fitted
Net 6/20 fitted
Net 7/20 fitted
Net 8/20 fitted
Net 9/20 fitted
Net 10/20 fitted
Net 11/20 fitted
Net 12/20 fitted
Net 13/20 fitted
Net 14/20 fitted
Net 15/20 fitted
Net 16/20 fitted
Net 17/20 fitted
Net 18/20 fitted
Net 19/20 fitted
Net 20/20 fitted


In [34]:
from catboost import CatBoostRegressor

# Gradient-boosted baseline trained on the same frame as the networks
# (every column of `data`, including the id column).
model = CatBoostRegressor(
    iterations=10000,
    task_type='GPU',
    devices='0',
    silent=True,
    subsample=0.9,
    bootstrap_type='Poisson',
)
model.fit(data, target)

In [35]:
# Persist the fitted CatBoost model under the relative path "cat_model".
model.save_model("cat_model")

In [36]:
"""
best = 0
for i in range(100000):
    a = np.random.rand(7)
    for j in range(7):
        if (np.random.rand() <= 0.06):
            a[j] = 0
    a /= np.sum(a)
    total = pearsonr(a[0]*res + a[1]*res_1 + a[2]*res_2 + a[3]*res_3 + a[4]*res_4 + a[5]*res_5 + a[6]*res_boost, y_test)[0]
    if total > best:
        best = total
        print(a, best, i)
"""

'\nbest = 0\nfor i in range(100000):\n    a = np.random.rand(7)\n    for j in range(7):\n        if (np.random.rand() <= 0.06):\n            a[j] = 0\n    a /= np.sum(a)\n    total = pearsonr(a[0]*res + a[1]*res_1 + a[2]*res_2 + a[3]*res_3 + a[4]*res_4 + a[5]*res_5 + a[6]*res_boost, y_test)[0]\n    if total > best:\n        best = total\n        print(a, best, i)\n'

In [37]:
#[0.001, 0.337, 0.116, 0.01, 0.536] - coefs

In [38]:
#pearsonr(0*res + 0*res_1 + 0*res_2 + 0*res_3 + 1/6*res_4 + 0*res_5, y_test)[0]

In [39]:
#1-0.111-0.43-0.02

In [40]:
"""
a = 0.8
print(pearsonr(res, y_test)[0])
print(pearsonr(res_boost, y_test)[0])
pearsonr(a*res_boost+(1-a)*res, y_test)[0]
"""

'\na = 0.8\nprint(pearsonr(res, y_test)[0])\nprint(pearsonr(res_boost, y_test)[0])\npearsonr(a*res_boost+(1-a)*res, y_test)[0]\n'

In [41]:
"""
import ubiquant
env = ubiquant.make_env()   # initialize the environment
iter_test = env.iter_test()    # an iterator which loops over the test set and sample submission
for (test_df, sample_prediction_df) in iter_test:
    res = nn_model.predict(test_df[features])
    res_1 = nn_model_1.predict(test_df[features])
    res_2 = nn_model_2.predict(test_df[features])
    res_3 = nn_model_3.predict(test_df[features])
    res_boost = model.predict(test_df[features])
    #[0.001, 0.337, 0.116, 0.01, 0.536]
    test_df['target']  = 0.001*res + 0.337*res_1 + 0.116*res_2 + 0.01*res_3 + 0.536*res_boost
    env.predict(test_df[['row_id','target']])
"""

"\nimport ubiquant\nenv = ubiquant.make_env()   # initialize the environment\niter_test = env.iter_test()    # an iterator which loops over the test set and sample submission\nfor (test_df, sample_prediction_df) in iter_test:\n    res = nn_model.predict(test_df[features])\n    res_1 = nn_model_1.predict(test_df[features])\n    res_2 = nn_model_2.predict(test_df[features])\n    res_3 = nn_model_3.predict(test_df[features])\n    res_boost = model.predict(test_df[features])\n    #[0.001, 0.337, 0.116, 0.01, 0.536]\n    test_df['target']  = 0.001*res + 0.337*res_1 + 0.116*res_2 + 0.01*res_3 + 0.536*res_boost\n    env.predict(test_df[['row_id','target']])\n"