In [1]:
import copy
import pickle

import joblib
import matplotlib.pyplot as plt
import multiprocess as mp
import numpy as np
import pandas as pd
import sklearn
import sklearn.model_selection
import sklearn.pipeline
import torch
import tqdm
from multiprocess import Pool
from torch import nn
from torch.optim import AdamW

In [2]:
# Workers are started with 'spawn' (fresh interpreter per worker).
# force=True keeps this cell re-runnable: without it, a second call in the same
# kernel raises RuntimeError because the start method has already been set.
mp.set_start_method('spawn', force=True)

In [3]:
# CPU count as reported by joblib; displayed as the cell output.
joblib.cpu_count()

8

In [5]:
# Read the pickled dict that the following cell indexes with 'X' and 'y'.
# NOTE(review): pickle.load can execute arbitrary code from the file; only load
# pickles produced by a trusted run of this project.
with open('../mpra_griesemer/regression/hpp_search_1st.pickle','rb') as pickle_file:
    data = pickle.load(pickle_file)

In [6]:
# Inputs and targets come straight from the loaded dict.
X = data['X']
y = data['y']

# Hold out 20% of the rows; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [7]:
from mlp_regressor import MLPRegressor

In [27]:
%%timeit -n 1 -r 1

M = MLPRegressor(p_dropout=0.2,weight_decay=0,lr=1e-3
                 ,batch_size=4096,hidden_layer_sizes=(1024,128,32))

M.set_params(N_epochs=300)

M.fit(X_train, y_train, X_test, y_test)

print(M.score(X_test,y_test))

0.5560672480397102
41.9 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [None]:
# Transpose M.history (a sequence of pairs) into two parallel tuples.
# Presumably t = training scores and v = validation scores per epoch;
# confirm against mlp_regressor.MLPRegressor.
t,v = zip(*M.history)

In [None]:
# Average of the last 10 entries of `v`.
np.mean(v[-10:])

In [None]:
# Draw both history series on one axis: `v` first, then `t`.
fig, ax = plt.subplots()
for series in (v, t):
    ax.plot(series)
ax.grid()

In [14]:
def apply_regression(args):
    """Fit a standardised SVR on one fold and predict on its held-out part.

    Parameters
    ----------
    args : tuple
        ``(X_train, y_train, X_test, y_test)`` for a single fold, packed into
        one object so the function can be mapped over folds by a pool.

    Returns
    -------
    tuple
        Always the constant ``(1, 2)``. The predictions are computed but not
        returned. NOTE(review): this looks like a placeholder used to time the
        pool; confirm before relying on the result.
    """
    # Imported in the function body, so the imports run in whichever process
    # executes the call.
    from sklearn.preprocessing import StandardScaler
    from sklearn.pipeline import make_pipeline
    from sklearn.svm import SVR
    from mlp_regressor import MLPRegressor  # only needed for the MLP variant below

    fold_X_train, fold_y_train, fold_X_test, fold_y_test = args

    svr_params = dict(C=4.057, epsilon=0.39, gamma=0.0046)
    regressor = SVR(**svr_params)
    # Alternative final step (currently disabled):
    #   MLPRegressor(p_dropout=0.5, weight_decay=0, lr=5e-4,
    #                batch_size=1024, hidden_layer_sizes=(1024, 128, 32))
    model = make_pipeline(StandardScaler(), regressor)

    model.fit(fold_X_train, fold_y_train)

    # The prediction is executed for its cost only; its value is discarded.
    model.predict(fold_X_test)

    print('done')

    return (1, 2)

In [15]:
kfold=sklearn.model_selection.KFold(n_splits=5)

def run_pool(processes=8, maxtasksperchild=3):
    """Run ``apply_regression`` on every K-fold split of the training data.

    Reads the notebook globals ``X_train``, ``y_train``, ``kfold`` and
    ``apply_regression``.

    Parameters
    ----------
    processes : int, default 8
        Number of worker processes in the pool.
    maxtasksperchild : int, default 3
        Number of tasks a worker handles before it is replaced by a new one.

    Returns
    -------
    list
        The per-fold return values of ``apply_regression``, flattened with
        ``list.extend`` in fold order (``imap`` yields results in order).
    """
    # Lazily build one (X_tr, y_tr, X_te, y_te) tuple per fold.
    fold_args = (
        (X_train[train_idx], y_train[train_idx], X_train[test_idx], y_train[test_idx])
        for train_idx, test_idx in kfold.split(X_train, y_train)
    )

    all_res = []

    # The context manager terminates the workers if a fold raises, so a failed
    # run no longer leaves orphaned processes behind. On the normal path the
    # pool is still closed and joined explicitly, as before.
    with Pool(processes=processes, maxtasksperchild=maxtasksperchild) as pool:
        for res in pool.imap(apply_regression, fold_args):
            all_res.extend(res)

        pool.close()
        pool.join()

    return all_res



In [16]:
%%timeit -n 1 -r 1

all_res = run_pool()

done
done
done
done
done
56 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [33]:

import sklearn.neural_network

# scikit-learn MLP configured with every relevant option spelled out.
M = sklearn.neural_network.MLPRegressor(
    # architecture
    hidden_layer_sizes=(1024, 128, 32),
    # optimiser (power_t / momentum / nesterovs_momentum only matter for
    # solver='sgd' according to the scikit-learn documentation)
    solver='adam',
    learning_rate='constant',
    learning_rate_init=5e-4,
    power_t=0.5,
    momentum=0.9,
    nesterovs_momentum=True,
    alpha=0.,
    # batching and stopping criteria
    batch_size=1024,
    max_iter=500,
    shuffle=True,
    tol=0.0001,
    n_iter_no_change=1000000,
    # no internal validation split / early stopping
    early_stopping=False,
    validation_fraction=0.,
    # bookkeeping
    warm_start=False,
    random_state=42,
    verbose=True,
)

In [None]:
%%timeit -n 1 -r 1

M.fit(X_train, y_train)

y_pred=M.predict(X_test)

pearson_r(y_test,y_pred)

In [6]:
class MLPRegressor(nn.Module):
    """Fully connected network that maps a feature vector to one output value.

    Parameters
    ----------
    layer_sizes : sequence of int
        ``layer_sizes[0]`` is the input width; each following entry is the
        width of one hidden layer. Every consecutive pair becomes a
        ``Linear -> Dropout -> ReLU`` stage, and a final ``Linear`` maps the
        last width to a single output.
    p_dropout : float
        Dropout probability used in every stage.
    """

    def __init__(self, layer_sizes=(769,64,32,16,), 
                 p_dropout=0):
        super().__init__()

        stack = []
        # Walk over consecutive (input width, output width) pairs.
        for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:]):
            stack += [nn.Linear(n_in, n_out), nn.Dropout(p_dropout), nn.ReLU()]
        # Regression head: one output unit.
        stack.append(nn.Linear(layer_sizes[-1], 1))

        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Apply the layer stack to ``x`` and return its output."""
        return self.model(x)

In [9]:
%%timeit -n 1 -r 1

import torch.multiprocessing as mp

def pearson_r_scorer(y_true, y_pred):
    """Return the squared Pearson correlation between two 1-D tensors.

    Both tensors are detached and converted to NumPy first. The correlation is
    computed with ``np.corrcoef`` because no ``pearson_r`` helper is defined in
    this notebook.
    """
    y_true = y_true.detach().numpy()
    y_pred = y_pred.detach().numpy()
    return np.corrcoef(y_true, y_pred)[0, 1] ** 2 

def train(model,X_train, y_train, X_val=None, y_val=None, 
          weight_decay=0, lr=5e-4,
          N_epochs=100,batch_size=1024, scorer=pearson_r_scorer):
    """Train ``model`` with AdamW on an MSE loss using sequential mini-batches.

    Parameters
    ----------
    model : torch.nn.Module
        Network whose output is reshaped to 1-D and compared with ``y_train``.
    X_train, y_train : array-like
        Training inputs and targets; converted to float32 tensors.
    X_val, y_val : array-like, optional
        If ``X_val`` is given, the model is scored on it after every epoch.
    weight_decay, lr : float
        AdamW hyper-parameters.
    N_epochs : int
        Number of passes over the training data.
    batch_size : int
        Rows per mini-batch; the last batch of an epoch may be smaller.
    scorer : callable
        ``scorer(y_true, y_pred) -> float``, applied to every training batch
        and to the validation set. Note that a correlation-based scorer is
        undefined on a batch that contains a single sample.

    Returns
    -------
    list of (float, float or None)
        One ``(train_score, val_score)`` pair per epoch. ``train_score`` is the
        mean of the per-batch scores; ``val_score`` is ``None`` without
        validation data.
    """
    # Construct data_loader, optimizer, etc.

    X_train = torch.tensor(X_train, dtype=torch.float32)
    y_train = torch.tensor(y_train, dtype=torch.float32)

    if X_val is not None:
        X_val = torch.tensor(X_val, dtype=torch.float32)
        y_val = torch.tensor(y_val, dtype=torch.float32)

    optimizer = AdamW(model.parameters(), weight_decay=weight_decay, lr=lr)

    loss_fn = nn.MSELoss()

    history = []

    # True ceiling division. The previous ceil(len // batch_size) floored
    # first, which dropped the final partial batch and produced zero batches
    # whenever the data set was smaller than one batch.
    batches_per_epoch = -(-len(X_train) // batch_size)

    for epoch in range(N_epochs):
        # Validation below switches to eval mode; switch back so dropout is
        # active again in every training epoch, not just the first.
        model.train()
        train_score, val_score = 0, None
        for batch_idx in range(batches_per_epoch):
            # take a batch
            start = batch_idx * batch_size
            X_batch = X_train[start:start + batch_size]
            y_batch = y_train[start:start + batch_size]
            # forward pass
            y_pred = model(X_batch).reshape(-1)
            loss = loss_fn(y_pred, y_batch)
            # backward pass
            optimizer.zero_grad()
            loss.backward()
            # update weights
            optimizer.step()
            train_score += scorer(y_batch, y_pred)/batches_per_epoch
        if X_val is not None:
            model.eval()
            with torch.no_grad():
                y_pred = model(X_val).reshape(-1)
            val_score = scorer(y_val, y_pred)
        history.append((train_score,val_score))

    # Previously the history was built and then discarded.
    return history

def run_parallel():
    """Start five processes that all call ``train`` on one shared model.

    Every process receives the same model object together with the notebook
    globals ``X_train`` and ``y_train``; the function blocks until all of them
    have finished and returns nothing.
    """
    n_workers = 5
    shared_model = MLPRegressor()
    # NOTE: this is required for the ``fork`` method to work
    shared_model.share_memory()

    workers = []
    for _ in range(n_workers):
        worker = mp.Process(target=train, args=(shared_model,X_train,y_train))
        worker.start()
        workers.append(worker)

    # Wait for every worker before returning.
    for worker in workers:
        worker.join()

run_parallel()

2min 16s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [None]:
# Run the cross-validation pool and keep the flattened per-fold results.
all_res = run_pool()

In [None]:
# Scratch check: multiples of 10 from 0 up to and including 100.
list(range(0,105,10))

In [None]:
# Define the model
# NOTE(review): the first layer expects 128 input features; confirm that this
# matches X.shape[1] for the data used in this notebook.
p_dropout = 0.

model = nn.Sequential(
    nn.Linear(128, 64),
    nn.Dropout(p_dropout),
    nn.ReLU(),
    nn.Linear(64, 32),
    nn.Dropout(p_dropout),
    nn.ReLU(),
    nn.Linear(32, 16),
    nn.Dropout(p_dropout),
    nn.ReLU(),
    nn.Linear(16, 1)
)

# `optim` was never imported; go through the already-imported torch package.
optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)
# The loss was previously only defined inside train(), not at notebook level.
loss_fn = nn.MSELoss()


def pearson_r2(y_true, y_pred):
    """Squared Pearson correlation of two tensors, each flattened to 1-D."""
    true_np = y_true.detach().numpy().ravel()
    pred_np = y_pred.detach().numpy().ravel()
    return np.corrcoef(true_np, pred_np)[0, 1] ** 2


# train-test split of the dataset
# NOTE(review): `groups` is not defined in the visible notebook; it has to be
# supplied (one group label per row of X) before this cell can run.
gss = sklearn.model_selection.GroupShuffleSplit(n_splits=1, train_size=.8, random_state=1)
train_index, test_index = next(iter(gss.split(X, y, groups)))
X_train, X_test, y_train, y_test = X[train_index,:],X[test_index,:],y[train_index],y[test_index]
# NOTE: from here on X_train / X_test / y_train / y_test are torch tensors and
# replace the NumPy splits created at the top of the notebook.
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).reshape(-1, 1)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32).reshape(-1, 1)
 
# training parameters
n_epochs = 1000   # number of epochs to run
batch_size = 1000  # size of each batch
batch_start = torch.arange(0, len(X_train), batch_size)
 
# Hold the best model
best_score = -np.inf   # lower than any real score, so the first epoch always wins
best_weights = None
history = []
 
# training loop
for epoch in range(n_epochs):
    model.train()
    with tqdm.tqdm(batch_start, unit="batch", mininterval=0, disable=False) as bar:
        bar.set_description(f"Epoch {epoch}")
        for start in bar:
            # take a batch
            X_batch = X_train[start:start+batch_size]
            y_batch = y_train[start:start+batch_size]
            # forward pass
            y_pred = model(X_batch)
            loss = loss_fn(y_pred, y_batch)
            # backward pass
            optimizer.zero_grad()
            loss.backward()
            # update weights
            optimizer.step()
            # print progress (must happen while the bar is still open)
            train_score = pearson_r2(y_batch, y_pred)
            bar.set_postfix(r2_train=train_score)
    # evaluate accuracy at end of each epoch, without tracking gradients
    model.eval()
    with torch.no_grad():
        y_pred = model(X_test)
    test_score = pearson_r2(y_test, y_pred)
    print(train_score,test_score)
    history.append(test_score)
    # Higher is better. The old check (`<` against -inf) could never fire, and
    # it wrote to `best_mse`, so `best_score` was never updated.
    if test_score > best_score:
        best_score = test_score
        best_weights = copy.deepcopy(model.state_dict())

In [None]:
# Shape of the current `y_pred` tensor after conversion to a NumPy array.
y_pred.detach().numpy().shape

In [None]:
# Display `y` (taken from data['y'] near the top of the notebook) in full.
y