In [None]:
import numpy as np
import pandas as pd

from cuml.svm import SVR

import random

import pickle
import matplotlib.pyplot as plt
%matplotlib inline

#  Prepare data

In [None]:
# Column schema for the training CSV.
# The 300 anonymised feature columns are named f_0 .. f_299.
features = [f'f_{i}' for i in range(300)]

# Explicit dtypes for the id/target columns, then every feature as float32.
data_types_dict = {
    'time_id': 'int16',
    'investment_id': 'int16',
    "target": 'float32',
}
data_types_dict.update(dict.fromkeys(features, 'float32'))

# Read only the columns named in the schema, with the dtypes declared above.
train = pd.read_csv(
    '../input/ubiquant-market-prediction/train.csv',
    usecols=data_types_dict.keys(),
    dtype=data_types_dict,
)


# Create model and Train

In [None]:
# Number of random chunks the training rows are split into: N_DEVIDE_DATA - 1
# chunks are each used to fit a separate model and the remaining rows are held
# out for testing. Per the original note this chunking is meant to avoid memory
# problems when fitting (presumably it bounds memory use per fit — confirm).
N_DEVIDE_DATA = 20

# Fixed seed so the chunk assignment (and therefore the hold-out set) is the
# same on every Restart & Run All. Without it, models reloaded from disk could
# be evaluated on rows they were trained on in an earlier run.
SPLIT_SEED = 42

n_row = len(train)
idx_list = list(range(n_row))
# A dedicated Random instance keeps the global `random` state untouched.
random.Random(SPLIT_SEED).shuffle(idx_list)

In [None]:
# Fit one SVR per shuffled chunk. Only the first N_DEVIDE_DATA - 1 chunks are
# used here; the rows after them are not touched by this loop.
chunk_size = n_row // N_DEVIDE_DATA
models = []
for i in range(N_DEVIDE_DATA - 1):
    # Positional row indices belonging to chunk i
    chunk_rows = idx_list[chunk_size * i:chunk_size * (i + 1)]
    chunk = train.iloc[chunk_rows]

    X = chunk[features].to_numpy()
    y = chunk['target'].to_numpy()

    # Create and train the model for this chunk
    model = SVR(C=1.0, kernel='rbf', epsilon=0.1)
    model.fit(X, y)

    # NOTE: this score is computed on the same rows the model was fitted on,
    # so it is a training-fit figure, not a validation score.
    r2 = model.score(X, y)
    print(i,'R^2:',r2)

    models.append(model)

# Validation

In [None]:
# Hold-out data: every shuffled row that comes after the chunks consumed by the
# training loop. The loop uses N_DEVIDE_DATA - 1 chunks, so the offset is
# derived from that constant instead of repeating the literal 19 (which would
# silently go out of sync if N_DEVIDE_DATA were changed).
start_idx = int(n_row/N_DEVIDE_DATA)*(N_DEVIDE_DATA-1)
devide_idx_list = idx_list[start_idx:]

test = train.iloc[devide_idx_list]
X_test = test[features].to_numpy()
y_test = test['target'].to_numpy()

# Ensemble prediction: unweighted mean of the per-model predictions.
# sum() starts from 0, so the first addition yields the first prediction array.
pre_y = sum(model.predict(X_test) for model in models) / len(models)

In [None]:
# Predicted vs. actual target on the held-out rows.
# Explicit figure/axes plus labels so the plot is readable on its own; the
# trailing semicolon suppresses the repr of the last call in the cell output.
fig, ax = plt.subplots()
ax.scatter(y_test, pre_y)
ax.set(
    xlabel='actual target (y_test)',
    ylabel='predicted target (pre_y)',
    title='Hold-out rows: predicted vs. actual target',
);

In [None]:
# Correlation coefficient matrix between the held-out targets and the
# ensemble predictions (off-diagonal entries are the correlation of interest).
actual_values = y_test.tolist()
predicted_values = pre_y.tolist()
np.corrcoef(actual_values, predicted_values)

# Save Model

In [None]:
# Save each fitted model to its own pickle file: model_svr_0.sav, model_svr_1.sav, ...
for i, model in enumerate(models):
    filename = 'model_svr_{}.sav'.format(i)
    # The context manager closes (and flushes) the file even if pickling fails;
    # a bare open() here would leave the handle to the garbage collector.
    with open(filename, 'wb') as model_file:
        pickle.dump(model, model_file)

# Read Model

In [None]:
# Reload the ensemble from the pickle files written by the save step.
# The count follows the number of models trained (N_DEVIDE_DATA - 1) instead of
# a hard-coded 19, so it stays correct if the chunk count is changed.
# NOTE: pickle.load executes arbitrary code from the file — only load .sav
# files produced by this notebook, never files from an untrusted source.
models = []
for i in range(N_DEVIDE_DATA - 1):
    filename = 'model_svr_{}.sav'.format(i)
    # Context manager guarantees the file handle is closed after loading.
    with open(filename, 'rb') as model_file:
        loaded_model = pickle.load(model_file)
    models.append(loaded_model)

# Submit

In [None]:
# Predict the target for each batch served by the competition API and submit it.
import ubiquant
env = ubiquant.make_env()
iter_test = env.iter_test()

for (test_df, sample_prediction_df) in iter_test:
    test_x = test_df[features].to_numpy()

    # Unweighted mean over the ensemble, matching the validation cell.
    # The previous code added predictions onto the existing 'target' column
    # and divided by len(models)+1, which shrank every prediction by a factor
    # of N/(N+1) and depended on whatever values the template column held.
    ensemble_sum = 0
    for loaded_model in models:
        ensemble_sum = ensemble_sum + loaded_model.predict(test_x)
    sample_prediction_df['target'] = ensemble_sum / len(models)

    env.predict(sample_prediction_df)
    display(sample_prediction_df)