# High Freq Model

在这个 notebook 中，我们训练高频模型。这里的代码作为模型更新线程的基础。

In [None]:
import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import time

# Turn off pandas' chained-assignment check for the whole session.
pd.set_option('mode.chained_assignment', None)

In [None]:
def to_pm(s):
    '''
    Extract the sign of every element ( e.g. [-3, 2, -2, 3] -> [-1, 1, -1, 1] ).

    s : array-like of numbers (NumPy array, list, pandas Series, ...)

    Returns a float array with the same shape as ``s`` that holds 1.0 for
    positive entries, -1.0 for negative entries and 0.0 for everything else
    (zeros and NaN both map to 0.0).
    '''
    values = np.asarray(s)
    # Vectorized comparisons replace the per-element Python loop. np.sign is
    # deliberately not used: it would propagate NaN, whereas both comparisons
    # are False for NaN and therefore yield 0.
    return (values > 0).astype(float) - (values < 0).astype(float)
def calc_accuracy(pred, real):
    '''
    Directional accuracy of ``pred`` against ``real``.

    The signs of both sequences are multiplied element-wise: +1 where they
    agree, -1 where they disagree and 0 where either side has no sign. The
    mean of that product is mapped from [-1, 1] onto [0, 1], so when no
    element is zero the result is the fraction of matching signs.
    '''
    sign_agreement = to_pm(pred) * to_pm(real)
    mean_agreement = np.sum(sign_agreement) / len(pred)
    return (1 + mean_agreement) / 2

In [None]:
# Load both alpha tables; the CSV column named 'Unnamed: 0' becomes the index.
alphas34, alphasbasic = (
    pd.read_csv(csv_path).set_index('Unnamed: 0')
    for csv_path in ('data/alphas34.csv', 'data/alphasbasic.csv')
)

In [None]:
# with pd.option_context('display.max_rows', None, 'display.max_columns', None):
#     print(alphasbasic.loc[: ,'obj10':].corr().iloc[:, 0])

In [None]:
# alphaall = alphasbasic.join(alphas34.iloc[:, 52:]).iloc[:, 49:] \
#     .replace([np.inf, -np.inf], np.nan).dropna()

# alphaall.replace({False: 0, True: 1}, inplace=True)

In [None]:
# alphaall

In [None]:
# Build the modelling table:
#   1. join alphas34's columns from position 52 onward onto alphasbasic (by index),
#   2. keep only columns 49..59 of the joined frame,
#   3. turn +/-inf into NaN and drop every row that contains a NaN.
# NOTE(review): the positional offsets 52 and 49:60 depend on the column
# layout of the CSV files - confirm they still select the intended columns.
alphaall_sub = (
    alphasbasic
    .join(alphas34.iloc[:, 52:])
    .iloc[:, 49:60]
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
)
alphaall_sub

In [None]:
# with pd.option_context('display.max_rows', None, 'display.max_columns', None):
#     print(alphaall.corr().iloc[:, 0])

In [None]:
# for i in range(61):
#     print([alphaall.columns[i], alphaall.iloc[:, i].mean(), alphaall.iloc[:, i].var()])

In [None]:
# for i in range(61):
#     plt.figure()
#     alphaall.iloc[:, i].hist()

# MLP

In [None]:
from sklearn.metrics import mean_squared_error
from sklearn.utils import shuffle

# Shuffle with a fixed seed so the split below is reproducible.
# NOTE(review): if the rows are time-ordered, a random shuffle mixes past and
# future samples across the three sets - confirm this is intended.
data_shuf = shuffle(alphaall_sub, random_state = 2021)

# Row ranges: first 1,000,000 -> train, next 300,000 -> validate, rest -> test.
train_rows = data_shuf.iloc[:1000000]
validate_rows = data_shuf.iloc[1000000:1300000]
test_rows = data_shuf.iloc[1300000:]

# Column 0 is the regression target; all remaining columns are features.
y_train, X_train = train_rows.iloc[:, 0].values, train_rows.iloc[:, 1:].values
y_validate, X_validate = validate_rows.iloc[:, 0].values, validate_rows.iloc[:, 1:].values
y_test, X_test = test_rows.iloc[:, 0].values, test_rows.iloc[:, 1:].values

In [None]:
from sklearn.neural_network import MLPRegressor

# Hyper-parameters kept as separate names so they are easy to tweak per run.
hidden_layer_sizes = (10, 10)
max_iter = 10
alpha = 0.0001
learning_rate_init = 0.0005

# Collect the constructor arguments in one place, then build the regressor.
mlp_params = dict(
    hidden_layer_sizes = hidden_layer_sizes,
    max_iter = max_iter,
    alpha = alpha,
    learning_rate_init = learning_rate_init,
    random_state = 2021,
    verbose = True,
)
nnr = MLPRegressor(**mlp_params)

# Train on the training split.
nnr.fit(X_train, y_train)

In [None]:
# Predictions on the training split.
y_train_pred = nnr.predict(X_train)

# Scatter of prediction vs. realized value with two reference lines:
# the identity line and a degree-1 least-squares fit of real on pred.
plt.figure(figsize=(8, 8))
plt.plot(y_train_pred, y_train, 'go')
line_x = [-1, 1]
plt.plot(line_x, [-1, 1], 'grey', ls = '--', label = 'y = x')
train_trend = np.poly1d(np.polyfit(y_train_pred, y_train, 1))
plt.plot(line_x, train_trend(line_x), 'g--', label = 'fit')
plt.xlabel('pred')
plt.ylabel('real')
plt.grid()
plt.legend()
plt.show()

# Cell output: (mean squared error, directional accuracy) on the training split.
# The squared error is symmetric, so the (pred, real) argument order is harmless.
train_mse = mean_squared_error(y_train_pred, y_train)
train_accuracy = calc_accuracy(y_train_pred, y_train)
train_mse, train_accuracy

In [None]:
# Predictions on the validation split.
y_validate_pred = nnr.predict(X_validate)

# Scatter of prediction vs. realized value with two reference lines:
# the identity line and a degree-1 least-squares fit of real on pred.
plt.figure(figsize=(8, 8))
plt.plot(y_validate_pred, y_validate, 'go')
line_x = [-1, 1]
plt.plot(line_x, [-1, 1], 'grey', ls = '--', label = 'y = x')
validate_trend = np.poly1d(np.polyfit(y_validate_pred, y_validate, 1))
plt.plot(line_x, validate_trend(line_x), 'g--', label = 'fit')
plt.xlabel('pred')
plt.ylabel('real')
plt.grid()
plt.legend()
plt.show()

# Cell output: (mean squared error, directional accuracy) on the validation split.
# The squared error is symmetric, so the (pred, real) argument order is harmless.
validate_mse = mean_squared_error(y_validate_pred, y_validate)
validate_accuracy = calc_accuracy(y_validate_pred, y_validate)
validate_mse, validate_accuracy

In [None]:
# Realized targets of the 100 validation samples with the highest predictions (long side).
highest_pred_idx = y_validate_pred.argsort()[-100:]
y_validate[highest_pred_idx]

In [None]:
# Realized targets of the 100 validation samples with the lowest predictions (short side).
lowest_pred_idx = y_validate_pred.argsort()[:100]
y_validate[lowest_pred_idx]

In [None]:
# Persist the fitted regressor. The context manager makes sure the file is
# flushed and closed even if pickling raises, instead of leaking the handle.
with open('Models/MLP_SOIR.sav', 'wb') as model_file:
    pickle.dump(nnr, model_file)