In [1]:
matplotlib inline

In [2]:
import os
import numpy as np
import pandas as pd

import warnings
warnings.filterwarnings('ignore')

In [3]:
wowTokenAppendedData = []
regions = ['NA', 'EU', 'CN', 'KR', 'TW']

for entry in os.scandir('./input/wowtoken'):
    if entry.is_file():
        wowTokenEntry = pd.read_csv(entry.path)
        wowTokenEntry['region'] = (os.path.splitext(entry.name)[0])
        wowTokenEntry['date'] = pd.to_datetime(wowTokenEntry['date'])
        wowTokenAppendedData.append(wowTokenEntry)
        
data = pd.concat(wowTokenAppendedData)

In [4]:
data.pivot(columns='region', values='price').describe()

region,CN,EU,KR,NA,TW
count,6835.0,7744.0,7642.0,7886.0,7468.0
mean,242416.955377,164289.345687,271204.557969,95573.815876,260885.958356
std,168907.809419,101425.233611,98936.313849,64075.63775,104487.473796
min,48604.0,30352.0,121305.0,18296.0,114619.0
25%,83650.0,68769.25,179645.5,37004.25,174211.0
50%,215554.0,171327.0,285099.5,89460.5,213519.0
75%,400670.0,256468.75,350899.25,163553.25,335554.5
max,586090.0,401827.0,595930.0,238572.0,501220.0


In [5]:
dataNA = data.loc[data['region'] == 'NA'].drop(['date', 'region'], axis=1)
dataCN = data.loc[data['region'] == 'CN'].drop(['date', 'region'], axis=1)
dataEU = data.loc[data['region'] == 'EU'].drop(['date', 'region'], axis=1)
dataKR = data.loc[data['region'] == 'KR'].drop(['date', 'region'], axis=1)
dataTW = data.loc[data['region'] == 'TW'].drop(['date', 'region'], axis=1)

In [6]:
def calculate_short_SMA(prices, period):
    if len(prices) < period:
        return 0
    
    return np.mean(prices[-10:])
    
def calculate_long_SMA(prices, period):
    if len(prices) < period:
        return 0
    
    return np.mean(prices[-50:])

def calculate_SMAs(data):
    prices = data['price'].values
    
    shortSMAs = []
    longSMAs = []
    pricesSeen = []
    for price in prices:
        pricesSeen.append(price)
        
        shortSMAs.append(calculate_short_SMA(pricesSeen, 10))
        longSMAs.append(calculate_long_SMA(pricesSeen, 50))
        
    data['short_sma'] = shortSMAs
    data['long_sma'] = longSMAs
    
    return data

In [7]:
dataNA = calculate_SMAs(dataNA)
dataCN = calculate_SMAs(dataCN)
dataEU = calculate_SMAs(dataEU)
dataKR = calculate_SMAs(dataKR)
dataTW = calculate_SMAs(dataTW)

In [8]:
def make_prediction(data, index):
    if index == 0:
        return 0
    
    else:
        shortSMA = data.loc[index, 'short_sma']
        lastShortSMA = data.loc[index-1, 'short_sma']
        longSMA = data.loc[index, 'long_sma']
        lastLongSMA = data.loc[index-1, 'long_sma']
        
        if shortSMA > lastShortSMA and longSMA > lastLongSMA:
            # Both SMAs are increasing, so the tendency is to rise
            return 1

        elif shortSMA <= lastShortSMA and longSMA <= lastLongSMA:
            # Both SMAs are decreasing, so the tendency is to fall
            return 0

        elif lastShortSMA <= longSMA and shortSMA > longSMA:
            # The short SMA crossed the long SMA by increasing itself, so in this case,
            # we hope that the short SMA goes back to the long SMA, so the tendency is to fall
            return 0

        elif lastShortSMA > longSMA and shortSMA <= longSMA:
            # The short SMA crossed the long SMA by decreasing itself, so in this case,
            # we hope that the short SMA goes back to the long SMA, so the tendency is to rise
            return 1
        
        else:
            return 0

In [9]:
preds = []
for index in range(len(dataNA)):
    preds.append(make_prediction(dataNA, index))
    
dataNA['simple_prediction'] = preds

In [10]:
previousPrices = [0]
previousPrices.extend(dataNA['price'][:-1])
dataNA['previous_price'] = previousPrices

In [11]:
riseOrDecrease = [0]
for index in range(len(dataNA['price'])):
    if index > 0:
        riseOrDecrease.append(1 if dataNA['price'][index] > dataNA['price'][index-1] else 0)
dataNA['rise_or_decrease'] = riseOrDecrease

In [12]:
dataNA.tail(10)

Unnamed: 0,price,short_sma,long_sma,simple_prediction,previous_price,rise_or_decrease
7876,106512,107315.8,107463.58,0,106678,0
7877,106376,107151.8,107425.2,0,106512,0
7878,106238,106985.8,107383.72,0,106376,0
7879,106106,106825.9,107339.12,0,106238,0
7880,105973,106660.6,107291.78,0,106106,0
7881,105905,106505.0,107243.7,0,105973,0
7882,105987,106373.7,107200.04,0,105905,1
7883,106095,106276.4,107162.62,0,105987,1
7884,106068,106193.8,107128.8,0,106095,0
7885,106087,106134.7,107101.36,0,106068,1


In [13]:
from keras import regularizers
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import Adam

class NeuralNetwork:
    def __init__(self):
        self.model = Sequential()
        self.model.add(Dense(100, input_dim=3, activation='relu'))
        self.model.add(Dense(10, activation='relu'))
        self.model.add(Dense(1, activation='softmax'))

        self.model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam(0.001), metrics=['accuracy'])

        self.model.summary()

    def train(self, x_train, y_train):
        self.model.fit(x_train, y_train, epochs=40, validation_split=0.2)


Using TensorFlow backend.


In [14]:
nn = NeuralNetwork()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 100)               400       
_________________________________________________________________
dense_2 (Dense)              (None, 10)                1010      
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 11        
Total params: 1,421
Trainable params: 1,421
Non-trainable params: 0
_________________________________________________________________


In [15]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(dataNA[['short_sma', 'long_sma', 'previous_price']].values, dataNA['rise_or_decrease'].values, test_size=0.33)

In [16]:
nn.train(x_train, y_train)

Train on 4226 samples, validate on 1057 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [17]:
predictions = nn.model.predict(x_test)
predictions

array([[1.],
       [1.],
       [1.],
       ...,
       [1.],
       [1.],
       [1.]], dtype=float32)

In [18]:
from sklearn.neural_network import MLPClassifier

mlp = MLPClassifier(hidden_layer_sizes=(100,2))

In [19]:
mlp.fit(x_train, y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100, 2), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=None, shuffle=True, solver='adam', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False)

In [20]:
mlp.predict(x_test)

array([1, 1, 1, ..., 1, 1, 1])