In [1]:
matplotlib inline

In [2]:
import os
import numpy as np
import pandas as pd

import warnings
warnings.filterwarnings('ignore')

In [3]:
wowTokenAppendedData = []
regions = ['NA', 'EU', 'CN', 'KR', 'TW']

for entry in os.scandir('./input/wowtoken'):
    if entry.is_file():
        wowTokenEntry = pd.read_csv(entry.path)
        wowTokenEntry['region'] = (os.path.splitext(entry.name)[0])
        wowTokenEntry['date'] = pd.to_datetime(wowTokenEntry['date'])
        wowTokenAppendedData.append(wowTokenEntry)
        
data = pd.concat(wowTokenAppendedData)

In [4]:
data.pivot(columns='region', values='price').describe()

region,CN,EU,KR,NA,TW
count,6835.0,7744.0,7642.0,7886.0,7468.0
mean,242416.955377,164289.345687,271204.557969,95573.815876,260885.958356
std,168907.809419,101425.233611,98936.313849,64075.63775,104487.473796
min,48604.0,30352.0,121305.0,18296.0,114619.0
25%,83650.0,68769.25,179645.5,37004.25,174211.0
50%,215554.0,171327.0,285099.5,89460.5,213519.0
75%,400670.0,256468.75,350899.25,163553.25,335554.5
max,586090.0,401827.0,595930.0,238572.0,501220.0


In [5]:
data2018 = data.loc[data['date'].dt.year >= 2018].reset_index()
data2018.pivot(columns='region', values='price').describe()

region,CN,EU,KR,NA,TW
count,1983.0,2076.0,2088.0,2085.0,2085.0
mean,414124.769037,272073.89499,365853.373084,165006.325659,381305.507434
std,129721.911793,68360.274879,76882.410805,41141.510822,75199.125368
min,206304.0,168106.0,269461.0,100993.0,250412.0
25%,229674.0,184120.0,298436.0,111341.0,284082.0
50%,496259.0,298415.5,322257.5,182392.0,413693.0
75%,507332.5,336512.5,441929.75,199718.0,441348.0
max,586090.0,401827.0,595930.0,238572.0,501220.0


In [6]:
data['unix_timestamp'] = data['date'].astype(np.int64) // 10**9
data.head()

Unnamed: 0,date,price,region,unix_timestamp
0,2015-05-06 22:04:08,250000,TW,1430949848
1,2015-05-07 02:04:09,275180,TW,1430964249
2,2015-05-07 06:04:01,308756,TW,1430978641
3,2015-05-07 10:04:03,342634,TW,1430993043
4,2015-05-07 14:05:10,328547,TW,1431007510


In [7]:
dataNA = data.loc[data['region'] == 'NA'].drop(['date', 'region'], axis=1)
dataNA.head()

Unnamed: 0,price,unix_timestamp
0,30000,1428426758
1,30906,1428444242
2,28515,1428458642
3,25024,1428473042
4,24038,1428487477


In [8]:
def normalize_price(prices):
    minPrice = min(prices)
    maxPrice = max(prices)
    
    normalized_prices = list(map(lambda p: (p-minPrice)/(maxPrice-minPrice), prices))
    
    return normalized_prices

In [9]:
dataNA['norm_price'] = normalize_price(dataNA['price'])

In [10]:
dataNA['norm_price'].describe()

count    7886.000000
mean        0.350823
std         0.290888
min         0.000000
25%         0.084931
50%         0.323070
75%         0.659433
max         1.000000
Name: norm_price, dtype: float64

## Basic Model

This model will predict the next price rise/decrease using the last price variation.

In [11]:
class BasicModel:
    def __init__(self):
        self.currentPrediction = 0 # 1 for rise, 0 for decrease
        self.lastPrice = 0
        
    def make_next_prediction(self, current_price):
        self.currentPrediction = 1 if self.lastPrice <= current_price else 0
        self.lastPrice = current_price
        return self.currentPrediction
    
    def measure_accuracy(self, prices):
        realVariation = 0 # 1 for rise, 0 for decrease
        lastPrice = 0
        correctPredictions = 0
        for price in prices:
            realVariation = 1 if lastPrice <= price else 0
            
            pred = self.make_next_prediction(price)
            correctPredictions += 1 if pred == realVariation else 0
            
        return correctPredictions/len(prices)
    
            
model = BasicModel()

In [12]:
basicModelAccuracy = model.measure_accuracy(dataNA['norm_price'])

## Random Model

This model will predict the next price rise/decrease randomly.

In [13]:
np.random.seed(1212)

class RandomModel:
    def __init__(self):
        self.currentPrediction = 0 # 1 for rise, 0 for decrease
        
    def make_next_prediction(self, current_price):
        self.currentPrediction = np.random.randint(2) # randomly gets 0 or 1
        return self.currentPrediction
    
    def measure_accuracy(self, prices):
        realVariation = 0 # 1 for rise, 0 for decrease
        lastPrice = 0
        correctPredictions = 0
        for price in prices:
            realVariation = 1 if lastPrice <= price else 0
            
            pred = self.make_next_prediction(price)
            correctPredictions += 1 if pred == realVariation else 0
            
        return correctPredictions/len(prices)
    
            
model = RandomModel()

In [14]:
randomModelAccuracy = model.measure_accuracy(dataNA['norm_price'])

## Models Accuracy

In [15]:
print('-'*40)
print('Model\t\t\t\tAccuracy')
print('Basic Prediction Model\t\t%.6f' %basicModelAccuracy)
print('Random Prediction Model\t\t%.6f' %randomModelAccuracy)
print('-'*40)

----------------------------------------
Model				Accuracy
Basic Prediction Model		0.512300
Random Prediction Model		0.489855
----------------------------------------
