In [1]:
import poloniex
import time
import numpy as np
import math
import datetime
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib import rcParams
from matplotlib.ticker import *
%matplotlib inline

In [2]:
polo = poloniex.Poloniex()
# 変換可能な通貨一覧を取得
pairs = [pair for pair in polo.returnTicker()]

In [3]:
#データフレームの作成
currency_histrical_data = pd.DataFrame()

In [28]:
#サンプリングする時間の変数を設定
end_time = time.time()
num_data = 22000

#サンプリング間隔の設定(分)
sampling_period = 15
sampling_period_sec = sampling_period * 60

start_time = time.time()-num_data*sampling_period_sec

#現在時刻取得
current_time = datetime.datetime.now().strftime('%Y%m%d')

#取得する通貨情報一覧
convert_list_all = ['USDT_ETH','USDT_BTC','USDT_ETC','USDT_STR','USDT_DASH',
                'USDT_REP','USDT_LTC','USDT_XMR','USDT_BCH','USDT_NXT',
                'USDT_ZEC','USDT_XRP']
#USDT_BCHは欠損値が多いので、除外
convert_list = ['USDT_ETH','USDT_BTC','USDT_ETC','USDT_STR','USDT_DASH',
                'USDT_REP','USDT_LTC','USDT_XMR','USDT_NXT','USDT_ZEC','USDT_XRP']

#予測対象の通貨ペアの設定
target_pair = 'USDT_BTC'

save_file_train = './currency_data/'+ target_pair + '_'+ str(sampling_period) + 'minutes_4month_'+ current_time + '.csv'

In [5]:
print(start_time, ":",end_time, ":", (end_time - start_time)/5/60)

1506293482.325984 : 1526093482.325984 : 66000.0


In [7]:
# 指定したサンプリング間隔で120日分のデータを読み込む
for each_currency in convert_list:
    chart_data = polo.returnChartData(each_currency, period=sampling_period_sec, start=start_time, end=end_time)    
    chart_frame = pd.DataFrame(chart_data)
    chart_frame['date_date'] = chart_frame.date.apply(lambda x:datetime.datetime.fromtimestamp(int(x)).strftime('%Y-%m-%d %H:%M:%S'))
    currency_histrical_data[each_currency + "_close"]=chart_frame['close']
    currency_histrical_data[each_currency + "_high"]=chart_frame['high']
    currency_histrical_data[each_currency + "_low"]=chart_frame['low']
    currency_histrical_data[each_currency + "_open"]=chart_frame['open']
    currency_histrical_data[each_currency + "_volume"]=chart_frame['volume']
    time.sleep(1) #IPがbanされないように、1秒間sleep
currency_histrical_data['DATE_DATE'] = chart_frame['date_date']

In [9]:
currency_histrical_data.to_csv(save_file_train, index=False, encoding='UTF-8_sig')

In [29]:
#データの読み込み
currency_hist_data = pd.read_csv(save_file_train, encoding='UTF-8_sig')
#日付のDATE型に変更しておく
currency_hist_data['DATE'] = currency_hist_data.DATE_DATE.apply(lambda x: datetime.datetime.strptime(x, '%Y-%m-%d %H:%M:%S'))
currency_hist_data = currency_hist_data.drop(["DATE_DATE"], axis=1)
#データ加工用にdateをindex化
currency_hist_data2 = currency_hist_data.set_index(['DATE'])

In [30]:
#通貨ごとにSMAと±2σのボリンジャーバンドの値、および前の観測タイミングと比較した傾きを変数として追加

#移動平均の算出
def calc_moving_average(baseday, delta, target):
    day_before_20 = baseday - datetime.timedelta(minutes=delta)
    try:
        #一日毎の値を取得
        time_delta_data = currency_hist_data2.loc[(day_before_20):(baseday)]
    except:
        return np.NaN
    
    return  time_delta_data[target + "_close"].mean()

#ボリンジャーバンド算出
def calc_BB(baseday, delta, target, target_sigma, SMA):
    day_before_20 = baseday - datetime.timedelta(minutes=delta)
    try:
        #一日毎の値を取得
        time_delta_data = currency_hist_data2.loc[(day_before_20):(baseday)]
    except:
        return np.NaN
    
    std_ = time_delta_data[target + "_close"].std()
    return 0 if math.isnan(std_) else SMA + target_sigma*std_

In [None]:
sampling_delta = int(5*sampling_period)
for each_currency in convert_list:
    currency_hist_data[each_currency + '_SMA'] = currency_hist_data.apply(lambda x:calc_moving_average(x.DATE, sampling_delta, each_currency), axis=1)
    currency_hist_data[each_currency + '_plus2sigma'] = currency_hist_data.apply(lambda x:calc_BB(x.DATE, sampling_delta, each_currency, 2, x[each_currency + '_SMA']), axis=1)
    currency_hist_data[each_currency + '_plus1sigma'] = currency_hist_data.apply(lambda x:calc_BB(x.DATE, sampling_delta, each_currency, 1, x[each_currency + '_SMA']), axis=1)
    currency_hist_data[each_currency + '_minus1sigma'] = currency_hist_data.apply(lambda x:calc_BB(x.DATE, sampling_delta, each_currency, -1, x[each_currency + '_SMA']), axis=1)
    currency_hist_data[each_currency + '_minus2sigma'] = currency_hist_data.apply(lambda x:calc_BB(x.DATE, sampling_delta, each_currency, -2, x[each_currency + '_SMA']), axis=1)

In [16]:
#データ加工用にdateをindex化
currency_hist_data2 = currency_hist_data.set_index(['DATE'])

#5分前とのdiffを取る
def get_diff(baseday, delta, diff_value, target_column):
    targetday = baseday - datetime.timedelta(minutes=delta)
    try:
        targetday_list = currency_hist_data2.loc[(targetday)]
    except:
        return np.NaN
    return float(diff_value) - float(targetday_list[target_column])

In [17]:
for each_currency in convert_list:
    currency_hist_data[each_currency + '_open_diff'] = currency_hist_data.apply(lambda x:get_diff(x.DATE, sampling_period, x[each_currency + '_open'], each_currency + '_open'), axis=1)
    currency_hist_data[each_currency + '_high_diff'] = currency_hist_data.apply(lambda x:get_diff(x.DATE, sampling_period, x[each_currency + '_high'], each_currency + '_high'), axis=1)
    currency_hist_data[each_currency + '_low_diff'] = currency_hist_data.apply(lambda x:get_diff(x.DATE, sampling_period, x[each_currency + '_low'], each_currency + '_low'), axis=1)
    currency_hist_data[each_currency + '_close_diff'] = currency_hist_data.apply(lambda x:get_diff(x.DATE, sampling_period, x[each_currency + '_close'], each_currency + '_close'), axis=1)
    currency_hist_data[each_currency + '_volume_diff'] = currency_hist_data.apply(lambda x:get_diff(x.DATE, sampling_period, x[each_currency + '_close'], each_currency + '_volume'), axis=1)
    currency_hist_data[each_currency + '_SMA_diff'] = currency_hist_data.apply(lambda x:get_diff(x.DATE, sampling_period, x[each_currency + '_SMA'], each_currency + '_SMA'), axis=1)
    currency_hist_data[each_currency + '_plus2sigma_diff'] = currency_hist_data.apply(lambda x:get_diff(x.DATE, sampling_period, x[each_currency + '_plus2sigma'], each_currency + '_plus2sigma'), axis=1)
    currency_hist_data[each_currency + '_plus1sigma_diff'] = currency_hist_data.apply(lambda x:get_diff(x.DATE, sampling_period, x[each_currency + '_plus1sigma'], each_currency + '_plus1sigma'), axis=1)
    currency_hist_data[each_currency + '_minus1sigma_diff'] = currency_hist_data.apply(lambda x:get_diff(x.DATE, sampling_period, x[each_currency + '_minus1sigma'], each_currency + '_minus1sigma'), axis=1)
    currency_hist_data[each_currency + '_minus2sigma_diff'] = currency_hist_data.apply(lambda x:get_diff(x.DATE, sampling_period, x[each_currency + '_minus2sigma'], each_currency + '_minus2sigma'), axis=1)  

In [24]:
#目的変数の設定
def get_price_after_delta(baseday, delta_minutes, target):
    targetday = baseday + datetime.timedelta(minutes=delta_minutes)
    try:
        targetday_list = currency_hist_data2.loc[(targetday)]
    except:
        return np.NaN
    return targetday_list[target + "_close"]
    
    
currency_hist_data[target_pair + '_close_after_' + str(sampling_period) + 'min'] = currency_hist_data.apply(lambda x:get_price_after_delta(x.DATE, sampling_period, target_pair), axis=1)

In [25]:
#現在と15分後の金額の差分を取る
currency_hist_data[target_pair + '_close_after_' + str(sampling_period) + 'min_diff'] = currency_hist_data.apply(lambda x: x[target_pair + '_close_after_' + str(sampling_period) + 'min'] - x[target_pair + '_SMA'], axis=1)
#上がったか下がったかの０１判定
currency_hist_data[target_pair + '_close_after_' + str(sampling_period) + 'min_flag'] = currency_hist_data.apply(lambda x: 1 if x[target_pair + '_close_after_' + str(sampling_period) + 'min_diff'] > 0 else 0, axis=1)

#現在と15分後の金額の比率を取る
currency_hist_data[target_pair + '_close_after_' + str(sampling_period) + 'min_ratio'] = currency_hist_data.apply(lambda x: x[target_pair + '_close_after_' + str(sampling_period) + 'min']/x[target_pair + '_SMA'], axis=1)

#比率の自然対数もとる
currency_hist_data[target_pair + '_close_after_' + str(sampling_period) + 'min_log'] = currency_hist_data.apply(lambda x: math.log(x[target_pair + '_close_after_' + str(sampling_period) + 'min_ratio']), axis=1)

In [26]:
#加工済みデータの保存
save_file_train2 = './currency_data/'+ target_pair + '_' + str(sampling_period) + 'minutes_4month_' + current_time + '_1.csv'

currency_hist_data.to_csv(save_file_train2, index=False, encoding='UTF-8_sig')

In [0]:
#データの読み込み
coin_data = pd.read_csv(save_file_train2, encoding='UTF-8_sig')
coin_data['DATE'] = coin_data.DATE.apply(lambda x: datetime.datetime.strptime(x, '%Y-%m-%d %H:%M:%S'))

In [0]:
date_border = coin_data.loc[0].DATE + datetime.timedelta(days=20)
coin_data_ = coin_data[coin_data.DATE >= date_border]

In [0]:
save_file_train3 = './currency_data/'+ target_pair + '_' + str(sampling_period) + 'minutes_4month_' + current_time + '_2.csv'
coin_data_.to_csv(save_file_train3, index=False, encoding='UTF-8_sig')