In [51]:
import pandas as pd
from datetime import datetime
import csv

In [4]:
# read data as df
file_name = "N225minif_2017_15m.csv"
df = pd.read_csv(file_name, header=0)
df.head()

Unnamed: 0,date,time,open,high,low,close,volume
0,2017/1/4,16:30,19070,19110,19070,19105,6981
1,2017/1/4,16:45,19105,19120,19100,19105,5843
2,2017/1/4,17:00,19105,19110,19060,19070,9595
3,2017/1/4,17:15,19070,19090,19065,19085,5873
4,2017/1/4,17:30,19085,19095,19080,19080,2809


In [21]:
# date列 time列を結合、datetime型に変換
df['date_time'] = df[['date', 'time']].apply(lambda x: '{}-{}'.format(x[0], x[1]), axis=1)
df['date_time'] = df['date_time'].apply(lambda x: datetime.strptime(x, "%Y/%m/%d-%M:%S"))
del df['date']
del df['time']
df.head()

Unnamed: 0,open,high,low,close,volume,date_time
0,19070,19110,19070,19105,6981,2017-01-04 00:16:30
1,19105,19120,19100,19105,5843,2017-01-04 00:16:45
2,19105,19110,19060,19070,9595,2017-01-04 00:17:00
3,19070,19090,19065,19085,5873,2017-01-04 00:17:15
4,19085,19095,19080,19080,2809,2017-01-04 00:17:30


In [53]:
# 過去10観測に対して、sell buyを行なった場合の勝敗をラベルづけ
# [t0, t1, t2,..., t9, win=1 or lose=0]

# win lose　の条件を設定
take_profit_rate = 15
loss_cut_rate = 15

# 観測地点数を設定
target_num = 10

# 対象とする列名称を設定
col_name = 'close'

data_list = []
for i, row in df.iterrows():
    # 1行分の観測データとwin loseを格納する一時リスト
    temp_list = []
    
    # 10行目からスタート
    if i < target_num:
        continue
    
    # 観測地点分のデータを格納
    temp_list = list(df[col_name][i - target_num: i])

    # 正規化は分析過程の方で行う(正規化の方法によって結果が異なるかもしれないので)
    #観測地点t0の値を基準として、観測地点分のデータを正規化
    std_rate = temp_list[0]
    temp_list = [d/std_rate for d in temp_list]
    
    # win loseの起点となる価格を取得
    base_rate = row[col_name]
    
    # 損切り or 利確となるまでiを進める
    win_lose = None
    time = i + 1
    while True:
        # timeが最終行に到達した場合は抜ける
        if len(df.index) <= time:
            break
        
        # ターゲット価格
        target_rate = df[col_name][time]
        
        # 利確
        if target_rate - base_rate > take_profit_rate:
            win_lose = 1
            break
        # 損切り
        elif base_rate - target_rate > loss_cut_rate:
            win_lose = 0
            break
        # 取引なし
        else:
            time += 1
    
    # win_loseの結果を一時リストに追加
    temp_list.append(win_lose)
    
    # 一時リストをデータリストに追加
    data_list.append(temp_list)

print(data_list[0:10])

[[19105, 19105, 19070, 19085, 19080, 19085, 19095, 19085, 19085, 19080, 1], [19105, 19070, 19085, 19080, 19085, 19095, 19085, 19085, 19080, 19085, 1], [19070, 19085, 19080, 19085, 19095, 19085, 19085, 19080, 19085, 19075, 1], [19085, 19080, 19085, 19095, 19085, 19085, 19080, 19085, 19075, 19075, 1], [19080, 19085, 19095, 19085, 19085, 19080, 19085, 19075, 19075, 19080, 1], [19085, 19095, 19085, 19085, 19080, 19085, 19075, 19075, 19080, 19080, 1], [19095, 19085, 19085, 19080, 19085, 19075, 19075, 19080, 19080, 19075, 1], [19085, 19085, 19080, 19085, 19075, 19075, 19080, 19080, 19075, 19080, 1], [19085, 19080, 19085, 19075, 19075, 19080, 19080, 19075, 19080, 19085, 1], [19080, 19085, 19075, 19075, 19080, 19080, 19075, 19080, 19085, 19075, 1]]


In [55]:
# csvに出力
with open('data_list.csv', 'w') as f:
    writer = csv.writer(f)
    writer.writerows(data_list)