In [None]:
import pandas as pd 
import numpy as np
import csv
import talib as ta
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
from sklearn.model_selection import train_test_split

from sklearn.model_selection import GridSearchCV
from bayes_opt import BayesianOptimization
from sklearn.model_selection import cross_val_score

# Module-level placeholders for the current training split.  predict()
# rebinds both names and randomforest_cv() reads them.
X_train = y_train = 0

def csv_to_df(dataname):
    """Load ``csv/<dataname>`` as an OHLCV frame indexed by a date-time string.

    The seven columns of the file are relabelled to Date, Hour, Open, High,
    Low, Close and Volume.  Date and Hour are concatenated (no separator) into
    a "Datetime" column, which becomes the index; Date and Hour themselves are
    then removed.

    Args:
        dataname: File name, relative to the ``csv/`` directory.

    Returns:
        DataFrame with columns Open, High, Low, Close, Volume, indexed by
        "Datetime".
    """
    frame = pd.read_csv("csv/" + dataname)
    frame.columns = ["Date", "Hour", "Open", "High", "Low", "Close", "Volume"]
    stamp = frame["Date"].str.cat(frame["Hour"])
    frame.insert(0, "Datetime", stamp)
    return frame.drop(["Date", "Hour"], axis=1).set_index("Datetime")

def classify(value, r):
    """Label a ratio as up (1), down (-1) or neutral (0).

    Args:
        value: Ratio to classify; addclass passes Close / Open.
        r: Half-width of the neutral band around 1.

    Returns:
        1 if ``value > 1 + r``, -1 if ``value < 1 - r``, otherwise 0.
    """
    if value > 1 + r:
        return 1
    if value < 1 - r:
        return -1
    # Inside the band [1 - r, 1 + r]: neutral.  Returning 1 here would make
    # this case indistinguishable from the "up" class.
    return 0
        

def addclass(df, R):
    """Return a copy of ``df`` with a "class" column holding the next row's label.

    Each row's Close / Open ratio is labelled with classify(); the labels are
    then shifted up by one row so that every row carries the label of the row
    that follows it.  The last row has no successor and is dropped, together
    with any other row that contains a NaN.

    Args:
        df: Frame with "Open" and "Close" columns.
        R: Neutral-band half-width forwarded to classify().

    Returns:
        A new DataFrame; the frame passed in is not modified.
    """
    ratio = df["Close"] / df["Open"]
    next_label = ratio.apply(lambda x: classify(x, R)).shift(-1)
    # "class" is a Python keyword, hence the dict unpacking.  assign() builds
    # a new frame instead of writing the column onto the caller's object.
    labelled = df.assign(**{"class": next_label})
    return labelled.dropna()

def randomforest_cv(n_estimators, min_samples_split, max_features, max_depth):
    """Score one random-forest configuration by 3-fold cross-validation.

    Written as an objective for a hyper-parameter optimiser that proposes
    floats: the integer-valued parameters are truncated with ``int()`` before
    they reach the classifier.  The data comes from the module-level
    ``X_train`` / ``y_train``, which predict() sets.

    Args:
        n_estimators: Number of trees (truncated to int).
        min_samples_split: Minimum samples needed to split a node (truncated
            to int).
        max_features: Passed to the classifier unchanged.
        max_depth: Maximum tree depth (truncated to int).

    Returns:
        Mean accuracy over the 3 folds.
    """
    model = RandomForestClassifier(
        n_estimators=int(n_estimators),
        min_samples_split=int(min_samples_split),
        max_features=max_features,
        random_state=0,
        # Cast like the other integer hyper-parameters; the optimiser hands
        # over a float.
        max_depth=int(max_depth),
        # max_leaf_nodes=max_leaf_nodes
    )
    fold_scores = cross_val_score(
        model,
        X_train, y_train,
        scoring='accuracy',
        cv=3,  # 3-fold
        n_jobs=-1,  # use all CPUs
    )
    return fold_scores.mean()

        


def predict(df):
    """Train a random forest on a random split of ``df`` and return test accuracy.

    Rows containing NaN are discarded, "class" is used as the target and every
    other column as a feature.  The split uses train_test_split's default
    proportions and no fixed seed, so the result differs from call to call.

    Side effect: the module-level ``X_train`` and ``y_train`` are rebound to
    the training part of the split.

    Args:
        df: Feature frame that includes a "class" column.

    Returns:
        Accuracy of the fitted classifier on the held-out part of the split.
    """
    global X_train, y_train

    cleaned = df.dropna()
    target = cleaned["class"]
    features = cleaned.drop("class", axis=1)
    X_train, X_test, y_train, y_test = train_test_split(features, target)

    # Alternatives that were tried: KNeighborsClassifier(), GaussianNB()
    clf = RandomForestClassifier()

    # Optional hyper-parameter search (disabled):
    #     randomforest_cv_bo = BayesianOptimization(
    #             randomforest_cv,
    #             {'n_estimators': (10, 250),
    #             'min_samples_split': (2, 25),
    #             'max_features': (0.1, 0.999),
    #             "max_depth": (1, 100),
    #             }
    #         )
    #     gp_params = {"alpha": 1e-5}
    #     randomforest_cv_bo.maximize(n_iter=50, **gp_params)
    #     print(randomforest_cv_bo.max)
    #     clf = RandomForestClassifier(
    #                 n_estimators=int(randomforest_cv_bo.max['params']['n_estimators']),
    #                 min_samples_split=int(randomforest_cv_bo.max['params']['min_samples_split']),
    #                 max_features=randomforest_cv_bo.max['params']['max_features'],
    #                 max_depth = randomforest_cv_bo.max['params']['max_depth'],
    #                 random_state=0,
    #             )

    clf.fit(X_train, y_train)
    predicted = clf.predict(X_test)
    # Other metrics that were tried: precision_score(..., average=None),
    # confusion_matrix(...)
    return accuracy_score(y_test, predicted)

def clustering(df):
    """Cluster the rows with k-means (k=3) and show two PCA scatter plots.

    Both figures show the rows projected onto their first two principal
    components.  The first colours each point by its k-means cluster; the
    second colours each point by its own first three column values.

    Args:
        df: Numeric DataFrame; rows containing NaN are ignored.
    """
    data = df.dropna()
    model = KMeans(n_clusters=3).fit(data)
    print(model.labels_)

    # fit_transform() fits the model itself; no separate fit() call is needed.
    pca_data = PCA(n_components=2).fit_transform(data)

    palette = ["red", "blue", "green"]

    # Plot of the clustering result.  One scatter call for all points rather
    # than one call per row.
    plt.figure()
    plt.scatter(
        pca_data[:, 0],
        pca_data[:, 1],
        c=[palette[int(label)] for label in model.labels_],
    )

    # Plot of the raw data.  Values are read by position (df[j][0] would be a
    # column lookup by label) and clipped to the 0-1 range that matplotlib
    # accepts for RGB components.
    values = data.to_numpy(dtype=float)
    plt.figure()
    if values.shape[1] >= 3:
        rgb = np.clip(np.round(values[:, :3] / 256, 3), 0, 1)
        plt.scatter(pca_data[:, 0], pca_data[:, 1], c=rgb)
    else:
        # Fewer than three columns: no RGB triple can be formed.
        plt.scatter(pca_data[:, 0], pca_data[:, 1])

    plt.show()

    
def addRSI(df, n=5):
    """Add an "RSI" column: the n-period RSI of Close, divided by Close.

    The column is written onto ``df`` itself, which is also returned.

    Args:
        df: Frame with a "Close" column.
        n: RSI time period.
    """
    prices = np.array(df["Close"])
    rsi = ta.RSI(prices, timeperiod=n)
    df["RSI"] = rsi / prices
    return df
def addBBAND(df, n=10):
    """Add upper and lower Bollinger-band columns, each divided by Close.

    Bands are computed over ``n`` periods with a deviation multiplier of 2 on
    both sides.  The columns "BBANDS+σ" and "BBANDS-σ" are written onto ``df``
    itself, which is also returned; the middle band is not stored.

    Args:
        df: Frame with a "Close" column.
        n: Band time period.
    """
    prices = np.array(df["Close"])
    bands = ta.BBANDS(prices, timeperiod=n, nbdevup=2, nbdevdn=2)
    # bands is (upper, middle, lower)
    df["BBANDS+σ"] = bands[0] / prices
    df["BBANDS-σ"] = bands[2] / prices
    return df

def addEMA_n(df, n=5):
    """Add an "EMA<n>" column holding the n-period EMA of Close.

    Unlike addRSI and addBBAND, the value is stored as-is, not divided by
    Close.  The column is written onto ``df`` itself, which is also returned.

    Args:
        df: Frame with a "Close" column.
        n: EMA time period; also used in the column name.
    """
    prices = np.array(df["Close"])
    column = f"EMA{n}"
    df[column] = ta.EMA(prices, timeperiod=n)  # / prices
    return df

def addPreData(df, n=5):
    """Append the feature columns of the previous ``n`` rows to every row.

    The result is ``df`` followed by its non-"class" columns shifted down by
    1, 2, ..., n rows.  The lagged columns keep their original names, so the
    result has repeated column labels, and its first ``n`` rows contain NaN.

    Args:
        df: Frame with a "class" column.
        n: Number of previous rows to append.

    Returns:
        The widened DataFrame, or ``df`` itself when ``n`` is 0 or negative.
    """
    if n <= 0:
        return df
    features = df.drop("class", axis=1)
    lagged = [features.shift(step) for step in range(1, n + 1)]
    return pd.concat([df] + lagged, axis=1)


def dropData(df):
    """Return ``df`` without the raw Open, High, Low, Close and Volume columns.

    Raises:
        KeyError: If any of the five columns is missing.
    """
    return df.drop(["Open", "High", "Low", "Close", "Volume"], axis=1)
def n_test():
    """Grid-search the RSI, Bollinger-band and EMA periods on daily USD/JPY.

    For every (i, j, k) in 4..14 the three indicators are added to the
    labelled data and the mean accuracy of 10 predict() runs is printed.
    """
    R = 0
    # Load and label once; every combination starts from a copy of this frame.
    base = addclass(csv_to_df("USD_JPY_D.csv"), R)
    for i in range(4, 15):
        for j in range(4, 15):
            for k in range(4, 15):
                # The add* helpers write their columns in place and addEMA_n
                # names its column after k, so reusing one frame would leave
                # the EMA columns of earlier iterations in as extra features.
                df = base.copy()
                df = addRSI(df, i)
                df = addBBAND(df, j)
                df = addEMA_n(df, k)

                average = sum(predict(df) for _ in range(10))
                print("(i, j, k) = " + str(i) + "," + str(j) + "," + str(k), end="  : ")
                print(average / 10)

def bbandtest():
    """Sweep the Bollinger-band period and the number of lagged rows.

    For band periods 2..9 and 0..19 lagged rows, prints the mean accuracy of
    10 predict() runs on 15-minute USD/JPY data and then calls clustering()
    on the same features.
    """
    R = 0
    # The CSV and its labels are the same for every combination: load once.
    base = addclass(csv_to_df("USD_JPY_m15.csv"), R)
    for i in range(2, 10):
        # The bands depend only on i.  addBBAND writes in place, so it gets
        # its own copy of the labelled data.
        banded = dropData(addBBAND(base.copy(), i))
        for j in range(0, 20):
            print("Datanum:" + str(j) + " BBAND: " + str(i), end=" ")
            df = addPreData(banded, j)
            average = sum(predict(df) for _ in range(10))
            print(average / 10)
            clustering(df)

# Single experiment: 15-minute USD/JPY, 20-period Bollinger bands (raw OHLCV
# columns removed) plus the features of the 5 preceding rows.
R = 0
df = (
    csv_to_df("USD_JPY_m15.csv")
    .pipe(addclass, R)
    .pipe(addBBAND, 20)
    .pipe(dropData)
    .pipe(addPreData, 5)
)

# predict() uses an unseeded random split, so report the mean of 10 runs.
average = 0
for x in range(10):
    average = average + predict(df)

print(average / 10)



# R = 0.00005
# R=0
# df = csv_to_df("USD_JPY_H1_all.csv")
# df = addclass(df, R)
# df = addBBAND(df)
# # df = addRSI(df)
# # df = addEMA_n(df)
# df = dropData(df)
# # df = addPreData(df, 1)
# print(df.dropna())

# clustering(df)

# average = 0
# for x in range(100):
#     average += predict(df)

# print(average/100)

# n_test()
#bbandtest()

In [None]:
USD_JPY_H1_all
Datanum:0 BBAND: 2 0.5054523076923078
Datanum:1 BBAND: 2 0.5018154963382362
Datanum:2 BBAND: 2 0.505895747430611
Datanum:3 BBAND: 2 0.5079204874146102
Datanum:4 BBAND: 2 0.5068188811619178
Datanum:5 BBAND: 2 0.5072932053175776
Datanum:6 BBAND: 2 0.506536189069424
Datanum:7 BBAND: 2 0.5085056622353521
Datanum:8 BBAND: 2 0.508234859675037
Datanum:9 BBAND: 2 0.5078845325290822
Datanum:10 BBAND: 2 0.5080999569151229
Datanum:11 BBAND: 2 0.5109312488459408
Datanum:12 BBAND: 2 0.5076383332307502
Datanum:13 BBAND: 2 0.511547457835775
Datanum:14 BBAND: 2 0.5075772497845623
Datanum:15 BBAND: 2 0.5094361689031147
Datanum:16 BBAND: 2 0.5088021666871845
Datanum:17 BBAND: 2 0.5102123730378578
Datanum:18 BBAND: 2 0.5099846106494306
Datanum:19 BBAND: 2 0.5084272083718068
Datanum:0 BBAND: 3 0.503003261739184
Datanum:1 BBAND: 3 0.5063696227460153
Datanum:2 BBAND: 3 0.5097790633269739
Datanum:3 BBAND: 3 0.5093851929349498
Datanum:4 BBAND: 3 0.5091703594288528
Datanum:5 BBAND: 3 0.508856474643033
Datanum:6 BBAND: 3 0.5083579517479075
Datanum:7 BBAND: 3 0.5092688330871493
Datanum:8 BBAND: 3 0.508881639687327
Datanum:9 BBAND: 3 0.512328429863975
Datanum:10 BBAND: 3 0.5116021419338955
Datanum:11 BBAND: 3 0.5113128577583554
Datanum:12 BBAND: 3 0.5133571340637696
Datanum:13 BBAND: 3 0.5091714883663672
Datanum:14 BBAND: 3 0.5096639172719439
Datanum:15 BBAND: 3 0.5107534162255325
Datanum:16 BBAND: 3 0.5110926438904279
Datanum:17 BBAND: 3 0.5117143736534319
Datanum:18 BBAND: 3 0.5082056017236071
Datanum:19 BBAND: 3 0.5117328408741151
Datanum:0 BBAND: 4 0.5095513570065849
Datanum:1 BBAND: 4 0.5114837836174534
Datanum:2 BBAND: 4 0.5106960428334052
Datanum:3 BBAND: 4 0.5105243722304283
Datanum:4 BBAND: 4 0.507287050713934
Datanum:5 BBAND: 4 0.5072070408665681
Datanum:6 BBAND: 4 0.5096750369276218
Datanum:7 BBAND: 4 0.5091586138979505
Datanum:8 BBAND: 4 0.509786422108697
Datanum:9 BBAND: 4 0.5114544223548962
Datanum:10 BBAND: 4 0.5102726657229026
Datanum:11 BBAND: 4 0.5093130616767204
Datanum:12 BBAND: 4 0.5088514095777422
Datanum:13 BBAND: 4 0.5103225409331528
Datanum:14 BBAND: 4 0.508814477409824
Datanum:15 BBAND: 4 0.510649430594029
Datanum:16 BBAND: 4 0.5097506925207755
Datanum:17 BBAND: 4 0.5069067405355494
Datanum:18 BBAND: 4 0.5100892582333025
Datanum:19 BBAND: 4 0.5104407781334646
Datanum:0 BBAND: 5 0.5062096128992554
Datanum:1 BBAND: 5 0.5061911502246292
Datanum:2 BBAND: 5 0.5096750369276218
Datanum:3 BBAND: 5 0.5068439192516002
Datanum:4 BBAND: 5 0.5082163958641063
Datanum:5 BBAND: 5 0.5092442146725751
Datanum:6 BBAND: 5 0.5067212408444636
Datanum:7 BBAND: 5 0.5067643257216717
Datanum:8 BBAND: 5 0.5063088570197575
Datanum:9 BBAND: 5 0.5068874253708376
Datanum:10 BBAND: 5 0.5080327465222207
Datanum:11 BBAND: 5 0.5095223439615906
Datanum:12 BBAND: 5 0.5082297180844516
Datanum:13 BBAND: 5 0.5058537486150437
Datanum:14 BBAND: 5 0.5080147737765467
Datanum:15 BBAND: 5 0.5077377654662973
Datanum:16 BBAND: 5 0.5098799630655586
Datanum:17 BBAND: 5 0.5099846106494306
Datanum:18 BBAND: 5 0.5118320610687024
Datanum:19 BBAND: 5 0.508206106870229
Datanum:0 BBAND: 6 0.5068065727121669
Datanum:1 BBAND: 6 0.5061238306253077
Datanum:2 BBAND: 6 0.5077917282127032
Datanum:3 BBAND: 6 0.508259478089611
Datanum:4 BBAND: 6 0.5086841457410143
Datanum:5 BBAND: 6 0.5084384809503292
Datanum:6 BBAND: 6 0.5069859050901705
Datanum:7 BBAND: 6 0.5075460084938757
Datanum:8 BBAND: 6 0.5063580968794239
Datanum:9 BBAND: 6 0.5088821863843407
Datanum:10 BBAND: 6 0.5086359719315523
Datanum:11 BBAND: 6 0.5106733965283763
Datanum:12 BBAND: 6 0.5086298165702327
Datanum:13 BBAND: 6 0.5099045860264696
Datanum:14 BBAND: 6 0.5073068636503539
Datanum:15 BBAND: 6 0.5109018159433673
Datanum:16 BBAND: 6 0.5084210526315789
Datanum:17 BBAND: 6 0.507824427480916
Datanum:18 BBAND: 6 0.5130755971435608
Datanum:19 BBAND: 6 0.5079229253878355
Datanum:0 BBAND: 7 0.509687346134909
Datanum:1 BBAND: 7 0.5083456425406203
Datanum:2 BBAND: 7 0.5054222058099459
Datanum:3 BBAND: 7 0.5049113737075333
Datanum:4 BBAND: 7 0.5068381855111713
Datanum:5 BBAND: 7 0.5089739644242014
Datanum:6 BBAND: 7 0.5084446359327877
Datanum:7 BBAND: 7 0.5079891672308734
Datanum:8 BBAND: 7 0.507097131601625
Datanum:9 BBAND: 7 0.5076757355656777
Datanum:10 BBAND: 7 
​
