In [3]:
%matplotlib inline
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score

#def show_data_relation(dataname):
#     df = pd.read_csv(dataname, index_col='Datetime')
#     df = df.astype('double')
    
#     count = [0, 0, 0]
#     df['class'] = (df["Rate"]).apply(lambda x: classify(x, count, 0.001))
#     df['class'] = df['class'].shift(-1)
#     print(' LOW/MID/HIGH')
#     print(count)
#     sns.pairplot(df, hue='class', palette='RdYlBu', height=8, kind='reg').savefig('./data_plot'+dataname+'.png')
    
#     iris = sns.load_dataset("iris")
#     sns.pairplot(iris, hue='species', palette='RdYlBu', height=8, kind='reg', diag_kind='kde')

    
def classify(x, c, R=0.01):
        if x<=-R:
            c[0]+=1
            return 0
        elif -R<x<R:
            c[1]+=1
            return 1
        elif R<=x:
            c[2]+=1
            return 2

def predict(dataname):

    df = pd.read_csv(dataname, index_col='Datetime')
    count = [0,0,0]
    df['class'] = (df["Rate"]).apply(lambda x: classify(x, count, 0.001))
    df['class'] = df['class'].shift(-1)
    print(" LOW/MID/HIGH")
    print(count)
    
    
    X = df.drop('class', axis=1)
    y = df['class']
    X = X.drop(X.index[len(X)-1])
    y = y.dropna()
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8)
    
    names = ["Nearest Neighbors", "Linear SVM", "RBF SVM", "Decision Tree",
             "Random Forest", "AdaBoost", "Naive Bayes", "Linear Discriminant Analysis",
             "Quadratic Discriminant Analysis"]
    classifiers = [
        KNeighborsClassifier(3),
        SVC(kernel="linear", C=0.025),
        SVC(gamma=2, C=1),
        DecisionTreeClassifier(max_depth=5),
        RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
        AdaBoostClassifier(),
        GaussianNB(),
        LinearDiscriminantAnalysis(),
        QuadraticDiscriminantAnalysis()]
    
    for name, clf in zip(names, classifiers):
        print(name)
        clf.fit(X_train, y_train)
        score = clf.score(X_test, y_test)
        result = clf.predict(X_test)
        
        print(score)
        print('accuracy_score: ', end='')
        print(accuracy_score(y_test, result))
        print('confusion_matrix: ')
        print(confusion_matrix(y_test, result))
        print('precision_score: ', end='')
        print(precision_score(y_test, result, average=None))
        print('')

def main():
    namelist = ['20101to20208_USD_JPY_D.csv','20101to20208_EUR_JPY_D.csv','20101to20208_AUD_JPY_D.csv','20101to20208_GBP_JPY_D.csv','20101to20208_NZD_JPY_D.csv','20101to20208_CAD_JPY_D.csv','20101to20208_CHF_JPY_D.csv','20101to20208_ZAR_JPY_D.csv']
    
    for i in range(len(namelist)):
        print(namelist[i])
        predict(namelist[i]) 
        #show_data_relation(namelist[i])
    
 
main()

20101to20208_USD_JPY_D.csv
 LOW/MID/HIGH
[815, 573, 801]
Nearest Neighbors
0.4132420091324201
accuracy_score: 0.4132420091324201
confusion_matrix: 
[[101  26  42]
 [ 62  25  31]
 [ 71  25  55]]
precision_score: [0.43162393 0.32894737 0.4296875 ]

Linear SVM
0.3858447488584475
accuracy_score: 0.3858447488584475
confusion_matrix: 
[[141  17  11]
 [ 89  16  13]
 [130   9  12]]
precision_score: [0.39166667 0.38095238 0.33333333]

RBF SVM
0.3812785388127854
accuracy_score: 0.3812785388127854
confusion_matrix: 
[[158   2   9]
 [104   6   8]
 [142   6   3]]
precision_score: [0.39108911 0.42857143 0.15      ]

Decision Tree
0.3744292237442922
accuracy_score: 0.3744292237442922
confusion_matrix: 
[[79  7 83]
 [47 11 60]
 [71  6 74]]
precision_score: [0.40101523 0.45833333 0.34101382]

Random Forest
0.3835616438356164
accuracy_score: 0.3835616438356164
confusion_matrix: 
[[82 19 68]
 [42 25 51]
 [72 18 61]]
precision_score: [0.41836735 0.40322581 0.33888889]

AdaBoost
0.4155251141552511
accuracy

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


0.3623853211009174
accuracy_score: 0.3623853211009174
confusion_matrix: 
[[149  15   3]
 [ 71   6   1]
 [177  11   3]]
precision_score: [0.37531486 0.1875     0.42857143]

RBF SVM
0.4334862385321101
accuracy_score: 0.4334862385321101
confusion_matrix: 
[[  2   3 162]
 [  3   4  71]
 [  2   6 183]]
precision_score: [0.28571429 0.30769231 0.43990385]

Decision Tree
0.4105504587155963
accuracy_score: 0.4105504587155963
confusion_matrix: 
[[ 18   3 146]
 [  7   2  69]
 [ 28   4 159]]
precision_score: [0.33962264 0.22222222 0.42513369]

Random Forest
0.3922018348623853
accuracy_score: 0.3922018348623853
confusion_matrix: 
[[ 67   0 100]
 [ 37   0  41]
 [ 87   0 104]]
precision_score: [0.35078534 0.         0.4244898 ]

AdaBoost


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


0.41284403669724773
accuracy_score: 0.41284403669724773
confusion_matrix: 
[[83  2 82]
 [29  0 49]
 [94  0 97]]
precision_score: [0.40291262 0.         0.4254386 ]

Naive Bayes
0.4197247706422018
accuracy_score: 0.4197247706422018
confusion_matrix: 
[[ 96   0  71]
 [ 50   0  28]
 [104   0  87]]
precision_score: [0.384      0.         0.46774194]

Linear Discriminant Analysis
0.4013761467889908
accuracy_score: 0.4013761467889908
confusion_matrix: 
[[ 64   0 103]
 [ 30   0  48]
 [ 80   0 111]]
precision_score: [0.36781609 0.         0.42366412]

Quadratic Discriminant Analysis
0.33256880733944955
accuracy_score: 0.33256880733944955
confusion_matrix: 
[[78 62 27]
 [32 33 13]
 [83 74 34]]
precision_score: [0.40414508 0.19526627 0.45945946]

20101to20208_GBP_JPY_D.csv
 LOW/MID/HIGH
[841, 441, 899]
Nearest Neighbors
0.4059633027522936
accuracy_score: 0.4059633027522936
confusion_matrix: 
[[80 27 49]
 [52 22 32]
 [86 13 75]]
precision_score: [0.36697248 0.35483871 0.48076923]

Linear SVM


  _warn_prf(average, modifier, msg_start, len(result))


0.42660550458715596
accuracy_score: 0.42660550458715596
confusion_matrix: 
[[  5   9 142]
 [  2  19  85]
 [  1  11 162]]
precision_score: [0.625      0.48717949 0.41645244]

RBF SVM
0.39908256880733944
accuracy_score: 0.39908256880733944
confusion_matrix: 
[[  3   0 153]
 [  7   3  96]
 [  4   2 168]]
precision_score: [0.21428571 0.6        0.4028777 ]

Decision Tree
0.41743119266055045
accuracy_score: 0.41743119266055045
confusion_matrix: 
[[ 53   3 100]
 [ 29   9  68]
 [ 49   5 120]]
precision_score: [0.40458015 0.52941176 0.41666667]

Random Forest
0.37844036697247707
accuracy_score: 0.37844036697247707
confusion_matrix: 
[[ 47   4 105]
 [ 37  13  56]
 [ 65   4 105]]
precision_score: [0.31543624 0.61904762 0.39473684]

AdaBoost
0.3876146788990826
accuracy_score: 0.3876146788990826
confusion_matrix: 
[[74  4 78]
 [49  9 48]
 [83  5 86]]
precision_score: [0.3592233  0.5        0.40566038]

Naive Bayes
0.3394495412844037
accuracy_score: 0.3394495412844037
confusion_matrix: 
[[63 10 83]

  _warn_prf(average, modifier, msg_start, len(result))


0.3904109589041096
accuracy_score: 0.3904109589041096
confusion_matrix: 
[[159  11   3]
 [ 61   7   0]
 [180  12   5]]
precision_score: [0.3975     0.23333333 0.625     ]

RBF SVM
0.3972602739726027
accuracy_score: 0.3972602739726027
confusion_matrix: 
[[164   4   5]
 [ 66   1   1]
 [187   1   9]]
precision_score: [0.39328537 0.16666667 0.6       ]

Decision Tree
0.4246575342465753
accuracy_score: 0.4246575342465753
confusion_matrix: 
[[ 81   1  91]
 [ 25   0  43]
 [ 91   1 105]]
precision_score: [0.41116751 0.         0.43933054]

Random Forest
0.4520547945205479
accuracy_score: 0.4520547945205479
confusion_matrix: 
[[117   0  56]
 [ 50   1  17]
 [117   0  80]]
precision_score: [0.41197183 1.         0.52287582]

AdaBoost
0.4132420091324201
accuracy_score: 0.4132420091324201
confusion_matrix: 
[[86  9 78]
 [35  1 32]
 [98  5 94]]
precision_score: [0.39269406 0.06666667 0.46078431]

Naive Bayes
0.3447488584474886
accuracy_score: 0.3447488584474886
confusion_matrix: 
[[ 98  38  37]
 [ 3

  _warn_prf(average, modifier, msg_start, len(result))


0.35091743119266056
accuracy_score: 0.35091743119266056
confusion_matrix: 
[[143  12   9]
 [ 80   8   3]
 [164  15   2]]
precision_score: [0.36950904 0.22857143 0.14285714]

RBF SVM
0.3555045871559633
accuracy_score: 0.3555045871559633
confusion_matrix: 
[[148   5  11]
 [ 82   1   8]
 [172   3   6]]
precision_score: [0.3681592  0.11111111 0.24      ]

Decision Tree
0.40825688073394495
accuracy_score: 0.40825688073394495
confusion_matrix: 
[[114   8  42]
 [ 73   4  14]
 [111  10  60]]
precision_score: [0.38255034 0.18181818 0.51724138]

Random Forest
0.41284403669724773
accuracy_score: 0.41284403669724773
confusion_matrix: 
[[101   0  63]
 [ 55   1  35]
 [102   1  78]]
precision_score: [0.39147287 0.5        0.44318182]

AdaBoost
0.40825688073394495
accuracy_score: 0.40825688073394495
confusion_matrix: 
[[91  8 65]
 [50  6 35]
 [89 11 81]]
precision_score: [0.39565217 0.24       0.44751381]

Naive Bayes
0.4013761467889908
accuracy_score: 0.4013761467889908
confusion_matrix: 
[[ 63   0 1

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


0.3853211009174312
accuracy_score: 0.3853211009174312
confusion_matrix: 
[[  8   6 153]
 [  8   8  78]
 [  8  15 152]]
precision_score: [0.33333333 0.27586207 0.39686684]

RBF SVM
0.3967889908256881
accuracy_score: 0.3967889908256881
confusion_matrix: 
[[  2   3 162]
 [  1   1  92]
 [  5   0 170]]
precision_score: [0.25      0.25      0.4009434]

Decision Tree
0.3669724770642202
accuracy_score: 0.3669724770642202
confusion_matrix: 
[[  6   9 152]
 [  3   6  85]
 [ 14  13 148]]
precision_score: [0.26086957 0.21428571 0.38441558]

Random Forest
0.38990825688073394
accuracy_score: 0.38990825688073394
confusion_matrix: 
[[ 56   4 107]
 [ 36   3  55]
 [ 59   5 111]]
precision_score: [0.37086093 0.25       0.40659341]

AdaBoost
0.3669724770642202
accuracy_score: 0.3669724770642202
confusion_matrix: 
[[69  4 94]
 [47  6 41]
 [76 14 85]]
precision_score: [0.359375   0.25       0.38636364]

Naive Bayes
0.3853211009174312
accuracy_score: 0.3853211009174312
confusion_matrix: 
[[79 46 42]
 [37 34 

  _warn_prf(average, modifier, msg_start, len(result))


0.4068965517241379
accuracy_score: 0.4068965517241379
confusion_matrix: 
[[ 76   0 118]
 [ 19   0  41]
 [ 80   0 101]]
precision_score: [0.43428571 0.         0.38846154]

RBF SVM


  _warn_prf(average, modifier, msg_start, len(result))


0.42758620689655175
accuracy_score: 0.42758620689655175
confusion_matrix: 
[[ 17   5 172]
 [  4   2  54]
 [  8   6 167]]
precision_score: [0.5862069  0.15384615 0.42493639]

Decision Tree
0.4160919540229885
accuracy_score: 0.4160919540229885
confusion_matrix: 
[[ 32   1 161]
 [ 17   2  41]
 [ 29   5 147]]
precision_score: [0.41025641 0.25       0.42120344]

Random Forest
0.4045977011494253
accuracy_score: 0.4045977011494253
confusion_matrix: 
[[ 75   2 117]
 [ 34   1  25]
 [ 78   3 100]]
precision_score: [0.40106952 0.16666667 0.41322314]

AdaBoost
0.41379310344827586
accuracy_score: 0.41379310344827586
confusion_matrix: 
[[95  4 95]
 [27  3 30]
 [95  4 82]]
precision_score: [0.43778802 0.27272727 0.39613527]

Naive Bayes
0.35172413793103446
accuracy_score: 0.35172413793103446
confusion_matrix: 
[[ 33  37 124]
 [ 18  19  23]
 [ 40  40 101]]
precision_score: [0.36263736 0.19791667 0.40725806]

Linear Discriminant Analysis
0.3931034482758621
accuracy_score: 0.3931034482758621
confusion_m

  _warn_prf(average, modifier, msg_start, len(result))


In [4]:
20101to20208_USD_JPY_D.csv
 LOW/MID/HIGH
[815, 573, 801]
Nearest Neighbors
0.4132420091324201
accuracy_score: 0.4132420091324201
confusion_matrix: 
[[101  26  42]
 [ 62  25  31]
 [ 71  25  55]]
precision_score: [0.43162393 0.32894737 0.4296875 ]

Linear SVM
0.3858447488584475
accuracy_score: 0.3858447488584475
confusion_matrix: 
[[141  17  11]
 [ 89  16  13]
 [130   9  12]]
precision_score: [0.39166667 0.38095238 0.33333333]

RBF SVM
0.3812785388127854
accuracy_score: 0.3812785388127854
confusion_matrix: 
[[158   2   9]
 [104   6   8]
 [142   6   3]]
precision_score: [0.39108911 0.42857143 0.15      ]

Decision Tree
0.3744292237442922
accuracy_score: 0.3744292237442922
confusion_matrix: 
[[79  7 83]
 [47 11 60]
 [71  6 74]]
precision_score: [0.40101523 0.45833333 0.34101382]

Random Forest
0.3835616438356164
accuracy_score: 0.3835616438356164
confusion_matrix: 
[[82 19 68]
 [42 25 51]
 [72 18 61]]
precision_score: [0.41836735 0.40322581 0.33888889]

AdaBoost
0.4155251141552511
accuracy_score: 0.4155251141552511
confusion_matrix: 
[[87 18 64]
 [48 26 44]
 [67 15 69]]
precision_score: [0.43069307 0.44067797 0.38983051]

Naive Bayes
0.3538812785388128
accuracy_score: 0.3538812785388128
confusion_matrix: 
[[88 57 24]
 [56 49 13]
 [93 40 18]]
precision_score: [0.37130802 0.33561644 0.32727273]

Linear Discriminant Analysis
0.3561643835616438
accuracy_score: 0.3561643835616438
confusion_matrix: 
[[71 26 72]
 [50 32 36]
 [79 19 53]]
precision_score: [0.355      0.41558442 0.32919255]

Quadratic Discriminant Analysis
0.3538812785388128
accuracy_score: 0.3538812785388128
confusion_matrix: 
[[94 48 27]
 [53 49 16]
 [95 44 12]]
precision_score: [0.38842975 0.34751773 0.21818182]

20101to20208_EUR_JPY_D.csv
 LOW/MID/HIGH
[866, 456, 859]
Nearest Neighbors
0.38073394495412843
accuracy_score: 0.38073394495412843
confusion_matrix: 
[[88 24 68]
 [50 14 28]
 [82 18 64]]
precision_score: [0.4  0.25 0.4 ]

Linear SVM
0.42201834862385323
accuracy_score: 0.42201834862385323
confusion_matrix: 
[[118  17  45]
 [ 55  11  26]
 [ 98  11  55]]
precision_score: [0.43542435 0.28205128 0.43650794]

RBF SVM
0.3876146788990826
accuracy_score: 0.3876146788990826
confusion_matrix: 
[[  9   3 168]
 [  4   1  87]
 [  2   3 159]]
precision_score: [0.6        0.14285714 0.38405797]

Decision Tree
0.3463302752293578
accuracy_score: 0.3463302752293578
confusion_matrix: 
[[115   3  62]
 [ 70   0  22]
 [121   7  36]]
precision_score: [0.37581699 0.         0.3       ]

Random Forest
0.36009174311926606
accuracy_score: 0.36009174311926606
confusion_matrix: 
[[80  2 98]
 [56  3 33]
 [86  4 74]]
precision_score: [0.36036036 0.33333333 0.36097561]

AdaBoost
0.3967889908256881
accuracy_score: 0.3967889908256881
confusion_matrix: 
[[99  8 73]
 [54  3 35]
 [91  2 71]]
precision_score: [0.4057377  0.23076923 0.39664804]

Naive Bayes
0.41743119266055045
accuracy_score: 0.41743119266055045
confusion_matrix: 
[[124   0  56]
 [ 59   0  33]
 [106   0  58]]
precision_score: [0.42906574 0.         0.39455782]

Linear Discriminant Analysis
0.4197247706422018
accuracy_score: 0.4197247706422018
confusion_matrix: 
[[106   0  74]
 [ 56   0  36]
 [ 87   0  77]]
precision_score: [0.42570281 0.         0.41176471]

Quadratic Discriminant Analysis
0.39908256880733944
accuracy_score: 0.39908256880733944
confusion_matrix: 
[[146   5  29]
 [ 84   1   7]
 [134   3  27]]
precision_score: [0.4010989  0.11111111 0.42857143]

20101to20208_AUD_JPY_D.csv
 LOW/MID/HIGH
[867, 406, 906]
Nearest Neighbors
0.3830275229357798
accuracy_score: 0.3830275229357798
confusion_matrix: 
[[ 82  22  63]
 [ 40  12  26]
 [105  13  73]]
precision_score: [0.36123348 0.25531915 0.45061728]

Linear SVM
C:\Users\stana\.conda\envs\predict_rate\lib\site-packages\sklearn\metrics\_classification.py:1221: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, msg_start, len(result))
C:\Users\stana\.conda\envs\predict_rate\lib\site-packages\sklearn\metrics\_classification.py:1221: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, msg_start, len(result))
0.3623853211009174
accuracy_score: 0.3623853211009174
confusion_matrix: 
[[149  15   3]
 [ 71   6   1]
 [177  11   3]]
precision_score: [0.37531486 0.1875     0.42857143]

RBF SVM
0.4334862385321101
accuracy_score: 0.4334862385321101
confusion_matrix: 
[[  2   3 162]
 [  3   4  71]
 [  2   6 183]]
precision_score: [0.28571429 0.30769231 0.43990385]

Decision Tree
0.4105504587155963
accuracy_score: 0.4105504587155963
confusion_matrix: 
[[ 18   3 146]
 [  7   2  69]
 [ 28   4 159]]
precision_score: [0.33962264 0.22222222 0.42513369]

Random Forest
0.3922018348623853
accuracy_score: 0.3922018348623853
confusion_matrix: 
[[ 67   0 100]
 [ 37   0  41]
 [ 87   0 104]]
precision_score: [0.35078534 0.         0.4244898 ]

AdaBoost
C:\Users\stana\.conda\envs\predict_rate\lib\site-packages\sklearn\metrics\_classification.py:1221: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, msg_start, len(result))
C:\Users\stana\.conda\envs\predict_rate\lib\site-packages\sklearn\metrics\_classification.py:1221: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, msg_start, len(result))
0.41284403669724773
accuracy_score: 0.41284403669724773
confusion_matrix: 
[[83  2 82]
 [29  0 49]
 [94  0 97]]
precision_score: [0.40291262 0.         0.4254386 ]

Naive Bayes
0.4197247706422018
accuracy_score: 0.4197247706422018
confusion_matrix: 
[[ 96   0  71]
 [ 50   0  28]
 [104   0  87]]
precision_score: [0.384      0.         0.46774194]

Linear Discriminant Analysis
0.4013761467889908
accuracy_score: 0.4013761467889908
confusion_matrix: 
[[ 64   0 103]
 [ 30   0  48]
 [ 80   0 111]]
precision_score: [0.36781609 0.         0.42366412]

Quadratic Discriminant Analysis
0.33256880733944955
accuracy_score: 0.33256880733944955
confusion_matrix: 
[[78 62 27]
 [32 33 13]
 [83 74 34]]
precision_score: [0.40414508 0.19526627 0.45945946]

20101to20208_GBP_JPY_D.csv
 LOW/MID/HIGH
[841, 441, 899]
Nearest Neighbors
0.4059633027522936
accuracy_score: 0.4059633027522936
confusion_matrix: 
[[80 27 49]
 [52 22 32]
 [86 13 75]]
precision_score: [0.36697248 0.35483871 0.48076923]

Linear SVM
C:\Users\stana\.conda\envs\predict_rate\lib\site-packages\sklearn\metrics\_classification.py:1221: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, msg_start, len(result))
0.42660550458715596
accuracy_score: 0.42660550458715596
confusion_matrix: 
[[  5   9 142]
 [  2  19  85]
 [  1  11 162]]
precision_score: [0.625      0.48717949 0.41645244]

RBF SVM
0.39908256880733944
accuracy_score: 0.39908256880733944
confusion_matrix: 
[[  3   0 153]
 [  7   3  96]
 [  4   2 168]]
precision_score: [0.21428571 0.6        0.4028777 ]

Decision Tree
0.41743119266055045
accuracy_score: 0.41743119266055045
confusion_matrix: 
[[ 53   3 100]
 [ 29   9  68]
 [ 49   5 120]]
precision_score: [0.40458015 0.52941176 0.41666667]

Random Forest
0.37844036697247707
accuracy_score: 0.37844036697247707
confusion_matrix: 
[[ 47   4 105]
 [ 37  13  56]
 [ 65   4 105]]
precision_score: [0.31543624 0.61904762 0.39473684]

AdaBoost
0.3876146788990826
accuracy_score: 0.3876146788990826
confusion_matrix: 
[[74  4 78]
 [49  9 48]
 [83  5 86]]
precision_score: [0.3592233  0.5        0.40566038]

Naive Bayes
0.3394495412844037
accuracy_score: 0.3394495412844037
confusion_matrix: 
[[63 10 83]
 [44  6 56]
 [89  6 79]]
precision_score: [0.32142857 0.27272727 0.36238532]

Linear Discriminant Analysis
0.4036697247706422
accuracy_score: 0.4036697247706422
confusion_matrix: 
[[ 54   0 102]
 [ 34   0  72]
 [ 52   0 122]]
precision_score: [0.38571429 0.         0.41216216]

Quadratic Discriminant Analysis
0.3830275229357798
accuracy_score: 0.3830275229357798
confusion_matrix: 
[[18 61 77]
 [ 9 57 40]
 [17 65 92]]
precision_score: [0.40909091 0.31147541 0.44019139]

20101to20208_NZD_JPY_D.csv
 LOW/MID/HIGH
[894, 395, 900]
Nearest Neighbors
0.4246575342465753
accuracy_score: 0.4246575342465753
confusion_matrix: 
[[107  15  51]
 [ 32  10  26]
 [107  21  69]]
precision_score: [0.43495935 0.2173913  0.47260274]

Linear SVM
C:\Users\stana\.conda\envs\predict_rate\lib\site-packages\sklearn\metrics\_classification.py:1221: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, msg_start, len(result))
0.3904109589041096
accuracy_score: 0.3904109589041096
confusion_matrix: 
[[159  11   3]
 [ 61   7   0]
 [180  12   5]]
precision_score: [0.3975     0.23333333 0.625     ]

RBF SVM
0.3972602739726027
accuracy_score: 0.3972602739726027
confusion_matrix: 
[[164   4   5]
 [ 66   1   1]
 [187   1   9]]
precision_score: [0.39328537 0.16666667 0.6       ]

Decision Tree
0.4246575342465753
accuracy_score: 0.4246575342465753
confusion_matrix: 
[[ 81   1  91]
 [ 25   0  43]
 [ 91   1 105]]
precision_score: [0.41116751 0.         0.43933054]

Random Forest
0.4520547945205479
accuracy_score: 0.4520547945205479
confusion_matrix: 
[[117   0  56]
 [ 50   1  17]
 [117   0  80]]
precision_score: [0.41197183 1.         0.52287582]

AdaBoost
0.4132420091324201
accuracy_score: 0.4132420091324201
confusion_matrix: 
[[86  9 78]
 [35  1 32]
 [98  5 94]]
precision_score: [0.39269406 0.06666667 0.46078431]

Naive Bayes
0.3447488584474886
accuracy_score: 0.3447488584474886
confusion_matrix: 
[[ 98  38  37]
 [ 35  12  21]
 [107  49  41]]
precision_score: [0.40833333 0.12121212 0.41414141]

Linear Discriminant Analysis
0.4292237442922374
accuracy_score: 0.4292237442922374
confusion_matrix: 
[[ 99   0  74]
 [ 35   0  33]
 [108   0  89]]
precision_score: [0.40909091 0.         0.45408163]

Quadratic Discriminant Analysis
0.3470319634703196
accuracy_score: 0.3470319634703196
confusion_matrix: 
[[118  31  24]
 [ 43  18   7]
 [135  46  16]]
precision_score: [0.39864865 0.18947368 0.34042553]

20101to20208_CAD_JPY_D.csv
 LOW/MID/HIGH
[882, 405, 893]
Nearest Neighbors
0.39908256880733944
accuracy_score: 0.39908256880733944
confusion_matrix: 
[[96 12 56]
 [52  7 32]
 [93 17 71]]
precision_score: [0.39834025 0.19444444 0.44654088]

Linear SVM
C:\Users\stana\.conda\envs\predict_rate\lib\site-packages\sklearn\metrics\_classification.py:1221: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, msg_start, len(result))
0.35091743119266056
accuracy_score: 0.35091743119266056
confusion_matrix: 
[[143  12   9]
 [ 80   8   3]
 [164  15   2]]
precision_score: [0.36950904 0.22857143 0.14285714]

RBF SVM
0.3555045871559633
accuracy_score: 0.3555045871559633
confusion_matrix: 
[[148   5  11]
 [ 82   1   8]
 [172   3   6]]
precision_score: [0.3681592  0.11111111 0.24      ]

Decision Tree
0.40825688073394495
accuracy_score: 0.40825688073394495
confusion_matrix: 
[[114   8  42]
 [ 73   4  14]
 [111  10  60]]
precision_score: [0.38255034 0.18181818 0.51724138]

Random Forest
0.41284403669724773
accuracy_score: 0.41284403669724773
confusion_matrix: 
[[101   0  63]
 [ 55   1  35]
 [102   1  78]]
precision_score: [0.39147287 0.5        0.44318182]

AdaBoost
0.40825688073394495
accuracy_score: 0.40825688073394495
confusion_matrix: 
[[91  8 65]
 [50  6 35]
 [89 11 81]]
precision_score: [0.39565217 0.24       0.44751381]

Naive Bayes
0.4013761467889908
accuracy_score: 0.4013761467889908
confusion_matrix: 
[[ 63   0 101]
 [ 40   0  51]
 [ 69   0 112]]
precision_score: [0.36627907 0.         0.42424242]

Linear Discriminant Analysis
0.3944954128440367
accuracy_score: 0.3944954128440367
confusion_matrix: 
[[88  0 76]
 [52  0 39]
 [97  0 84]]
precision_score: [0.37130802 0.         0.42211055]

Quadratic Discriminant Analysis
0.3876146788990826
accuracy_score: 0.3876146788990826
confusion_matrix: 
[[126   4  34]
 [ 70   3  18]
 [139   2  40]]
precision_score: [0.3761194  0.33333333 0.43478261]

20101to20208_CHF_JPY_D.csv
 LOW/MID/HIGH
[838, 477, 866]
Nearest Neighbors
0.3440366972477064
accuracy_score: 0.3440366972477064
confusion_matrix: 
[[ 87  17  63]
 [ 53   8  33]
 [101  19  55]]
precision_score: [0.36099585 0.18181818 0.36423841]

Linear SVM
C:\Users\stana\.conda\envs\predict_rate\lib\site-packages\sklearn\metrics\_classification.py:1221: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, msg_start, len(result))
C:\Users\stana\.conda\envs\predict_rate\lib\site-packages\sklearn\metrics\_classification.py:1221: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, msg_start, len(result))
0.3853211009174312
accuracy_score: 0.3853211009174312
confusion_matrix: 
[[  8   6 153]
 [  8   8  78]
 [  8  15 152]]
precision_score: [0.33333333 0.27586207 0.39686684]

RBF SVM
0.3967889908256881
accuracy_score: 0.3967889908256881
confusion_matrix: 
[[  2   3 162]
 [  1   1  92]
 [  5   0 170]]
precision_score: [0.25      0.25      0.4009434]

Decision Tree
0.3669724770642202
accuracy_score: 0.3669724770642202
confusion_matrix: 
[[  6   9 152]
 [  3   6  85]
 [ 14  13 148]]
precision_score: [0.26086957 0.21428571 0.38441558]

Random Forest
0.38990825688073394
accuracy_score: 0.38990825688073394
confusion_matrix: 
[[ 56   4 107]
 [ 36   3  55]
 [ 59   5 111]]
precision_score: [0.37086093 0.25       0.40659341]

AdaBoost
0.3669724770642202
accuracy_score: 0.3669724770642202
confusion_matrix: 
[[69  4 94]
 [47  6 41]
 [76 14 85]]
precision_score: [0.359375   0.25       0.38636364]

Naive Bayes
0.3853211009174312
accuracy_score: 0.3853211009174312
confusion_matrix: 
[[79 46 42]
 [37 34 23]
 [74 46 55]]
precision_score: [0.41578947 0.26984127 0.45833333]

Linear Discriminant Analysis
0.37155963302752293
accuracy_score: 0.37155963302752293
confusion_matrix: 
[[ 60   0 107]
 [ 30   0  64]
 [ 73   0 102]]
precision_score: [0.36809816 0.         0.37362637]

Quadratic Discriminant Analysis
0.3165137614678899
accuracy_score: 0.3165137614678899
confusion_matrix: 
[[11 96 60]
 [ 2 67 25]
 [20 95 60]]
precision_score: [0.33333333 0.25968992 0.4137931 ]

20101to20208_ZAR_JPY_D.csv
 LOW/MID/HIGH
[934, 311, 930]
Nearest Neighbors
0.38620689655172413
accuracy_score: 0.38620689655172413
confusion_matrix: 
[[92 11 91]
 [30  6 24]
 [95 16 70]]
precision_score: [0.42396313 0.18181818 0.37837838]

Linear SVM
C:\Users\stana\.conda\envs\predict_rate\lib\site-packages\sklearn\metrics\_classification.py:1221: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, msg_start, len(result))
0.4068965517241379
accuracy_score: 0.4068965517241379
confusion_matrix: 
[[ 76   0 118]
 [ 19   0  41]
 [ 80   0 101]]
precision_score: [0.43428571 0.         0.38846154]

RBF SVM
C:\Users\stana\.conda\envs\predict_rate\lib\site-packages\sklearn\metrics\_classification.py:1221: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, msg_start, len(result))
0.42758620689655175
accuracy_score: 0.42758620689655175
confusion_matrix: 
[[ 17   5 172]
 [  4   2  54]
 [  8   6 167]]
precision_score: [0.5862069  0.15384615 0.42493639]

Decision Tree
0.4160919540229885
accuracy_score: 0.4160919540229885
confusion_matrix: 
[[ 32   1 161]
 [ 17   2  41]
 [ 29   5 147]]
precision_score: [0.41025641 0.25       0.42120344]

Random Forest
0.4045977011494253
accuracy_score: 0.4045977011494253
confusion_matrix: 
[[ 75   2 117]
 [ 34   1  25]
 [ 78   3 100]]
precision_score: [0.40106952 0.16666667 0.41322314]

AdaBoost
0.41379310344827586
accuracy_score: 0.41379310344827586
confusion_matrix: 
[[95  4 95]
 [27  3 30]
 [95  4 82]]
precision_score: [0.43778802 0.27272727 0.39613527]

Naive Bayes
0.35172413793103446
accuracy_score: 0.35172413793103446
confusion_matrix: 
[[ 33  37 124]
 [ 18  19  23]
 [ 40  40 101]]
precision_score: [0.36263736 0.19791667 0.40725806]

Linear Discriminant Analysis
0.3931034482758621
accuracy_score: 0.3931034482758621
confusion_matrix: 
[[ 83   0 111]
 [ 32   0  28]
 [ 93   0  88]]
precision_score: [0.39903846 0.         0.3876652 ]

Quadratic Discriminant Analysis
0.3816091954022989
accuracy_score: 0.3816091954022989
confusion_matrix: 
[[ 54  23 117]
 [ 16  14  30]
 [ 56  27  98]]
precision_score: [0.42857143 0.21875    0.4       ]

C:\Users\stana\.conda\envs\predict_rate\lib\site-packages\sklearn\metrics\_classification.py:1221: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, msg_start, len(result))

SyntaxError: invalid syntax (<ipython-input-4-30ef71d49726>, line 1)

In [1]:
%matplotlib inline
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score

def show_data_relation(dataname):
    df = pd.read_csv(dataname, index_col='Datetime')
    df = df.astype('double')
    
    count = [0, 0, 0]
    df['class'] = (df["Rate"]).apply(lambda x: classify(x, count, 0.001))
    df['class'] = df['class'].shift(-1)
    print(' LOW/MID/HIGH')
    print(count)
    sns.pairplot(df, hue='class', palette='RdYlBu', height=8, kind='reg').savefig('./data_plot'+dataname+'.png')
    
#     iris = sns.load_dataset("iris")
#     sns.pairplot(iris, hue='species', palette='RdYlBu', height=8, kind='reg', diag_kind='kde')

    
def classify(x, c, R=0.01):
        if x<=-R:
            c[0]+=1
            return 0
        elif -R<x<R:
            c[1]+=1
            return 1
        elif R<=x:
            c[2]+=1
            return 2
        
def classify_two(x, c):
        if x<=0:
            c[0]+=1
            return 0
        elif 0<x:
            c[2]+=1
            return 2

def predict(dataname):

    df = pd.read_csv(dataname, index_col='Datetime')
    count = [0,0,0]
    #df['class'] = (df["Rate"]).apply(lambda x: classify(x, count, 0.001))
    df['class'] = (df["Rate"]).apply(lambda x: classify_two(x, count))
    df['class'] = df['class'].shift(-1)
    print(" LOW/MID/HIGH")
    print(count)
    
    
    X = df.drop('class', axis=1)
    y = df['class']
    X = X.drop(X.index[len(X)-1])
    y = y.dropna()
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, shuffle=False)
    
    names = ["Nearest Neighbors", "Linear SVM", "RBF SVM", "Decision Tree",
             "Random Forest", "AdaBoost", "Naive Bayes", "Linear Discriminant Analysis",
             "Quadratic Discriminant Analysis"]
    classifiers = [
        KNeighborsClassifier(3),
        SVC(kernel="linear", C=0.025),
        SVC(gamma=2, C=1),
        DecisionTreeClassifier(max_depth=5),
        RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
        AdaBoostClassifier(),
        GaussianNB(),
        LinearDiscriminantAnalysis(),
        QuadraticDiscriminantAnalysis()]
    
    accuracy_scores = []
    precision_scores = []
    
    
    for name, clf in zip(names, classifiers):
        print(name)
        clf.fit(X_train, y_train)
        score = clf.score(X_test, y_test)
        result = clf.predict(X_test)
        
        print('accuracy_score: ', end='')
        print(accuracy_score(y_test, result))
        accuracy_scores.extend([accuracy_score(y_test, result)])
        print('confusion_matrix: ')
        print(confusion_matrix(y_test, result))
        print('precision_score: ', end='')
        print(precision_score(y_test, result, average=None))
        precision_scores.extend([precision_score(y_test, result, average=None)])
        
        possestion = 50000
        pay = 5000
        po_rate = 1.95
        
        for i in range(len(result)):
            possestion -= pay
            if (result[i]==0):#Low予想のとき
                if (result[i]==y_test[i]): #予測成功
                    possestion += pay*po_rate

            elif(result[i]==2):#High予想の時
                if (result[i]==y_test[i]): #予測成功
                    possestion += pay*po_rate
            else:  #Mid予想の時
                possestion += pay
        print(str(len(result))+"days trade")
        print("posestion: "+ str(possestion))
        
        print('')
    
    print(accuracy_scores)
    print(precision_scores)

def main():
    namelist = ['20101to20208_USD_JPY_D.csv','20101to20208_EUR_JPY_D.csv','20101to20208_AUD_JPY_D.csv','20101to20208_GBP_JPY_D.csv','20101to20208_NZD_JPY_D.csv','20101to20208_CAD_JPY_D.csv','20101to20208_CHF_JPY_D.csv','20101to20208_ZAR_JPY_D.csv']
    
    for i in range(len(namelist)):
        print(namelist[i])
        predict(namelist[i]) 
        #show_data_relation(namelist[i])
    

main()

20101to20208_USD_JPY_D.csv


FileNotFoundError: [Errno 2] No such file or directory: '20101to20208_USD_JPY_D.csv'