In [1]:
import pandas as pd
import temp.file as temp_files
%config IPCompleter.greedy=True

In [2]:
features_names = ['date', 'time', 'open', 'high', 'low', 'close', 'volume']
data = pd.read_csv(temp_files.get('EURUSD30.csv'), skiprows=1,
                      header=None, names=features_names, index_col=[0], parse_dates=[[0,1]])

In [3]:
def getDayBarSets(day_df, count_decimal=5):
    """
    return entry points (-1)
    """
    
    # check
    if len(day_df) == 0:
        return []
    
    # prices settings
    pips_multiplier = 10 ** count_decimal
    need_bars = 5
    pips_to_line_max = 25
    pips_min_delta = 250
    day_open_price = day_df.iloc[0].open
    
    # frame settings
    from_bar_id = 0
    step = 1
    frame_length = 12
    
    return_data = [ ]

    while True:
        
        # frame - 6h
        frame = day_df[from_bar_id:from_bar_id+frame_length]
        from_bar_id+=1
        if len(frame) == 0 or len(frame) < frame_length:
            break
        
        # check delta
        max_price = frame['high'].max()
        low_price = frame['low'].min()
        pips_delta_calc = (max_price - low_price) * pips_multiplier
        
        # check min delta
        if pips_delta_calc < pips_min_delta:
            continue
            
        # find pips ti line    
        before_close = frame["close"][-2]
        pips_to_line = abs(before_close - day_open_price) * pips_multiplier
        
        # check pips to line
        if pips_to_line > pips_to_line_max:
            continue
        
        return_data.append({
            "day_open_price":day_open_price,
            "frame":frame,
            "pips_to_line":pips_to_line
        })
        continue
        
    return return_data  

In [4]:
bar_sets = []
for j in pd.date_range('2014-01-01', periods=1500):
    result = getDayBarSets(data.loc[data.index.date == j.date()])
    if result:
        for one in result:
            bar_sets.append(one)

In [5]:
def create_learn_data(bars_info):
    """
    return data sets for machine learning
    """
    day_open_price = bars_info["day_open_price"]
    frame = bars_info["frame"]
    target_bar = frame.iloc[-1]
    bars_before_target = frame[:-1]
    pips_to_line = bars_info["pips_to_line"]
    
    # middle before target
    middle_before_target_set = []
    middle_before_target_set.append(bars_before_target["open"].mean())
    middle_before_target_set.append(bars_before_target["high"].mean())
    middle_before_target_set.append(bars_before_target["low"].mean())
    middle_before_target_set.append(bars_before_target["close"].mean())
    # ---
    middle_before_target_frame_set = normalize_values(middle_before_target_set)
    
    # middle 4 last bars
    last_four = bars_before_target[-4:]
    middle_last_four_set = []
    middle_last_four_set.append(last_four["open"].mean())
    middle_last_four_set.append(last_four["high"].mean())
    middle_last_four_set.append(last_four["low"].mean())
    middle_last_four_set.append(last_four["close"].mean())
    # ---
    middle_last_four_set = normalize_values(middle_last_four_set)
    
    # last four binaries
    binary_set = []
    
    for index, bar in last_four.iterrows():
        # open info
        binary_set.append(1 if bar["open"] > day_open_price else -1)
        # high info
        binary_set.append(1 if bar["high"] > day_open_price else -1)
        # low info
        binary_set.append(1 if bar["low"] > day_open_price else -1)
        # close info
        binary_set.append(1 if bar["close"] > day_open_price else -1)
        # type info
        binary_set.append(1 if bar["open"] < bar["close"] else -1)   
    
    # target
    target = 1 if target_bar["open"] < target_bar["close"] else -1
    
    data_set = middle_before_target_frame_set + middle_last_four_set + binary_set
    
    return data_set, target

def normalize_values(list_prices):
    normalize_data = []
    max_value = max(list_prices)
    min_value = min(list_prices)
    for val in list_prices:
        normal = normalize_value(min_value, max_value, val)
        normalize_data.append(normal)
    return normalize_data

def normalize_value(min_value, max_value, target_value):
    return (target_value - (max_value + min_value) / 2) / ((max_value - min_value) / 2)


In [6]:
data_sets = []
targets = []

for bars_info in bar_sets:
    data_set, target = create_learn_data(bars_info)
    data_sets.append(data_set)
    targets.append(target)

In [7]:
print("Data sets count " + str(len(data_sets)))

Data sets count 1471


In [8]:
from sklearn.svm import SVC
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
%matplotlib inline
import matplotlib.pyplot as plt

In [9]:
data_buy = []
data_sell = []

for data_set, target in zip(data_sets, targets):
    if target == 1:
        data_buy.append(data_set)
    else:
        data_sell.append(data_set)

In [10]:
X_train, X_test, y_train, y_test = \
            train_test_split(data_sets, targets, shuffle=False, random_state=0)

In [14]:
from sklearn.decomposition import PCA
import numpy as np
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# оставляем главные компоненты (уменьшение размерности)
pca = PCA(n_components=5)
# подгоняем модель PCA на наборе данных
pca.fit(X_train_scaled)
# преобразуем данные к первым двум главным компонентам
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)
print("Форма исходного массива: {}".format(str(np.array(X_train).shape)))
print("Форма массива после сокращения размерности: {}".format(str(X_train_pca.shape)))

Форма исходного массива: (1103, 28)
Форма массива после сокращения размерности: (1103, 5)


In [39]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier

model = KNeighborsClassifier(n_neighbors=100)
    
#model = GradientBoostingClassifier(random_state=0, learning_rate=0.001, max_depth=3)

#model = MLPClassifier(solver='lbfgs', random_state=0, hidden_layer_sizes=[100, 20, 5])

model.fit(X_train_pca, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=100, p=2,
           weights='uniform')

In [40]:
print("Правильность на обучающем наборе: {:.3f}".format(model.score(X_train_pca, y_train)))
print("Правильность на тестовом наборе: {:.3f}". format(model.score(X_test_pca, y_test)))

Правильность на обучающем наборе: 0.536
Правильность на тестовом наборе: 0.549
