-
Notifications
You must be signed in to change notification settings - Fork 4
/
util.py
122 lines (97 loc) · 4.81 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import pandas as pd
from sklearn import preprocessing
import numpy as np
np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning)
history_points = 50
from db import Db
import sys
def get_raw_data(symbol,print_sample=True):
model = Db()
records = model.get_data(symbol)
data = pd.DataFrame(data = records,
columns = ['open_val','high_val','low_val','close_val','volume'])
data = data.drop(0, axis=0)
if print_sample:
print(data.head(5))
print(data.count())
return data.to_numpy()
def create_dataset(symbol):
data = get_raw_data(symbol)
data_normaliser = preprocessing.MinMaxScaler()
data_normalised = data_normaliser.fit_transform(data)
ohlcv_histories_normalised = get_ohlcv_histories_normalised(data_normalised)
next_day_open_values_normalised = np.array(
[data_normalised[:, 0][i + history_points].copy() for i in range(len(data_normalised) - history_points)])
next_day_open_values_normalised = np.expand_dims(next_day_open_values_normalised, -1)
next_day_open_values = get_next_day_open_values(data)
y_normaliser = get_y_normaliser(next_day_open_values)
technical_indicators_normalised = get_technical_indicators(ohlcv_histories_normalised)
assert ohlcv_histories_normalised.shape[0] == next_day_open_values_normalised.shape[0] == \
technical_indicators_normalised.shape[0]
return ohlcv_histories_normalised, technical_indicators_normalised, next_day_open_values_normalised, \
next_day_open_values, y_normaliser
def get_y_normaliser(data):
y_normaliser = preprocessing.MinMaxScaler()
y_normaliser.fit(data)
return y_normaliser
def get_next_day_open_values(data):
next_day_open_values = np.array([data[:, 0][i + history_points].copy() for i in range(len(data) - history_points)])
next_day_open_values = np.expand_dims(next_day_open_values, -1)
return next_day_open_values
def get_ohlcv_histories_normalised(data_normalised, last=0):
# using the last {history_points} open close high low volume data points, predict the next open value
if (last == 0):
rng = range(len(data_normalised) - history_points)
else:
rng = range(len(data_normalised) - history_points + 1 - last, len(data_normalised) - history_points + 2)
return np.array(
[data_normalised[i:i + history_points].copy() for i in rng])
def get_next_day_open_values_normalised(data_normalised, last=0):
# using the last {history_points} open close high low volume data points, predict the next open value
if (last == 0):
rng = range(len(data_normalised) - history_points)
else:
rng = range(len(data_normalised) - history_points + 1 - last, len(data_normalised) - history_points + 2)
return np.array(
[data_normalised[i:i + history_points].copy() for i in rng])
def get_technical_indicators(ohlcv_histories_normalised):
technical_indicators = []
for his in ohlcv_histories_normalised:
# note since we are using his[3] we are taking the SMA of the closing price
sma = np.mean(his[:, 3])
macd = calc_ema(his, 12) - calc_ema(his, 26)
technical_indicators.append(np.array([sma]))
# technical_indicators.append(np.array([sma,macd,]))
technical_indicators = np.array(technical_indicators)
tech_ind_scaler = preprocessing.MinMaxScaler()
return tech_ind_scaler.fit_transform(technical_indicators)
def calc_ema(values, time_period):
sma = np.mean(values[:, 3])
ema_values = [sma]
k = 2 / (1 + time_period)
for i in range(len(values) - time_period, len(values)):
close = values[i][3]
ema_values.append(close * k + ema_values[-1] * (1 - k))
return ema_values[-1]
"""
def multiple_csv_to_dataset(test_set_name):
import os
ohlcv_histories = 0
technical_indicators = 0
next_day_open_values = 0
for csv_file_path in list(filter(lambda x: x.endswith('daily.csv'), os.listdir('./'))):
if not csv_file_path == test_set_name:
print(csv_file_path)
if type(ohlcv_histories) == int:
ohlcv_histories, technical_indicators, next_day_open_values, _, _ = csv_to_dataset(csv_file_path)
else:
a, b, c, _, _ = csv_to_dataset(csv_file_path)
ohlcv_histories = np.concatenate((ohlcv_histories, a), 0)
technical_indicators = np.concatenate((technical_indicators, b), 0)
next_day_open_values = np.concatenate((next_day_open_values, c), 0)
ohlcv_train = ohlcv_histories
tech_ind_train = technical_indicators
y_train = next_day_open_values
ohlcv_test, tech_ind_test, y_test, unscaled_y_test, y_normaliser = csv_to_dataset(test_set_name)
return ohlcv_train, tech_ind_train, y_train, ohlcv_test, tech_ind_test, y_test, unscaled_y_test, y_normaliser
"""