In [1]:
import numpy as np
import pandas as pd
import os
import datetime as dt

In [2]:
# Collect the full path of every file under the data directory whose name contains '3 sec'.
path = 'D:\\Python Projects\\Stonks\\Tinkoff Online Trading\\Data'

# Bottom-up walk (topdown=False): sub-directories are yielded before their parents.
file_paths = [
    os.path.join(folder, filename)
    for folder, _subdirs, filenames in os.walk(path, topdown=False)
    for filename in filenames
    if '3 sec' in filename
]

In [65]:
# Prepare the prediction dataset from the first collected file.
# NOTE(review): only file_paths[0] is loaded - confirm whether the remaining
# files are meant to be processed as well.
df = pd.read_csv(file_paths[0])

# Look-ahead configuration: three steps of 20 rows each, and every step is
# required (by the filter below) to be one minute +/- one second further ahead.
ROWS_PER_STEP = 20
N_STEPS = 3
STEP_DURATION = dt.timedelta(minutes=1)
STEP_TOLERANCE = dt.timedelta(seconds=1)
row_shifts = [ROWS_PER_STEP * step for step in range(1, N_STEPS + 1)]  # 20, 40, 60

# Parse the timestamp column (first column of the file).
# Assign by column label rather than `df.iloc[:, 0] = ...`: on pandas >= 2.0 an
# iloc assignment writes into the existing object column in place, so the
# column would not become datetime64 and the timedelta arithmetic below would
# be unreliable.
time_col = df.columns[0]
df[time_col] = pd.to_datetime(df[time_col])

# Every 41st column starting at position 1 is a price column. The slice is
# taken before the helper columns are appended, so no trailing offset is needed.
prices = df.iloc[:, 1::41]

# Shifted timestamps: used below to check that each look-ahead row really is
# the expected amount of time ahead of the current row.
for step, rows in enumerate(row_shifts, start=1):
    df['Time_shift' + str(step)] = df[time_col].shift(-rows)

# Future prices: the price columns of the rows 20, 40 and 60 positions ahead,
# side by side; their row-wise extremes are the targets.
future_prices = pd.concat([prices.shift(-rows) for rows in row_shifts], axis=1)
df['Max_3min'] = future_prices.max(axis=1)
df['Min_3min'] = future_prices.min(axis=1)

# Keep only rows whose look-ahead timestamps lie strictly within the tolerance
# of 1, 2 and 3 minutes. Rows near the end of the file have NaT here; NaT
# compares False, so they are dropped by the mask as well.
is_valid = pd.Series(True, index=df.index)
for step in range(1, N_STEPS + 1):
    elapsed = df['Time_shift' + str(step)] - df[time_col]
    expected = STEP_DURATION * step
    is_valid &= (elapsed > expected - STEP_TOLERANCE) & (elapsed < expected + STEP_TOLERANCE)

df = df[is_valid]
df = df.dropna()

In [66]:
# Keep only feature/target columns: drop the first (timestamp) column and the
# helper Time_shift* columns.
columns = [column for i, column in enumerate(df.columns)
           if i != 0 and 'Time_shift' not in column]
df = df[columns]
#df = df.astype('float32')

# Rename columns so that neighbouring order-book levels share a label and can
# be summed together. Layout implied by the arithmetic below: the columns come
# in consecutive parts of 41 = 1 price column followed by 20 columns labelled
# 'bid' and 20 columns labelled 'ask'.
PART_WIDTH = 41   # columns per part
SIDE_DEPTH = 20   # columns per side (bid / ask) within a part
group_size = 3    # adjacent levels merged into one group (the last group holds the remainder)

new_columns = []
for i, col in enumerate(columns):
    part_num, offset = divmod(i, PART_WIDTH)
    if 'min' in col:
        # Target columns ('Max_3min', 'Min_3min') keep their names.
        new_columns.append(col)
    elif offset == 0:
        new_columns.append('price' + str(part_num))
    else:
        level = offset - 1                      # 0..39 within the part
        side = 'ask' if level // SIDE_DEPTH % 2 else 'bid'
        group_num = level % SIDE_DEPTH // group_size
        new_columns.append(side + str(group_num) + '_' + str(part_num))

df.columns = new_columns

# Sum the columns that now share a label. `groupby(..., axis=1)` is deprecated
# (pandas 2.1) and removed in pandas 3, so group the transposed frame instead.
# NOTE(review): transposing upcasts mixed dtypes to a common one - assumes all
# remaining columns are numeric.
df = df.T.groupby(level=0).sum().T

Unnamed: 0,Max_3min,Min_3min,ask0_0,ask0_1,ask0_10,ask0_11,ask0_12,ask0_13,ask0_14,ask0_15,...,price18,price19,price2,price3,price4,price5,price6,price7,price8,price9
0,152.09,151.7,88.0,88.0,88.0,88.0,88.0,90.0,158.0,158.0,...,151.70,151.70,151.70,151.70,151.70,151.70,151.70,151.70,151.70,151.70
1,152.09,151.7,88.0,88.0,88.0,88.0,90.0,158.0,158.0,158.0,...,151.70,151.70,151.70,151.70,151.70,151.70,151.70,151.70,151.70,151.70
2,152.09,151.7,88.0,88.0,88.0,90.0,158.0,158.0,158.0,164.0,...,151.70,151.70,151.70,151.70,151.70,151.70,151.70,151.70,151.70,151.70
3,152.09,151.7,88.0,88.0,90.0,158.0,158.0,158.0,164.0,158.0,...,151.70,151.70,151.70,151.70,151.70,151.70,151.70,151.70,151.70,151.70
4,152.09,151.7,88.0,88.0,158.0,158.0,158.0,164.0,158.0,158.0,...,151.70,151.70,151.70,151.70,151.70,151.70,151.70,151.70,151.70,151.70
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11916,146.51,145.9,72.0,72.0,72.0,72.0,72.0,72.0,73.0,73.0,...,146.51,146.51,146.51,146.51,146.51,146.51,146.51,146.51,146.51,146.51
11917,146.51,145.9,72.0,72.0,72.0,72.0,72.0,73.0,73.0,73.0,...,146.51,146.51,146.51,146.51,146.51,146.51,146.51,146.51,146.51,146.51
11918,146.51,145.9,72.0,72.0,72.0,72.0,73.0,73.0,73.0,73.0,...,146.51,146.51,146.51,146.51,146.51,146.51,146.51,146.51,146.51,146.51
11919,146.51,145.9,72.0,72.0,72.0,73.0,73.0,73.0,73.0,73.0,...,146.51,146.51,146.51,146.51,146.51,146.51,146.51,146.51,146.51,146.51


In [47]:
# Display the renamed column labels produced by the renaming loop, to eyeball the bid/ask grouping.
new_columns

['price0',
 'bid0_0',
 'bid0_0',
 'bid0_0',
 'bid1_0',
 'bid1_0',
 'bid1_0',
 'bid2_0',
 'bid2_0',
 'bid2_0',
 'bid3_0',
 'bid3_0',
 'bid3_0',
 'bid4_0',
 'bid4_0',
 'bid4_0',
 'bid5_0',
 'bid5_0',
 'bid5_0',
 'bid6_0',
 'bid6_0',
 'ask0_0',
 'ask0_0',
 'ask0_0',
 'ask1_0',
 'ask1_0',
 'ask1_0',
 'ask2_0',
 'ask2_0',
 'ask2_0',
 'ask3_0',
 'ask3_0',
 'ask3_0',
 'ask4_0',
 'ask4_0',
 'ask4_0',
 'ask5_0',
 'ask5_0',
 'ask5_0',
 'ask6_0',
 'ask6_0',
 'price1',
 'bid0_1',
 'bid0_1',
 'bid0_1',
 'bid1_1',
 'bid1_1',
 'bid1_1',
 'bid2_1',
 'bid2_1',
 'bid2_1',
 'bid3_1',
 'bid3_1',
 'bid3_1',
 'bid4_1',
 'bid4_1',
 'bid4_1',
 'bid5_1',
 'bid5_1',
 'bid5_1',
 'bid6_1',
 'bid6_1',
 'ask0_1',
 'ask0_1',
 'ask0_1',
 'ask1_1',
 'ask1_1',
 'ask1_1',
 'ask2_1',
 'ask2_1',
 'ask2_1',
 'ask3_1',
 'ask3_1',
 'ask3_1',
 'ask4_1',
 'ask4_1',
 'ask4_1',
 'ask5_1',
 'ask5_1',
 'ask5_1',
 'ask6_1',
 'ask6_1',
 'price2',
 'bid0_2',
 'bid0_2',
 'bid0_2',
 'bid1_2',
 'bid1_2',
 'bid1_2',
 'bid2_2',
 'bid2_2',

In [35]:
# Scratch check of a conditional expression: 0 is falsy, so the else-value is printed.
print(1 if 0 else 3)

3


In [6]:
def normalize(df, label=True):
    # FIXME(review): the assignment below has no target name, so this cell raises
    # SyntaxError and `normalize` is never defined. Presumably a variable holding
    # the row count (df.shape[0]) was intended - confirm and finish the body.
    # `label` is not used anywhere in the visible code.
     = df.shape[0]
    

SyntaxError: invalid syntax (<ipython-input-6-e14f62587a11>, line 1)