In [None]:
import pandas as pd
import pandas_datareader as pdr
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Start date shared by every download in this notebook.
dt_start = '2006-01-10'

# Daily FRED series DTWEXEMEGS and DCOILBRENTEU, exposed below as `dollar` and `brent`.
# Exact zeros are treated as missing, and every gap is filled with the last
# preceding observation.
# `np.nan` and `.ffill()` are used instead of `np.NaN` (removed in NumPy 2.0) and
# `fillna(method='ffill')` (deprecated since pandas 2.1); the result is the same.
df = (
    pdr.get_data_fred(['DTWEXEMEGS', 'DCOILBRENTEU'], start=dt_start)
    .replace(0.0, np.nan)
    .ffill()
    .rename(columns={'DTWEXEMEGS': 'dollar', 'DCOILBRENTEU': 'brent'})
)
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 4185 entries, 2006-01-10 to 2022-01-24
Freq: B
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   dollar  4185 non-null   float64
 1   brent   4185 non-null   float64
dtypes: float64(2)
memory usage: 98.1 KB


In [None]:
# USD000UTSTOM history from MOEX, restricted to rows of the CETS board.
df1 = pdr.get_data_moex('USD000UTSTOM', start=dt_start)

# Cleaning order is unchanged: drop fully duplicated rows, treat exact zeros as
# missing, then forward-fill. The steps are chained so that each one returns a
# new frame; the previous in-place calls operated on a boolean-filtered slice,
# which pandas reports as SettingWithCopyWarning.
# `np.nan` / `.ffill()` replace `np.NaN` (removed in NumPy 2.0) and
# `fillna(method='ffill')` (deprecated since pandas 2.1).
df1 = (
    df1[df1.BOARDID == 'CETS']
    .drop_duplicates()
    .replace(0.0, np.nan)
    .ffill()
)

In [None]:
# Daily move: (close - open) divided by the day's high.
# A fresh one-column frame is built from the computed Series, so the column
# assignments below no longer write into a slice of the downloaded frame
# (which raised SettingWithCopyWarning).
df1 = ((df1.CLOSE - df1.OPEN) / df1.HIGH).to_frame('result')
v_std = df1.result.std()

# Bucket every move by how many of the ascending bounds it strictly exceeds:
#   0: result <= -2*std      1: (-2*std, -std]     2: (-std, 0]
#   3: (0, std]              4: (std, 2*std]       5: result > 2*std
# Bounds are applied from lowest to highest, so a later (higher) label overwrites
# an earlier one. Rows whose result is NaN keep label 0, because every comparison
# with NaN is False.
df1['cl'] = 0
for cl_label, cl_bound in enumerate((v_std * -2, v_std * -1, 0, v_std, v_std * 2), start=1):
  df1.loc[df1.result > cl_bound, 'cl'] = cl_label
df1.cl.value_counts()

2    1662
3    1652
1     270
4     256
5     104
0      83
Name: cl, dtype: int64

In [None]:
# Attach the class label to the feature frame by index (date). Dates that are
# absent from df1 come back as NaN after the join.
df = df.join(df1.cl)

# Carry the most recent label forward over those gaps. The filled column is
# assigned back explicitly: `df.cl.fillna(..., inplace=True)` is a chained
# in-place call that pandas warns about and that does not update `df` under
# Copy-on-Write, and `fillna(method=...)` is deprecated since pandas 2.1.
df['cl'] = df['cl'].ffill()

# The join introduced NaN, so the column became float; restore a compact integer
# dtype. This raises if the very first rows have no label to carry forward.
df['cl'] = df['cl'].astype('int8')
df.info()

Unnamed: 0_level_0,dollar,brent,cl
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2006-01-10,100.0931,62.32,3.0
2006-01-11,100.0905,61.54,2.0
2006-01-12,99.8573,62.95,3.0
2006-01-13,100.0169,61.58,2.0
2006-01-16,100.0261,62.34,2.0


In [None]:
# MOEX tickers whose closing values are added as one feature column each.
ls_ind = ['IMOEX', 'RGBITR', 'RUCBITR']
for el_ind in ls_ind:
  df1 = pdr.get_data_moex(el_ind, start=dt_start)
  # Drop missing closes first, so that a NaN row never wins over a real value
  # recorded for the same date.
  df1 = df1[['CLOSE']].dropna()
  # De-duplicate on the index (date), not on the value: `drop_duplicates()` on a
  # single CLOSE column discarded every later day whose close happened to equal
  # an earlier one, and still allowed repeated dates to multiply rows in the join.
  df1 = df1[~df1.index.duplicated(keep='first')]
  df1 = df1.rename(columns={'CLOSE': el_ind})
  df = df.join(df1)

# Dates without a quote for a ticker inherit the last known close.
# `.ffill()` replaces `fillna(method='ffill')`, deprecated since pandas 2.1.
df = df.ffill()
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 4185 entries, 2006-01-10 to 2022-01-24
Freq: B
Data columns (total 6 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   dollar   4185 non-null   float64
 1   brent    4185 non-null   float64
 2   cl       4185 non-null   int8   
 3   IMOEX    4185 non-null   float64
 4   RGBITR   4185 non-null   float64
 5   RUCBITR  4185 non-null   float64
dtypes: float64(5), int8(1)
memory usage: 360.3 KB


In [None]:
# Feature matrix: every column except the class label; target: the label itself.
x = df.drop(columns='cl')
y = df['cl']

In [None]:
# Chronological hold-out: with shuffle=False the last 10% of rows form the test set.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1, shuffle=False)

# Fit the scaler on the training rows only. Fitting it on the whole data set
# before splitting (as was done previously) leaks the min/max of the test period
# into the training features. Test values may therefore fall outside [0, 1].
scaler = MinMaxScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Kept so that the name `x_scaled` is still defined after this cell: the full
# feature matrix expressed in the training-set scale.
x_scaled = scaler.transform(x)

In [None]:
# Logistic-regression classifier with default settings, fitted on the training
# split (`fit` returns the estimator itself). The cell displays the mean accuracy
# on that same training split.
model = LogisticRegression().fit(x_train, y_train)
model.score(X=x_train, y=y_train)

0.4335459861775651

In [None]:
# Mean accuracy of the fitted model on the held-out test split.
model.score(X=x_test, y=y_test)

0.4354066985645933

In [None]:
# Pair each feature row with the label found five rows later: features lose their
# last five rows, labels lose their first five (both slices are positional).
# NOTE(review): nothing visible refits the model on this pair, and `x` is
# reassigned by the following cell - confirm whether a retraining step is missing.
x = df.drop(columns='cl').iloc[:-5]
y = df['cl'].iloc[5:]

In [None]:
# Take the five most recent feature rows, put them on the scale of the already
# fitted scaler, and display the classes the fitted model predicts for them.
x = df.drop(columns='cl').tail(5)
x_scaled = scaler.transform(x)
pred = model.predict(x_scaled)
pred

array([2, 2, 2, 2, 2], dtype=int8)