## Modelo de Regresión Logística
Este modelo se va a aplicar a datos del instrumento EURUSDT descargados de la API de Binance.

In [None]:
# Required imports for the notebook
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
 
# Apply the seaborn white-grid style sheet to every figure drawn afterwards
plt.style.use("seaborn-v0_8-whitegrid")

In [3]:
# Load the EURUSDT CSV, shorten the two column names that are used later,
# and index the frame by the (renamed) close-time column.
data = (
    pd.read_csv("EURUSDT_2020_5min.csv")
    .rename(columns={"Close time": "time", "Close": "price"})
    .set_index("time")
)

In [4]:
# Display the freshly loaded frame (bare last expression -> rich cell output)
data

Unnamed: 0_level_0,price
time,Unnamed: 1_level_1
2020-01-03 08:04:59.999,1.1188
2020-01-03 08:09:59.999,1.1188
2020-01-03 08:14:59.999,1.1188
2020-01-03 08:19:59.999,1.1196
2020-01-03 08:24:59.999,1.1196
...,...
2020-12-30 23:44:59.999,1.2311
2020-12-30 23:49:59.999,1.2310
2020-12-30 23:54:59.999,1.2309
2020-12-30 23:59:59.999,1.2310


In [5]:
# Log-return of each bar relative to the previous one: ln(price_t / price_{t-1}).
# The price column is selected explicitly so the right-hand side is always a
# Series: dividing the whole frame only works while `data` has exactly one
# column and fails with a ValueError as soon as this cell is re-run (because
# "returns" already exists) or the CSV carries any extra column.
# The first row has no predecessor, so its return is NaN.
data["returns"] = np.log(data["price"] / data["price"].shift(1))

In [6]:
# Inspect the frame now that the "returns" column has been added
data

Unnamed: 0_level_0,price,returns
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-01-03 08:04:59.999,1.1188,
2020-01-03 08:09:59.999,1.1188,0.000000
2020-01-03 08:14:59.999,1.1188,0.000000
2020-01-03 08:19:59.999,1.1196,0.000715
2020-01-03 08:24:59.999,1.1196,0.000000
...,...,...
2020-12-30 23:44:59.999,1.2311,-0.000081
2020-12-30 23:49:59.999,1.2310,-0.000081
2020-12-30 23:54:59.999,1.2309,-0.000081
2020-12-30 23:59:59.999,1.2310,0.000081


In [None]:
# Remove, in place, every row that contains at least one missing value
# (at this point that is the first bar, whose return is NaN).
data.dropna(axis=0, how="any", inplace=True)

In [None]:
# Sign of the return: 1.0 for an up move, -1.0 for a down move, 0.0 when the price did not change
data["direction"] = np.sign(data["returns"])

In [None]:
# Inspect the frame now that the "direction" column has been added
data

Unnamed: 0_level_0,price,returns,direction
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-03 08:09:59.999,1.1188,0.000000,0.0
2020-01-03 08:14:59.999,1.1188,0.000000,0.0
2020-01-03 08:19:59.999,1.1196,0.000715,1.0
2020-01-03 08:24:59.999,1.1196,0.000000,0.0
2020-01-03 08:29:59.999,1.1159,-0.003310,-1.0
...,...,...,...
2020-12-30 23:44:59.999,1.2311,-0.000081,-1.0
2020-12-30 23:49:59.999,1.2310,-0.000081,-1.0
2020-12-30 23:54:59.999,1.2309,-0.000081,-1.0
2020-12-30 23:59:59.999,1.2310,0.000081,1.0


In [None]:
# Count how many bars fall into each direction value (0.0, 1.0, -1.0)
data["direction"].value_counts()

direction
 0.0    36592
 1.0    33987
-1.0    33616
Name: count, dtype: int64

In [None]:
# Number of lagged-return columns (lag1 .. lagN) built in the next cell
lags = 5

In [None]:
# Names of the lagged-return columns: "lag1", "lag2", ..., one per lag.
cols = [f"lag{lag}" for lag in range(1, lags + 1)]

# Column "lagK" holds the return observed K bars earlier.
for lag, col in enumerate(cols, start=1):
    data[col] = data["returns"].shift(lag)

# The first `lags` rows have no complete history, so they contain NaN and are removed.
data.dropna(inplace=True)

In [None]:
# Inspect the frame with the lag1..lag5 columns in place
data

Unnamed: 0_level_0,price,returns,direction,lag1,lag2,lag3,lag4,lag5
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-01-03 08:34:59.999,1.1191,0.002864,1.0,-0.003310,0.000000,0.000715,0.000000,0.000000
2020-01-03 08:39:59.999,1.1191,0.000000,0.0,0.002864,-0.003310,0.000000,0.000715,0.000000
2020-01-03 08:44:59.999,1.1192,0.000089,1.0,0.000000,0.002864,-0.003310,0.000000,0.000715
2020-01-03 08:49:59.999,1.1192,0.000000,0.0,0.000089,0.000000,0.002864,-0.003310,0.000000
2020-01-03 08:54:59.999,1.1192,0.000000,0.0,0.000000,0.000089,0.000000,0.002864,-0.003310
...,...,...,...,...,...,...,...,...
2020-12-30 23:44:59.999,1.2311,-0.000081,-1.0,-0.000162,0.000162,-0.000162,0.000406,0.000406
2020-12-30 23:49:59.999,1.2310,-0.000081,-1.0,-0.000081,-0.000162,0.000162,-0.000162,0.000406
2020-12-30 23:54:59.999,1.2309,-0.000081,-1.0,-0.000081,-0.000081,-0.000162,0.000162,-0.000162
2020-12-30 23:59:59.999,1.2310,0.000081,1.0,-0.000081,-0.000081,-0.000081,-0.000162,0.000162
