# Microestructura y Sistemas de Trading
##  Tarea 1: Effective Spread

José Armando Melchor Soto


---

## **Librerías** 

In [36]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

---

## **Funciones**

#### **Gráfica Spread**

In [37]:
def grafica_spread(df):
    """Plot every column of ``df`` whose name contains "Spread" on one figure.

    Each matching column is drawn as a line against ``df.index`` and labelled
    with its column name. The figure is rendered with ``plt.show()``.

    Args:
        df: DataFrame whose spread columns contain "Spread" in their name.

    Returns:
        None.
    """
    # Select the spread columns up front with a boolean mask over the names.
    es_spread = ["Spread" in nombre for nombre in df.columns]
    spreads = df.loc[:, es_spread]

    plt.figure(figsize=(12, 6))
    for nombre, serie in spreads.items():
        plt.plot(df.index, serie, label=nombre)

    plt.title('Gráfica de Spreads')
    plt.xlabel('Fecha')
    plt.ylabel('Spread')
    plt.legend()
    plt.grid()
    plt.show()

#### **Gráfica Spread promedio**

In [38]:
def grafica_spread_promedio(df):
    """Draw a bar chart with the mean of every "Spread" column of ``df``.

    One bar is drawn per column whose name contains "Spread"; each bar is
    annotated with its mean formatted to four decimals.

    Args:
        df: DataFrame whose spread columns contain "Spread" in their name.

    Returns:
        None.
    """
    # Column name -> mean, in the order the columns appear in the frame.
    promedios = {
        nombre: df[nombre].mean()
        for nombre in df.columns
        if "Spread" in nombre
    }
    etiquetas = list(promedios)
    alturas = list(promedios.values())

    plt.figure()
    plt.bar(etiquetas, alturas)
    for posicion, altura in enumerate(alturas):
        # The label is anchored 0.015 below the bar top, i.e. inside the bar.
        plt.text(posicion, altura - 0.015, f'{altura:.4f}', ha='center', va='bottom')
    plt.title('Average Spread per window of AAPL')
    plt.ylabel('Average Spread')
    plt.show()

#### **Block Method**

In [39]:
def block_method(df1, windows):
    """Estimate spreads from the covariance of adjacent blocks of price changes.

    For each window size ``i`` and each row ``t`` that has at least ``2*i``
    observations up to and including it, the sample covariance between the
    ``i`` price changes ending at ``t`` and the ``i`` changes immediately
    before them is written to ``Cov_{i}`` at row ``t``. ``Spread_{i}`` is
    ``2 * sqrt(|Cov_{i}|)``. Rows without enough history are NaN, so a window
    with ``2*i > len(df1)`` yields all-NaN columns.

    After the columns are computed, a Bid/Ask table is built with the
    module-level ``Bid`` and ``Ask`` helpers, the spreads are plotted with
    ``grafica_spread`` and the Bid/Ask table is shown with ``display``.

    Args:
        df1: DataFrame with a price-change column named 'diff' (any letter
            case) plus the columns ``Bid``/``Ask`` read ('Close'). It is
            modified in place: ``Cov_{i}`` and ``Spread_{i}`` are added.
        windows: Iterable of integer block sizes, in rows.

    Returns:
        Whatever ``display`` returns for the Bid/Ask table.
        NOTE(review): ``display`` is not defined in this file; presumably the
        IPython/Jupyter builtin, which returns None - confirm.

    Raises:
        KeyError: If ``df1`` has no column whose lower-cased name is 'diff'.
    """
    diff_col = next((c for c in df1.columns if c.lower() == 'diff'), None)
    if diff_col is None:
        raise KeyError("df1 debe tener columna 'Diff' (o 'diff').")

    # Extract the series once; the inner loop then slices a plain NumPy array
    # instead of going through a column lookup and .iloc on every iteration.
    diffs = df1[diff_col].to_numpy()
    n = len(diffs)

    for i in windows:
        # Start all-NaN; when 2*i > n the range below is empty and the column
        # stays entirely NaN.
        covs = np.full(n, np.nan)

        for k in range(n - 2*i + 1):
            older = diffs[k:k + i]
            newer = diffs[k + i:k + 2*i]
            # Off-diagonal entry of the 2x2 sample covariance matrix, stored
            # at the last row covered by the two blocks.
            covs[k + 2*i - 1] = np.cov(older, newer, bias=False)[0, 1]

        df1[f'Cov_{i}'] = covs
        df1[f'Spread_{i}'] = 2 * np.sqrt(np.abs(df1[f'Cov_{i}']))

    # Bid/Ask read the price change under the exact name 'diff'. When the
    # column was matched under another spelling (e.g. 'Diff'), hand them a
    # frame where it carries that name so the lookup cannot fail.
    if diff_col == 'diff':
        quotes_input = df1
    else:
        quotes_input = (
            df1.drop(columns='diff', errors='ignore')
            .rename(columns={diff_col: 'diff'})
        )

    bid_ask = pd.DataFrame(index=df1.index)
    for i in windows:
        bid_ask[f'Bid_{i}'] = Bid(quotes_input, i)
        bid_ask[f'Ask_{i}'] = Ask(quotes_input, i)

    grafica_spread(df1)

    return display(bid_ask)



#### **Serial Covariance**

In [40]:
def serial_covariance(df2, windows, lag=None):
    """Estimate spreads from a rolling covariance of lagged price changes.

    For each window size ``i`` the rolling covariance (over ``i`` rows)
    between ``df2['diff']`` and the same series shifted by ``lag`` rows is
    stored in ``Cov_{i}``; ``Spread_{i}`` is ``2 * sqrt(|Cov_{i}|)``. The
    spreads are then plotted with ``grafica_spread`` and ``df2`` is shown
    with ``display``.

    Args:
        df2: DataFrame with a 'diff' column of price changes. It is modified
            in place: ``Cov_{i}`` and ``Spread_{i}`` are added.
        windows: Iterable of integer rolling-window sizes, in rows.
        lag: Number of rows the second series is shifted by. ``None`` (the
            default) shifts by the window size itself, which compares each
            window with the window immediately preceding it. Pass ``lag=1``
            for first-order serial covariance.
            NOTE(review): Roll's (1984) estimator is defined on the lag-1
            autocovariance of price changes; confirm which lag is intended.

    Returns:
        Whatever ``display`` returns for ``df2``.
        NOTE(review): ``display`` is not defined in this file; presumably the
        IPython/Jupyter builtin, which returns None - confirm.
    """
    changes = df2['diff']

    for i in windows:
        shift_by = i if lag is None else lag
        df2[f'Cov_{i}'] = changes.rolling(window=i).cov(changes.shift(shift_by))
        df2[f'Spread_{i}'] = 2 * np.sqrt(np.abs(df2[f'Cov_{i}']))

    grafica_spread(df2)

    return display(df2)

---

### Importación de Datos

In [41]:
# Load the price data from 'aapl_5m_train.csv' and drop the columns that are
# not used below; the trailing expression previews the first rows.
df = pd.read_csv('aapl_5m_train.csv').drop(columns=['Unnamed: 0','Timestamp', 'Gmtoffset', 'Volume'])
df.head()

Unnamed: 0,Datetime,Open,High,Low,Close
0,2021-01-04 14:30:00,133.570007,133.611602,132.389999,132.809997
1,2021-01-04 14:35:00,132.75,132.75,131.809997,131.889999
2,2021-01-04 14:40:00,131.5,132.339996,131.5,132.059997
3,2021-01-04 14:45:00,132.0,132.25,131.899993,132.25
4,2021-01-04 14:50:00,132.0,132.018096,131.520004,131.589996


In [42]:
# Working copy for the block method: add the change in 'Close' between
# consecutive rows as 'diff', drop rows containing NaN (the first row has no
# previous close) and renumber the index from 0.
df1 = (
    df.assign(diff=df['Close'].diff())
    .dropna()
    .reset_index(drop=True)
)
df1.head()

Unnamed: 0,Datetime,Open,High,Low,Close,diff
0,2021-01-04 14:35:00,132.75,132.75,131.809997,131.889999,-0.919998
1,2021-01-04 14:40:00,131.5,132.339996,131.5,132.059997,0.169998
2,2021-01-04 14:45:00,132.0,132.25,131.899993,132.25,0.190003
3,2021-01-04 14:50:00,132.0,132.018096,131.520004,131.589996,-0.660004
4,2021-01-04 14:55:00,132.0,132.089996,131.300003,131.448898,-0.141098


In [43]:
# Working copy for the serial-covariance method: add the previous row's
# 'diff' as 'diff_1', drop rows containing NaN and renumber the index from 0.
df2 = (
    df1.assign(diff_1=df1['diff'].shift(1))
    .dropna()
    .reset_index(drop=True)
)
df2.head()

Unnamed: 0,Datetime,Open,High,Low,Close,diff,diff_1
0,2021-01-04 14:40:00,131.5,132.339996,131.5,132.059997,0.169998,-0.919998
1,2021-01-04 14:45:00,132.0,132.25,131.899993,132.25,0.190003,0.169998
2,2021-01-04 14:50:00,132.0,132.018096,131.520004,131.589996,-0.660004,0.190003
3,2021-01-04 14:55:00,132.0,132.089996,131.300003,131.448898,-0.141098,-0.660004
4,2021-01-04 15:00:00,131.544998,131.669998,131.070007,131.119995,-0.328903,-0.141098


In [44]:
# Window sizes, in rows, passed to block_method and serial_covariance below.
windows = [5, 10, 20, 50, 100]

In [45]:
def Bid(df, windows):
    """Return the bid implied by the close price and one spread column.

    Where 'diff' is positive the bid is 'Close' minus the spread; everywhere
    else it is 'Close' unchanged.

    Args:
        df: DataFrame with 'diff', 'Close' and ``Spread_{windows}`` columns.
        windows: A single window size; it selects the ``Spread_{windows}``
            column (despite the plural name it is not a list).

    Returns:
        numpy.ndarray with one value per row of ``df``.
    """
    cierre = df['Close']
    spread = df[f'Spread_{windows}']
    subio = df['diff'] > 0
    return np.where(subio, cierre - spread, cierre)

def Ask(df, windows):
    """Return the ask implied by the close price and one spread column.

    Where 'diff' is negative the ask is 'Close' plus the spread; everywhere
    else it is 'Close' unchanged.

    Args:
        df: DataFrame with 'diff', 'Close' and ``Spread_{windows}`` columns.
        windows: A single window size; it selects the ``Spread_{windows}``
            column (despite the plural name it is not a list).

    Returns:
        numpy.ndarray with one value per row of ``df``.
    """
    cierre = df['Close']
    spread = df[f'Spread_{windows}']
    bajo = df['diff'] < 0
    return np.where(bajo, cierre + spread, cierre)

In [None]:
# Run the block method on df1: adds Cov_*/Spread_* columns to df1 in place,
# plots the spreads and displays the resulting Bid/Ask table.
block_method(df1, windows)

In [None]:
# Run the rolling-covariance method on df2: adds Cov_*/Spread_* columns to df2
# in place, plots the spreads and displays df2.
serial_covariance(df2, windows)

Unnamed: 0,Datetime,Open,High,Low,Close,diff,diff_1,Cov_5,Spread_5,Cov_10,Spread_10,Cov_20,Spread_20,Cov_50,Spread_50,Cov_100,Spread_100
0,2021-01-04 14:40:00,131.500000,132.339996,131.500000,132.059997,0.169998,-0.919998,,,,,,,,,,
1,2021-01-04 14:45:00,132.000000,132.250000,131.899993,132.250000,0.190003,0.169998,,,,,,,,,,
2,2021-01-04 14:50:00,132.000000,132.018096,131.520004,131.589996,-0.660004,0.190003,,,,,,,,,,
3,2021-01-04 14:55:00,132.000000,132.089996,131.300003,131.448898,-0.141098,-0.660004,,,,,,,,,,
4,2021-01-04 15:00:00,131.544998,131.669998,131.070007,131.119995,-0.328903,-0.141098,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39566,2022-12-30 20:40:00,129.029998,129.130004,128.919998,129.054992,0.014999,0.019989,-0.005183,0.143989,0.001034,0.064309,0.001851,0.086049,-0.000548,0.046804,-0.010052,0.200519
39567,2022-12-30 20:45:00,129.059997,129.399993,129.020004,129.380004,0.325012,0.014999,-0.010577,0.205689,-0.000199,0.028237,0.001259,0.070957,0.000557,0.047191,-0.010005,0.200046
39568,2022-12-30 20:50:00,129.375000,129.929992,129.330001,129.764404,0.384400,0.325012,-0.007774,0.176345,0.009258,0.192435,0.002945,0.108540,0.000333,0.036521,-0.010244,0.202426
39569,2022-12-30 20:55:00,129.764999,129.949996,129.619995,129.949996,0.185592,0.384400,-0.007761,0.176193,0.008288,0.182080,0.001540,0.078493,0.000748,0.054715,-0.010351,0.203482
