In [None]:
import pandas as pd 
import numpy as np

In [None]:
# Load the raw dataset with the pyarrow engine
# (the file name indicates Binance BTCFDUSD data).
parquet_path = '../data/binance_data_BTCFDUSD_20240528183200.parquet'
df = pd.read_parquet(parquet_path, engine='pyarrow')

In [None]:
# Print a summary of the loaded frame: index range, columns, dtypes,
# non-null counts and memory usage.
df.info()

In [None]:
# Parse the index into datetimes; the plots below use it as their time axis.
datetime_index = pd.to_datetime(df.index)
df.index = datetime_index

## Price plots

Note: Plotly runs into memory problems with this dataset, so the candlestick chart below is left commented out.

In [None]:
import plotly.graph_objects as go 
from datetime import datetime

In [None]:
# Candlestick chart built from the 'Open', 'High', 'Low' and 'Close' columns,
# with the frame's index on the x axis.
# The whole cell is disabled -- presumably because of the Plotly memory problem
# mentioned in the note above (TODO confirm before re-enabling).
#fig = go.Figure(data=[go.Candlestick(x=df.index,
#                                     open=df['Open'],
#                                     high=df['High'],
#                                     low=df['Low'],
#                                     close=df['Close'])])
#
#fig.show()


In [None]:
# Percentage change of every column over 1, 10, 60 and 3600 rows.
# `periods` counts rows, so the 1 s / 10 s / 1 min / 1 h naming assumes one row
# per second -- TODO confirm the sampling is gap-free.
# Each result is then moved up by one row, so row t holds the change ending at
# row t+1.
# NOTE(review): the shift is -1 for every horizon, not -periods; confirm that
# this is the intended alignment.
df_pct_change_1s, df_pct_change_10s, df_pct_change_1min, df_pct_change_1h = (
    df.pct_change(periods=n_rows).shift(periods=-1)
    for n_rows in (1, 10, 60, 3600)
)

In [None]:
# Display the 10-row ("10 seconds") percentage-change frame for inspection.
df_pct_change_10s

In [None]:
# Plot "Close" percentage change
import matplotlib.pyplot as plt

# Single-axes figure with one line per horizon for the 'Close' percentage change.
# The tuple order is the draw order (1 hour first, 1 sec last), which fixes both
# the colour assignment and the legend order.
fig, ax = plt.subplots(figsize=(10, 6))
close_change_lines = (
    (df_pct_change_1h, ':', '1 hour'),
    (df_pct_change_1min, '--', '1 min'),
    (df_pct_change_10s, '-.', '10 sec'),
    (df_pct_change_1s, '-', '1 sec'),
)
for pct_frame, line_style, horizon_label in close_change_lines:
    ax.plot(df.index, pct_frame['Close'], ls=line_style, label=horizon_label)
ax.set(xlabel='Date & Hour', ylabel='Change percentage')
ax.legend()
ax.grid(True)
fig.tight_layout()

In [None]:
# Simple moving averages of 'Close', stored as new columns on df.
# Windows are expressed in rows (3600 / 86400 / 864000); the hour/day naming
# assumes one row per second -- TODO confirm.
# min_periods=1 means the first rows average over however many rows exist so far
# instead of being NaN.
close_prices = df['Close']
sma_windows = {
    'SMA_1hour': 3600,
    'SMA_1day': 86400,
    'SMA_10day': 86400 * 10,
}
for sma_column, window_rows in sma_windows.items():
    df[sma_column] = close_prices.rolling(window=window_rows, min_periods=1).mean()

In [None]:
# Single-axes figure comparing the three moving-average columns.
# The tuple order is the draw order (1 day, 10 day, 1 hour), which fixes both
# the colour assignment and the legend order.
fig, ax = plt.subplots(figsize=(10, 6))
sma_lines = (
    ('SMA_1day', ':', '1 day'),
    ('SMA_10day', '--', '10 day'),
    ('SMA_1hour', '-', '1 hour'),
)
for sma_column, line_style, window_label in sma_lines:
    ax.plot(df.index, df[sma_column], ls=line_style, label=window_label)
ax.set(xlabel='Date & Hour', ylabel='Moving average')
ax.legend()
ax.grid(True)
fig.tight_layout()

## Correlation analyses

In [None]:
# Pairwise Pearson correlation between all columns of df
# (this includes the SMA_* columns added above).
corr_matrix = df.corr(method='pearson')

In [None]:
# Plot heatmap
import seaborn as sns
import matplotlib.pyplot as plt

# Draw the correlation matrix as a heatmap on its own 15x12 figure.
# Cell values are not annotated; colours come from the diverging 'coolwarm' map.
heatmap_fig, heatmap_ax = plt.subplots(figsize=(15, 12))
sns.heatmap(corr_matrix, annot=False, cmap='coolwarm', ax=heatmap_ax)
plt.show()

In [None]:
# Boolean masks that split the correlation matrix by strength, using 0.5 on |r|.
#   nhc = not highly correlated: -0.5 < r < 0.5
#   hc  = highly correlated: every other cell (NaN cells also end up here,
#         because a comparison with NaN is False)
mask_nhc = corr_matrix.abs() < 0.5
mask_hc = ~mask_nhc

In [None]:
# Apply each mask to the correlation matrix: cells where the mask is False
# become NaN, and the matrix shape is kept.
corr_hc = corr_matrix.where(mask_hc)
corr_nhc = corr_matrix.where(mask_nhc)

In [None]:
# Columns whose correlation with 'Close' is outside (-0.5, 0.5); the rest show as NaN.
corr_hc.loc[:, 'Close']

In [None]:
# Columns whose correlation with 'Close' is inside (-0.5, 0.5); the rest show as NaN.
corr_nhc.loc[:, 'Close']

In [None]:
# Report repeated index labels, then keep only the first row for each label.
# NOTE(review): this runs after the percentage-change frames, moving averages
# and correlation matrix above were computed, so those still reflect the
# un-deduplicated data -- consider whether this step belongs right after loading.
is_repeated_label = df.index.duplicated(keep='first')
duplicate_indexes = df.index[is_repeated_label]
print(f"Duplicated indexes : {duplicate_indexes}")
df = df[~is_repeated_label]

In [None]:
# Names of all columns that contain the substring 'bid' or 'ask'.
ask_bid_columns = [
    column_name
    for column_name in df.columns
    if any(side in column_name for side in ('bid', 'ask'))
]

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# One scatter plot per bid/ask column: that column on x, 'Close' on y.
# The column name is printed first so each figure can be identified in the output.
for col in ask_bid_columns:
    print(col)
    scatter_fig, scatter_ax = plt.subplots(figsize=(8, 6))
    sns.scatterplot(data=df, x=str(col), y='Close', ax=scatter_ax)
    scatter_ax.set_xlabel(col)
    scatter_ax.set_ylabel('Close')
    plt.show()

In [None]:
# Display the 10-row ("10 seconds") percentage-change frame again.
# It was computed before the duplicate-index rows were dropped from df.
df_pct_change_10s