In [1]:
import pandas as pd

# Load the tab-delimited price file into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer="prices_data/[SP500]_H4_01-03-2020__01-03-2024.csv",
    delimiter="\t",
)

# Swap the bracketed export headers for short lowercase names, then discard
# the tick-volume, volume and spread columns.
df = (
    df
    .rename(columns={
        '<DATE>': 'date',
        '<TIME>': 'time',
        '<OPEN>': 'open',
        '<HIGH>': 'high',
        '<LOW>': 'low',
        '<CLOSE>': 'close',
    })
    .drop(columns=['<TICKVOL>', '<VOL>', '<SPREAD>'])
)

print(df)

            date      time     open     high      low    close
0     2020.03.02  00:00:00  2903.82  2966.68  2892.20  2948.32
1     2020.03.02  04:00:00  2948.07  2985.05  2944.08  2971.62
2     2020.03.02  08:00:00  2971.50  3022.61  2960.63  2975.41
3     2020.03.02  12:00:00  2975.53  2979.78  2906.57  2962.53
4     2020.03.02  16:00:00  2963.28  3040.35  2944.43  3036.27
...          ...       ...      ...      ...      ...      ...
6173  2024.02.29  08:00:00  5073.73  5073.73  5051.60  5059.53
6174  2024.02.29  12:00:00  5059.28  5089.58  5054.83  5086.78
6175  2024.02.29  16:00:00  5087.03  5096.78  5061.13  5081.08
6176  2024.02.29  20:00:00  5081.33  5105.08  5079.06  5090.49
6177  2024.03.01  00:00:00  5092.02  5101.39  5084.81  5101.27

[6178 rows x 6 columns]


In [10]:

def pivotid(df1, l, n1, n2):
    """Classify candle ``l`` of ``df1`` as a pivot within its neighbourhood.

    The neighbourhood runs from ``n1`` candles before ``l`` to ``n2`` candles
    after it, candle ``l`` included. ``df1`` must expose ``low`` and ``high``
    columns that can be looked up with the integers ``l - n1 .. l + n2``.

    Returns
    -------
    int
        0 if the neighbourhood does not fit inside ``df1`` or the candle is
        neither extreme; 1 if no candle in the neighbourhood has a lower low;
        2 if none has a higher high; 3 if both conditions hold.
    """
    start, stop = l - n1, l + n2
    # A candle too close to either end cannot be judged: treat as "no pivot".
    if start < 0 or stop >= len(df1):
        return 0

    lows, highs = df1.low, df1.high
    neighbours = range(start, stop + 1)
    # Strict comparisons: a neighbour that merely ties does not disqualify.
    undercut = [lows[l] > lows[i] for i in neighbours]
    exceeded = [highs[l] < highs[i] for i in neighbours]

    # Low pivot contributes 1, high pivot contributes 2, so both sum to 3.
    return (0 if any(undercut) else 1) + (0 if any(exceeded) else 2)
    
# Tag every row with its pivot code, judged against the 20 candles before it
# and the 10 candles after it.
df['pivot'] = df.apply(
    lambda candle: pivotid(df1=df, l=candle.name, n1=20, n2=10),
    axis=1,
)

# Tally how many rows received each pivot code.
df['pivot'].value_counts()

pivot
0    5900
2     148
1     130
Name: count, dtype: int64

In [11]:
import numpy as np
def pointpos(x, eps=20):
    """Return the vertical position of the pivot marker for row ``x``.

    ``x`` is any mapping-like row with ``'pivot'``, ``'low'`` and ``'high'``
    entries. A pivot code of 1 places the marker ``eps`` below the low, a
    code of 2 places it ``eps`` above the high, and any other code yields
    NaN (no marker).
    """
    pivot_kind = x['pivot']
    if pivot_kind == 1:
        return x['low'] - eps
    if pivot_kind == 2:
        return x['high'] + eps
    return np.nan

# Marker height per row; rows whose pivot code is neither 1 nor 2 get NaN.
df['pointpos'] = df.apply(pointpos, axis=1)


In [12]:
import plotly.graph_objects as go

# Window to draw: rows from the 300th-from-last up to, but not including,
# the final row.
dfpl = df[-300:-1]

fig = go.Figure(
    data=[
        go.Candlestick(
            x=dfpl.index,
            open=dfpl['open'],
            high=dfpl['high'],
            low=dfpl['low'],
            close=dfpl['close'],
            increasing_line_color='green',
            decreasing_line_color='red',
        )
    ]
)

# Overlay the pivot markers; rows without a pivot hold NaN in 'pointpos'.
fig.add_scatter(
    x=dfpl.index,
    y=dfpl['pointpos'],
    mode="markers",
    marker={'size': 5, 'color': "MediumPurple"},
    name="pivot",
)

# Dark theme, no range slider, no grid lines.
fig.update_layout(
    xaxis_rangeslider_visible=False,
    paper_bgcolor='black',
    plot_bgcolor='black',
)
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)

fig

In [21]:

from sklearn.cluster import DBSCAN

# Only rows that carry a marker value take part in clustering; the double
# brackets keep X as a one-column DataFrame that still has df's row labels.
X = df[['pointpos']].dropna()

print(X)

# Cluster the marker prices with DBSCAN (eps=30, min_samples=2).
clustering = DBSCAN(eps=30, min_samples=2)
clustering.fit(X)

# One label per row of X, in the same order.
print(clustering.labels_)


      pointpos
36     2679.92
60     2342.60
73     2252.63
86     2164.80
110    2663.83
...        ...
6098   5068.37
6105   4899.95
6121   5062.92
6140   4924.93
6151   5131.01

[278 rows x 1 columns]
[ 0 -1 -1 -1  0  0 -1  1  1  0  2  3  2  3  2  1  4  5  1  2  6  2  6  4
  5  5  5  6  6  7  7  5  8  5  5  7  6  6  7  8  7  7  7  6  8  8  8  8
  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8
  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8
  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  9  8  9  8  9  8  8  8  8
  9  9  8  9  8  9  9  9  9  8  9  8  8  8  8  8  8  8  8  8  8  8  8  8
  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8
  8  8  8  8  8  8  8  8  8  8  8  7  8  8  8  8  8  8  8  8  8  8  8  8
  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8
  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8
  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8

In [22]:
# Start with every row unlabelled; rows that were not clustered stay NaN.
# np.nan is used because the np.NaN alias no longer exists in NumPy 2.0+.
df['cluster_labels'] = np.nan

# clustering.labels_ holds one label per row of X, in order, and X carries
# the row labels of the df rows it was built from, so the labels can be
# written back in a single assignment instead of row by row.
df.loc[X.index, 'cluster_labels'] = clustering.labels_

df


Unnamed: 0,date,time,open,high,low,close,pivot,pointpos,cluster_labels
0,2020.03.02,00:00:00,2903.82,2966.68,2892.20,2948.32,0,,
1,2020.03.02,04:00:00,2948.07,2985.05,2944.08,2971.62,0,,
2,2020.03.02,08:00:00,2971.50,3022.61,2960.63,2975.41,0,,
3,2020.03.02,12:00:00,2975.53,2979.78,2906.57,2962.53,0,,
4,2020.03.02,16:00:00,2963.28,3040.35,2944.43,3036.27,0,,
...,...,...,...,...,...,...,...,...,...
6173,2024.02.29,08:00:00,5073.73,5073.73,5051.60,5059.53,0,,
6174,2024.02.29,12:00:00,5059.28,5089.58,5054.83,5086.78,0,,
6175,2024.02.29,16:00:00,5087.03,5096.78,5061.13,5081.08,0,,
6176,2024.02.29,20:00:00,5081.33,5105.08,5079.06,5090.49,0,,


In [23]:

# Distinct cluster ids in value_counts order (most populated cluster first).
# The -1 id is filtered out rather than list.remove()'d, so the cell also
# works when no row carries -1 (remove would raise ValueError in that case).
unique_labels = [
    label
    for label in df['cluster_labels'].value_counts().index
    if label != -1
]

print(unique_labels)


[8.0, 9.0, 7.0, 5.0, 6.0, 2.0, 0.0, 1.0, 10.0, 3.0, 4.0, 11.0]


In [24]:

# One (lowest, highest) marker-price pair per cluster.
# The bounds are deliberately not stored in variables called `min`/`max`:
# that would shadow the builtins for every cell executed afterwards.
keyZones = []
for label in unique_labels:
    # Select the cluster's marker prices once and reuse them for both bounds.
    cluster_points = df.loc[df['cluster_labels'] == label, 'pointpos']
    keyZones.append((cluster_points.min(), cluster_points.max()))

len(keyZones)

12

In [25]:
# Window to draw: rows from the 300th-from-last up to, but not including,
# the final row.
# NOTE(review): the slice stops before the last candle — confirm that
# excluding it is intended.
df2 = df[-300:-1]
fig2 = go.Figure(
    data=[
        go.Candlestick(
            x=df2.index,
            open=df2['open'],
            high=df2['high'],
            low=df2['low'],
            close=df2['close'],
            increasing_line_color='green',
            decreasing_line_color='red',
        )
    ]
)

# Overlay the pivot markers; rows without a pivot hold NaN in 'pointpos'.
fig2.add_scatter(
    x=df2.index,
    y=df2['pointpos'],
    mode="markers",
    marker=dict(size=5, color="MediumPurple"),
    name="pivot",
)

# Draw each key zone as a horizontal band between its two price bounds.
# The loop variables avoid the names `min`/`max` so the builtins are not
# shadowed for the rest of the notebook session.
for zone_low, zone_high in keyZones:
    fig2.add_hrect(y0=zone_low, y1=zone_high)

fig2.update_layout(xaxis_rangeslider_visible=False)
fig2.update_xaxes(showgrid=False)
fig2.update_yaxes(showgrid=False)
fig2.update_layout(paper_bgcolor='black', plot_bgcolor='black')

fig2