In [1]:
import pandas as pd

df = pd.read_csv(
    filepath_or_buffer="prices_data/[SP500]_Daily_01-03-2020__01-03-2024.csv",
    delimiter="\t"
)

# print(df.head())
# print(df.columns)

df = df.rename(
    columns = {
        '<DATE>':'date', 
        '<TIME>':'time', 
        '<OPEN>':'open', 
        '<HIGH>':'high', 
        '<LOW>':'low', 
        '<CLOSE>':'close'
    }
)

df = df.drop(['<TICKVOL>', '<VOL>', '<SPREAD>'], axis=1)

print(df)

            date     open     high      low    close
0     2020.03.02  2903.82  3093.92  2892.20  3071.83
1     2020.03.03  3076.48  3138.15  2975.46  2993.38
2     2020.03.04  2990.14  3130.41  2979.02  3111.44
3     2020.03.05  3109.86  3114.48  2999.13  3025.40
4     2020.03.06  3025.18  3038.54  2901.28  2963.15
...          ...      ...      ...      ...      ...
1031  2024.02.26  5084.76  5096.65  5065.45  5066.33
1032  2024.02.27  5066.71  5082.13  5056.85  5080.28
1033  2024.02.28  5079.76  5079.96  5053.42  5058.93
1034  2024.02.29  5063.23  5105.08  5051.60  5090.49
1035  2024.03.01  5092.02  5140.52  5080.72  5130.30

[1036 rows x 5 columns]


In [2]:

def pivotid(df1, l, n1, n2): #n1 n2 before and after candle l
    if l-n1 < 0 or l+n2 >= len(df1):
        return 0
    
    pividlow=1
    pividhigh=1
    for i in range(l-n1, l+n2+1):
        if(df1.low[l]>df1.low[i]):
            pividlow=0
        if(df1.high[l]<df1.high[i]):
            pividhigh=0
    if pividlow and pividhigh:
        return 3
    elif pividlow:
        return 1
    elif pividhigh:
        return 2
    else:
        return 0
    
df['pivot'] = df.apply(lambda x: pivotid(df, x.name,20,10), axis=1)


df['pivot'].value_counts()

pivot
0    1001
1      18
2      17
Name: count, dtype: int64

In [3]:
import numpy as np
def pointpos(x, eps=20):
    if x['pivot']==1:
        return x['low']-eps
    elif x['pivot']==2:
        return x['high']+eps
    else:
        return np.nan

df['pointpos'] = df.apply(lambda row: pointpos(row), axis=1)


In [4]:
import plotly.graph_objects as go

dfpl = df
fig = go.Figure(data=[go.Candlestick(x=dfpl.index,
                open=dfpl['open'],
                high=dfpl['high'],
                low=dfpl['low'],
                close=dfpl['close'],
                increasing_line_color= 'green', 
                decreasing_line_color= 'red')])

fig.add_scatter(x=dfpl.index, y=dfpl['pointpos'], mode="markers",
                marker=dict(size=5, color="MediumPurple"),
                name="pivot")
fig.update_layout(xaxis_rangeslider_visible=False)
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.update_layout(paper_bgcolor='black', plot_bgcolor='black')

fig

In [5]:
from sklearn.cluster import MeanShift

X = df[['pointpos']].dropna()

print(X)

clustering = MeanShift(bandwidth=80, cluster_all=True).fit(X)

print(clustering.labels_)

     pointpos
42    2992.60
69    3253.09
131   3608.17
147   3189.31
159   3569.75
173   3213.17
179   3694.26
248   3983.73
260   3702.98
307   4264.45
310   4014.63
337   4117.50
391   4570.33
411   4250.27
447   4763.49
456   4474.29
477   4837.63
491   4201.17
514   4086.28
537   4657.17
574   3790.00
594   3615.57
636   4345.20
678   3469.91
721   4158.51
728   3743.69
756   4215.39
783   3789.29
818   4207.17
821   4028.03
881   4626.66
897   4316.33
907   4560.46
930   4180.99
947   4083.29
[9 5 4 5 4 5 2 1 2 0 1 0 3 0 6 7 6 0 1 3 2 4 7 8 0 2 0 2 0 1 3 7 3 0 1]


In [6]:

from sklearn.cluster import DBSCAN

X = df[['pointpos']].dropna()

print(X)

clustering = DBSCAN(eps=40, min_samples=2).fit(X)

print(clustering.labels_)


     pointpos
42    2992.60
69    3253.09
131   3608.17
147   3189.31
159   3569.75
173   3213.17
179   3694.26
248   3983.73
260   3702.98
307   4264.45
310   4014.63
337   4117.50
391   4570.33
411   4250.27
447   4763.49
456   4474.29
477   4837.63
491   4201.17
514   4086.28
537   4657.17
574   3790.00
594   3615.57
636   4345.20
678   3469.91
721   4158.51
728   3743.69
756   4215.39
783   3789.29
818   4207.17
821   4028.03
881   4626.66
897   4316.33
907   4560.46
930   4180.99
947   4083.29
[-1  0  1  0  1  0  2  3  2  4  3  5  6  4 -1 -1 -1  4  5  7  8  1  9 -1
  4 -1  4  8  4  3  7  9  6  4  5]


In [7]:
df['cluster_labels'] = np.NaN

for index, label in zip(X.index, clustering.labels_):
    df.loc[index, 'cluster_labels'] = label

df


Unnamed: 0,date,open,high,low,close,pivot,pointpos,cluster_labels
0,2020.03.02,2903.82,3093.92,2892.20,3071.83,0,,
1,2020.03.03,3076.48,3138.15,2975.46,2993.38,0,,
2,2020.03.04,2990.14,3130.41,2979.02,3111.44,0,,
3,2020.03.05,3109.86,3114.48,2999.13,3025.40,0,,
4,2020.03.06,3025.18,3038.54,2901.28,2963.15,0,,
...,...,...,...,...,...,...,...,...
1031,2024.02.26,5084.76,5096.65,5065.45,5066.33,0,,
1032,2024.02.27,5066.71,5082.13,5056.85,5080.28,0,,
1033,2024.02.28,5079.76,5079.96,5053.42,5058.93,0,,
1034,2024.02.29,5063.23,5105.08,5051.60,5090.49,0,,


In [8]:

unique_labels = df['cluster_labels'].value_counts()
unique_labels = list(unique_labels.index)
#unique_labels.remove(-1)

print(unique_labels)


[4.0, -1.0, 0.0, 1.0, 3.0, 5.0, 2.0, 6.0, 7.0, 8.0, 9.0]


In [9]:

keyZonesMinMax = []
keyZonesMean = []
keyZonesMinMaxFlattened = []
for label in unique_labels:
     min = df[df['cluster_labels'] == label]['pointpos'].min()
     max = df[df['cluster_labels'] == label]['pointpos'].max()
     mean = df[df['cluster_labels'] == label]['pointpos'].mean()
     keyZonesMinMax.append((min, max))
     keyZonesMean.append(mean)
     keyZonesMinMaxFlattened.append(min)
     keyZonesMinMaxFlattened.append(max)

keyZonesMinMaxFlattened.sort(reverse=True)
print("length : ", len(keyZonesMinMaxFlattened))
print(keyZonesMinMaxFlattened)

length :  22
[4837.63, 4657.17, 4626.66, 4570.33, 4560.46, 4345.2, 4316.33, 4264.45, 4158.51, 4117.5, 4083.29, 4028.03, 3983.73, 3790.0, 3789.29, 3702.98, 3694.26, 3615.57, 3569.75, 3253.09, 3189.31, 2992.6]


In [10]:
df2 = df
fig2 = go.Figure(
    data=[go.Candlestick(
            x=df2.index,
            open=df2['open'],
            high=df2['high'],
            low=df2['low'],
            close=df2['close'],
            increasing_line_color= 'green', 
            decreasing_line_color= 'red')
        ]
)

# fig2.add_scatter(x=dfNoNaN.index, y=dfNoNaN['pointpos'], mode="markers",
#                 marker=dict(size=5, color="MediumPurple"),
#                 name="pivot")

# for (min, max) in keyZonesMinMax:
#     fig2.add_hrect(y0=min, y1=max)
for k in keyZonesMinMaxFlattened:
    fig2.add_hline(y=k)

fig2.update_layout(xaxis_rangeslider_visible=False)
fig2.update_xaxes(showgrid=False)
fig2.update_yaxes(showgrid=False)
fig2.update_layout(paper_bgcolor='black', plot_bgcolor='black')

fig2

In [11]:

i = 0
lastIndexKeyZone = 0
keysToRemove = []
lenght = len(keyZonesMinMaxFlattened)

while True:
    if i == lenght-1: break

    _4percentOfCurrentPrice = (df["close"][i]/100)*4

    if abs(keyZonesMinMaxFlattened[lastIndexKeyZone] - keyZonesMinMaxFlattened[i+1]) < _4percentOfCurrentPrice \
    and keyZonesMinMaxFlattened[lastIndexKeyZone] not in keysToRemove: # niveaux trop proches
        keysToRemove.append(keyZonesMinMaxFlattened[i+1]) 
        i = i+1 
    else:
        lastIndexKeyZone = i+1
        i = i+1 


for key in keysToRemove: keyZonesMinMaxFlattened.remove(key)

print("length : ", len(keyZonesMinMaxFlattened))
print(keyZonesMinMaxFlattened)

length :  10
[4837.63, 4657.17, 4345.2, 4158.51, 4028.03, 3790.0, 3694.26, 3569.75, 3253.09, 2992.6]


In [12]:
df["MA50"] = df["close"].rolling(window=50).mean()

In [15]:
df2 = df
fig2 = go.Figure(
    data=[go.Candlestick(
            x=df2.index,
            open=df2['open'],
            high=df2['high'],
            low=df2['low'],
            close=df2['close'],
            increasing_line_color= 'green', 
            decreasing_line_color= 'red')
        ]
)
fig2.add_trace(go.Scatter(x=df2.index, y=df2["MA50"], mode="lines", line=dict(color="#F50E0E"))) # RED

fig2.add_scatter(x=df2.index, y=df2['pointpos'], mode="markers",
                marker=dict(size=5, color="MediumPurple"),
                name="pivot")

for k in keyZonesMinMaxFlattened:
    fig2.add_hline(y=k)

fig2.update_layout(xaxis_rangeslider_visible=False)
fig2.update_xaxes(showgrid=False)
fig2.update_yaxes(showgrid=False)
fig2.update_layout(paper_bgcolor='black', plot_bgcolor='black')

fig2