In [1]:
import pandas as pd
import numpy as np

# Target pandas dtype for every column this notebook knows how to cast.
# get_dtype() narrows this map down to the columns actually present in a frame.
full_dtypes = {
    # boolean / categorical columns
    "above_20sma": "bool",
    "ticker": "category",

    # floating-point columns
    "from_pm_high_to_open": "float64",
    "number_of_red_candles_one_hour_before_open": "float64",
    "percent_of_red_candles_one_hour_before_open": "float64",

    # integer columns
    "green_days": "int64",
    "x_day_low_at_open": "int64",
    "x_day_high_at_open": "int64",
    "red_days": "int64",

    # floating-point columns (continued)
    "pm_float_rotation": "float64",
    "pm_dollar_volume": "float64",
    "pm_retracement": "float64",
    "lower_move_before_breakout_of_pm_high": "float64",
    "1month_change_from_high": "float64",
    "first_15_min_relative_volume": "float64",
    "open_from_20sma": "float64",
    "gap": "float64",
    "average_dollar_volume_previous_day": "float64",
}

# Columns for which a rolling percentile column ("<name>_percentile") is computed.
parameters = [
    "pm_retracement",
    "lower_move_before_breakout_of_pm_high",
    "first_15_min_relative_volume",
    "green_days",
    "red_days",
    "1month_change_from_high",
    "open_from_20sma",
    "x_day_low_at_open",
    "x_day_high_at_open",
    "gap",
    "average_dollar_volume_previous_day",
]

def percentile_of_score(a, score):
    """Return the fractional percentile rank of ``score`` relative to ``a``.

    The rank is the share of reference values strictly below ``score`` plus
    half the share of values equal to it. It is expressed as a fraction in
    the range 0.0-1.0 (not 0-100).

    Missing values are handled explicitly. Every comparison against NaN is
    False, so without this handling a NaN ``score`` would be ranked as 0.0 and
    NaN entries in ``a`` would inflate the denominator while never being
    counted in the numerator.

    Parameters
    ----------
    a : sequence of numbers
        Reference values (list, NumPy array or pandas Series). NaN entries are
        ignored.
    score : number
        Value to rank against ``a``.

    Returns
    -------
    float
        Percentile rank in [0, 1], or NaN when ``score`` is NaN or ``a``
        contains no non-NaN value.
    """
    reference = np.asarray(a, dtype=float)
    # Rank only against observed values.
    reference = reference[~np.isnan(reference)]
    if reference.size == 0 or np.isnan(score):
        return np.nan

    below = np.count_nonzero(reference < score)
    ties = np.count_nonzero(reference == score)
    return (below + 0.5 * ties) / reference.size

def calculate_rolling_percentile(series):
    """Rank each value against the earlier values of its trailing 30-day window.

    For every row, a '30D' rolling window is taken over ``series`` and the last
    element of that window is ranked, via ``percentile_of_score``, against all
    preceding elements of the same window.

    Parameters
    ----------
    series : pandas.Series
        Values to rank. The '30D' offset window requires a datetime-like,
        monotonic index.

    Returns
    -------
    pandas.Series
        Rolling percentile ranks, indexed like ``series``. Windows holding a
        single observation yield NaN because there is nothing to rank against.
    """
    def calc_percentile(window):
        # With raw=True ``window`` is a NumPy array, so ``window[-1]`` is a plain
        # positional lookup. On a datetime-indexed Series the same expression
        # relies on positional fallback, which pandas has deprecated.
        if len(window) <= 1:
            return np.nan
        return percentile_of_score(window[:-1], window[-1])

    return series.rolling('30D').apply(calc_percentile, raw=True)

# Read the historical data set from CSV.
# (Alternative sample input: pd.read_csv('./data/test_pour_chatgpt.csv'))
df_test = pd.read_csv(r'C:\Users\33670\Desktop\framework\data_collection\historical_data/20231127.csv')

# Force every percentile input column to numeric; entries that cannot be
# parsed (such as the string "na") become NaN.
df_test[parameters] = df_test[parameters].apply(pd.to_numeric, errors='coerce')

# Parse 'date' as datetime and use it as the index.
df_test = df_test.assign(date=pd.to_datetime(df_test['date'])).set_index('date')

# Pick the dtype overrides that apply to a given frame
def get_dtype(df):
    """Return the entries of ``full_dtypes`` whose key is a column of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column names select the applicable entries.

    Returns
    -------
    dict
        Mapping of column name to dtype string, in ``full_dtypes`` order.
    """
    present = df.columns.tolist()
    applicable = {}
    for column, dtype in full_dtypes.items():
        if column in present:
            applicable[column] = dtype
    return applicable

# Turn any remaining literal 'na' strings into NaN, then cast the known columns
# to their configured dtypes.
# NOTE(review): casting a column that still holds NaN to "int64" raises, and
# casting NaN to "bool" yields True — confirm the data cannot hit either case.
dtypes = get_dtype(df_test)
df_test = df_test.replace(to_replace='na', value=np.nan).astype(dtypes)


# Compute the rolling percentile columns ticker by ticker.
# Per-ticker frames are collected in a list and concatenated once: calling
# pd.concat inside the loop re-copies every accumulated row on each iteration,
# and concatenating onto an empty DataFrame is a deprecated pandas pattern.
per_ticker_frames = []
for ticker in df_test['ticker'].unique():
    # Stable sort by date: the '30D' rolling window needs a monotonic index,
    # and a stable sort keeps rows sharing a date in their original order.
    subset = df_test[df_test['ticker'] == ticker].sort_index(kind='mergesort')

    for param in parameters:
        col_name = param + '_percentile'
        subset[col_name] = calculate_rolling_percentile(subset[param])

    per_ticker_frames.append(subset)

# pd.concat raises on an empty list, so fall back to an empty frame.
result_df_test = pd.concat(per_ticker_frames) if per_ticker_frames else pd.DataFrame()

# Move 'date' back to a regular column and restore a continuous integer index.
result_df_test = result_df_test.reset_index()

# Show the first rows as a sanity check.
print(result_df_test.head())


        date ticker   week_day    open      high   close      low  freeFloat  \
0 2021-01-04   AAPL     Monday  133.52  133.6116  129.41  126.760  99.890207   
1 2021-01-05   AAPL    Tuesday  128.89  131.7400  131.01  128.430  99.890207   
2 2021-01-06   AAPL  Wednesday  127.72  131.0499  126.60  126.382  99.890207   
3 2021-01-07   AAPL   Thursday  128.36  131.6300  130.92  127.860  99.890207   
4 2021-01-08   AAPL     Friday  132.43  132.6300  132.05  130.230  99.890207   

   floatShares    market_cap  ...  \
0  15617034345  2.087478e+12  ...   
1  15617034345  2.015092e+12  ...   
2  15617034345  1.996800e+12  ...   
3  15617034345  2.006806e+12  ...   
4  15617034345  2.070437e+12  ...   

   lower_move_before_breakout_of_pm_high_percentile  \
0                                               NaN   
1                                          0.000000   
2                                          0.000000   
3                                          0.333333   
4                    

In [2]:
# Persist the result as CSV, leaving out the integer index.
result_df_test.to_csv(
    path_or_buf=r'C:\Users\33670\Desktop\framework\data_collection\historical_data/old/1709223_rolling_percentiles.csv',
    index=False,
)