In [1]:
import pandas as pd

In [2]:
# Load the rolling-percentile dataset and keep only the three outcome columns
# (the *_move columns) plus the percentile feature columns listed below.
# NOTE(review): the path is absolute and machine-specific.
df = pd.read_csv(
    r'C:\Users\33670\Desktop\framework\data_collection\historical_data\top6_rolling_percentile.csv'
)[[
    'first_hour_move',
    'open_to_close_move',
    'power_hour_move',
    'gap_percentile',
    'consecutive_days_above_20sma_percentile',
    'open_from_BB_up_percentile',
    'pm_dollar_volume_percentile',
    'x_day_high_at_open_percentile',
    '1month_change_from_high_percentile',
    'red_days_percentile',
    'green_days_percentile',
    'lower_move_before_breakout_of_pm_high_percentile',
]]


def drop_rows_with_non_numeric(df):
    """Return only the rows of ``df`` in which every cell is an int or a float.

    Booleans are rejected even though ``bool`` is a subclass of ``int``.
    ``NaN`` is a float, so rows containing ``NaN`` are kept.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to filter. It is not modified.

    Returns
    -------
    pandas.DataFrame
        The subset of rows of ``df`` whose cells all passed the check.
    """
    def _is_number(value):
        # bool must be excluded explicitly because isinstance(True, int) is True.
        return isinstance(value, (int, float)) and not isinstance(value, bool)

    # DataFrame.applymap is deprecated since pandas 2.1 in favour of
    # DataFrame.map. Look the method up on the class (not the instance) so a
    # column that happens to be called "map" cannot shadow it, and fall back
    # to applymap on older pandas versions.
    frame_type = type(df)
    elementwise = getattr(frame_type, "map", None) or frame_type.applymap
    mask = elementwise(df, _is_number)
    return df[mask.all(axis=1)]

# Keep only the rows whose cells are all ints/floats, and print that subset.
df_cleaned = drop_rows_with_non_numeric(df)
print(df_cleaned)

# Print the unfiltered frame with pandas' row/column truncation disabled,
# so every row and every column is shown.
with pd.option_context(
    'display.max_rows', None,
    'display.max_columns', None,
):
    print(df)

In [3]:
# Remove the breakout percentile column; the remaining columns are kept as-is.
df = df.drop('lower_move_before_breakout_of_pm_high_percentile', axis=1)
# Disabled export of the reduced frame:
#df.to_csv('main_tickers_percentiles.csv', index=True)

In [4]:
# Show the frame's (rows, columns) tuple.
print(df.shape)

(1990, 11)


In [5]:
from itertools import combinations


# Data preprocessing: turn the outcome and percentile columns into floats.
columns_to_convert = [
    'first_hour_move',
    'open_to_close_move',
    'power_hour_move',
    'gap_percentile',
    'consecutive_days_above_20sma_percentile',
    'open_from_BB_up_percentile',
    'pm_dollar_volume_percentile',
    'x_day_high_at_open_percentile',
    '1month_change_from_high_percentile',
    'red_days_percentile',
    'green_days_percentile',
]


def _to_float_column(series):
    """Convert one column to float.

    Placeholder cells become NaN, every value is rendered as text, decimal
    commas are turned into dots, and the text is parsed as float.
    """
    without_placeholders = series.replace(['na', '#VALUE!', '', '²'], float('nan'))
    as_text = without_placeholders.astype(str)
    with_decimal_dot = as_text.str.replace(',', '.')
    return with_decimal_dot.astype(float)


for col in columns_to_convert:
    df[col] = _to_float_column(df[col])

In [6]:
# Feature columns used for filtering; every unordered triple of them is one
# candidate filter combination for the grid search.
parameters = [
    'gap_percentile',
    'consecutive_days_above_20sma_percentile',
    'open_from_BB_up_percentile',
    'pm_dollar_volume_percentile',
    'x_day_high_at_open_percentile',
    '1month_change_from_high_percentile',
    'red_days_percentile',
    'green_days_percentile',
]

combinations_of_three = [triple for triple in combinations(parameters, 3)]

In [7]:
def all_filter_conditions_with_labels(param, frame=None, low=0.3, high=0.7):
    """Build the three bucket filters (low / high / middle) for one column.

    Parameters
    ----------
    param : str
        Name of the column to bucket.
    frame : pandas.DataFrame, optional
        Frame holding the column. When omitted, the notebook-level ``df`` is
        used, which is the original behaviour.
    low, high : float, optional
        Bucket boundaries. Values strictly below ``low`` fall in the first
        bucket, values strictly above ``high`` in the second, and values in
        the closed interval ``[low, high]`` in the third. Defaults are 0.3
        and 0.7.

    Returns
    -------
    list of (pandas.Series, str)
        ``(boolean mask, label)`` pairs in the order low, high, middle.
        NaN cells are False in all three masks because every comparison
        with NaN is False.

    Raises
    ------
    ValueError
        If ``low`` is greater than ``high``.
    """
    if low > high:
        raise ValueError(f"low ({low}) must not exceed high ({high})")

    # Fall back to the global frame only when the caller did not supply one.
    data = df if frame is None else frame
    values = data[param]

    return [
        (values < low, f"< {low}"),
        (values > high, f"> {high}"),
        ((values >= low) & (values <= high), f"{low} - {high}"),
    ]

In [8]:
# Grid search: for every triple of parameters and every choice of bucket per
# parameter, sum the three outcome columns over the rows matching all three
# bucket filters. Filters matching fewer than MIN_ROWS_PER_FILTER rows are
# skipped.
MIN_ROWS_PER_FILTER = 50
RESULT_COLUMNS = [
    'Parameters',
    'Filter',
    'Sum_first_hour_move',
    'Sum_open_to_close_move',
    'Sum_power_hour_move',
    'Number of Rows',
]

# The bucket masks of a parameter depend only on that parameter, so build
# them once up front instead of rebuilding them in every inner iteration.
filters_by_param = {}
for combo in combinations_of_three:
    for param in combo:
        if param not in filters_by_param:
            filters_by_param[param] = all_filter_conditions_with_labels(param)

results_with_all_filters = []

for combo in combinations_of_three:
    for condition_A, label_A in filters_by_param[combo[0]]:
        for condition_B, label_B in filters_by_param[combo[1]]:
            condition_AB = condition_A & condition_B
            for condition_C, label_C in filters_by_param[combo[2]]:
                combined_condition = condition_AB & condition_C

                # Count matches from the mask first so that sparse filters
                # are rejected without slicing the frame.
                row_count = int(combined_condition.sum())
                if row_count < MIN_ROWS_PER_FILTER:
                    continue

                combined_label = f"{combo[0]}: {label_A}, {combo[1]}: {label_B}, {combo[2]}: {label_C}"
                filtered_df = df[combined_condition]

                results_with_all_filters.append({
                    'Parameters': combo,
                    'Filter': combined_label,
                    'Sum_first_hour_move': filtered_df['first_hour_move'].sum(),
                    'Sum_open_to_close_move': filtered_df['open_to_close_move'].sum(),
                    'Sum_power_hour_move': filtered_df['power_hour_move'].sum(),
                    'Number of Rows': row_count
                })

# Passing the columns explicitly keeps the frame well-formed even when no
# filter qualified; without them an empty result has no columns and the
# sort below raises KeyError.
results_with_all_filters_df = pd.DataFrame(results_with_all_filters, columns=RESULT_COLUMNS)
sorted_results_with_all_filters_df = results_with_all_filters_df.sort_values(by="Sum_open_to_close_move", ascending=False)

# Save the results to a CSV file
sorted_results_with_all_filters_df.to_csv('./data/top6_resultats_grid_search_rolling_percentiles.csv', index=False)