In [1]:
import pandas as pd
import numpy as np
from pandas.tseries.offsets import BDay
import requests
from datetime import timedelta
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


# Data Preparattion

In [2]:
def fetch_data(url):
    response = requests.get(url)
    if response.status_code != 200:
        print("Error fetching data:", response.status_code, response.text)
        return None
    data = response.json()
    if 'results' not in data:
        print("No 'results' key in response:", data)
        return None
    return data

def calculate_daily_returns(df, prev_close=None):
    if prev_close is not None:
        df.loc[df.index[0], 'prev_close'] = prev_close
    else:
        df['prev_close'] = df['c'].shift(1)
    df['daily_return'] = (df['c'] - df['prev_close']) / df['prev_close']
    df['abs_daily_return'] = df['daily_return'].abs()
    return df

def get_top_outliers(df, n=10):
    return df.nlargest(n, 'abs_daily_return')

def update_outliers_list(current_df, historical_outliers_df, real_time_outliers_df, n=10):
    if 'source' not in current_df.columns:
        current_df['source'] = 'real-time'
    combined_df = pd.concat([historical_outliers_df, current_df])
    updated_outliers_df = combined_df.nlargest(n, 'abs_daily_return')
    updated_historical_outliers_df = updated_outliers_df[updated_outliers_df['source'] == 'historical']
    updated_real_time_outliers_df = updated_outliers_df[updated_outliers_df['source'] == 'real-time']
    return updated_historical_outliers_df, updated_real_time_outliers_df

def convert_timestamps(df):
    df['date'] = pd.to_datetime(df['t'], unit='ms')
    df.drop(columns=['t'], inplace=True)
    return df

# API key and endpoints
api_key = 'beBybSi8daPgsTp5yx5cHtHpYcrjp5Jq'
today = pd.Timestamp.now().date()
start_date = today - pd.DateOffset(years=1)
start_date_formatted = start_date.strftime('%Y-%m-%d')
end_date = today - pd.DateOffset(days=1)
end_date_formatted = end_date.strftime('%Y-%m-%d')
pair = 'C:USDCHF'
historical_url = f'https://api.polygon.io/v2/aggs/ticker/{pair}/range/1/day/{start_date_formatted}/{end_date_formatted}?adjusted=true&sort=asc&apiKey={api_key}'
real_time_url = f'https://api.polygon.io/v2/aggs/ticker/{pair}/range/1/day/{today}/{today}?adjusted=true&sort=asc&apiKey={api_key}'

# Fetch and process historical data
historical_data = fetch_data(historical_url)
if historical_data:
    historical_df = pd.DataFrame(historical_data['results'])
    historical_df = convert_timestamps(historical_df)
    historical_df = calculate_daily_returns(historical_df)
    historical_df['source'] = 'historical'
    historical_outliers_df = get_top_outliers(historical_df)
else:
    print("Failed to fetch or process historical data.")

# Fetch and process real-time data
real_time_data = fetch_data(real_time_url)
if real_time_data and 'results' in real_time_data:
    real_time_df = pd.DataFrame(real_time_data['results'])
    real_time_df = convert_timestamps(real_time_df)
    # Use the last close from historical data
    last_close = historical_df['c'].iloc[-1] if not historical_df.empty else None
    real_time_df = calculate_daily_returns(real_time_df, prev_close=last_close)
    real_time_df['source'] = 'real-time'
    updated_historical_outliers_df, updated_real_time_outliers_df = update_outliers_list(real_time_df, historical_outliers_df, pd.DataFrame())
    # Update historical data
    historical_df = pd.concat([historical_df.iloc[1:], real_time_df])  # Keep historical data rolling
else:
    print("No new data available or failed to fetch real-time data.")
    
# Combine data for Top 10 Outliers
full_outlier_df = pd.concat([updated_historical_outliers_df, updated_real_time_outliers_df])

sorted_outliers_data = full_outlier_df.sort_values(by="date")

In [3]:
def fetch_data(url):
    response = requests.get(url)
    if response.status_code != 200:
        print("Error fetching data:", response.status_code, response.text)
        return None
    data = response.json()
    if 'results' not in data:
        print("No 'results' key in response:", data)
        return None
    return data

def calculate_daily_returns_threshold(df, prev_close=None):
    if prev_close is not None:
        df.loc[df.index[0], 'prev_close'] = prev_close
    else:
        df['prev_close'] = df['c'].shift(1)
    df['daily_return'] = (df['c'] - df['prev_close']) / df['prev_close']
    # print(df['daily_return'])
    df['abs_daily_return'] = df['daily_return'].abs()
    return df

def convert_timestamps(df):
    df['date'] = pd.to_datetime(df['t'], unit='ms')
    df.drop(columns=['t'], inplace=True)
    return df


def fetch_daily_return_prior(symbol, current_date, previous_date, api_key):
    url = f'https://api.polygon.io/v2/aggs/ticker/{symbol}/range/1/day/{previous_date}/{current_date}?adjusted=true&sort=asc&apiKey={api_key}'
    data = fetch_data(url)
    if data and 'results' in data and len(data['results']) > 0:
        df = pd.DataFrame(data['results'])
        # print(df)
        df = convert_timestamps(df)
        # print(f"converted: {df}")
        df = df.loc[::-1].reset_index(drop=True)
        df = calculate_daily_returns_threshold(df)
        print(f"daily return: {df}")
        # print(f"daily_return {df['abs_daily_return']}")
        if len(df['abs_daily_return']) == 2:
            return df['abs_daily_return'].iloc[1]
    return None
    

def find_prior_outlier_threshold(df, symbol, api_key):
    df = df.sort_values(by='date').reset_index(drop=True)
    df['date'] = pd.to_datetime(df['date'])
    
    results = []
    
    for index, row in df.iterrows():
        current_date = row['date']
        current_return = row['abs_daily_return']
        print(f"outlier date: {current_date}")
        while True:
            # print(f"current_return: {current_return}")
            previous_date = current_date - pd.DateOffset(days=1)
            # print(f"previous date: {previous_date}")
            current_date_str = current_date.strftime('%Y-%m-%d')
            previous_date_str = previous_date.strftime('%Y-%m-%d')
            previous_return = fetch_daily_return_prior(symbol, current_date_str, previous_date_str, api_key)
            # print(f"previous return: {previous_return}")
            if previous_return is not None:
                if (current_return - previous_return) < 0:
                    
                    days_difference = (row['date'] - previous_date).days
                    # print(current_return - previous_return)
                    results.append({
                        'outlier_date': row['date'],
                        'prior_outlier_threshold_date': previous_date,
                        'days_difference': days_difference
                    })
                    # print()
                    break
                else:
                    current_date = previous_date
                    current_return = previous_return
            else:
                # print(f"Failed to fetch data for {previous_date_str}")
                # break
                current_date = previous_date

    return pd.DataFrame(results)


# Applying the corrected function to the data
prior_outlier_thresholds = find_prior_outlier_threshold(sorted_outliers_data, pair, api_key)

outlier date: 2023-11-14 00:00:00
daily return:         v      vw        o       c        h       l       n       date  \
0  168024  0.8965  0.90127  0.8891  0.90271  0.8875  168024 2023-11-14   
1  149889  0.9024  0.90192  0.9012  0.90530  0.9004  149889 2023-11-13   

   prev_close  daily_return  abs_daily_return  
0         NaN           NaN               NaN  
1      0.8891      0.013609          0.013609  
outlier date: 2023-12-27 00:00:00
daily return:         v      vw        o        c        h       l       n       date  \
0  174820  0.8495  0.85359  0.84250  0.85487  0.8406  174820 2023-12-27   
1  112192  0.8555  0.85600  0.85372  0.85796  0.8528  112192 2023-12-26   

   prev_close  daily_return  abs_daily_return  
0         NaN           NaN               NaN  
1      0.8425      0.013318          0.013318  
outlier date: 2024-01-02 00:00:00
daily return:         v      vw        o        c        h         l       n       date  \
0  148722  0.8473  0.84185  0.85004  0.850

In [4]:
def fetch_daily_return_post(symbol, current_date, post_date, api_key):
    url = f'https://api.polygon.io/v2/aggs/ticker/{symbol}/range/1/day/{current_date}/{post_date}?adjusted=true&sort=asc&apiKey={api_key}'
    data = fetch_data(url)
    if data and 'results' in data and len(data['results']) > 0:
        df = pd.DataFrame(data['results'])
        # print(df)
        df = convert_timestamps(df)
        # print(f"converted: {df}")
        df = calculate_daily_returns_threshold(df)
        # print(f"daily return: {df}")
        # print(f"daily_return {df['abs_daily_return']}")
        if len(df['abs_daily_return']) == 2:
            return df['abs_daily_return'].iloc[1]
    return None


def find_post_outlier_threshold(df, symbol, api_key):
    df = df.sort_values(by='date').reset_index(drop=True)
    df['date'] = pd.to_datetime(df['date'])
    
    results = []
    
    for index, row in df.iterrows():
        current_date = row['date']
        current_return = row['abs_daily_return']
        # print(f"outlier date: {current_date}")
        while True:
            # print(f"current_return: {current_return}")
            post_date = current_date + pd.DateOffset(days=1)
            # print(f"previous date: {previous_date}")
            current_date_str = current_date.strftime('%Y-%m-%d')
            post_date_str = post_date.strftime('%Y-%m-%d')
            post_return = fetch_daily_return_post(symbol, current_date_str, post_date_str, api_key)
            # print(f"post return: {post_return}")
            if post_return is not None:
                if (current_return - post_return) < 0:
                    
                    days_difference = (post_date - row['date']).days
                    # print(current_return - post_return)
                    results.append({
                        'outlier_date': row['date'],
                        'post_outlier_threshold_date': post_date,
                        'days_difference': days_difference
                    })
                    # print()
                    break
                else:
                    current_date = post_date
                    current_return = post_return
            else:
                # print(f"Failed to fetch data for {post_date_str}")
                # break
                current_date = post_date

    return pd.DataFrame(results)

post_outlier_thresholds = find_post_outlier_threshold(sorted_outliers_data, pair, api_key)

In [5]:
prior_outlier_thresholds.rename(columns={'outlier_date': 'date'}, inplace=True)
sorted_outliers_data = pd.merge(
    sorted_outliers_data,
    prior_outlier_thresholds[['date', 'days_difference']],
    on = 'date',
    how='left'
)
sorted_outliers_data.rename(columns={'days_difference':'pre_threshold'}, inplace=True)

post_outlier_thresholds.rename(columns={'outlier_date': 'date'}, inplace=True)
sorted_outliers_data = pd.merge(
    sorted_outliers_data,
    post_outlier_thresholds[['date', 'days_difference']],
    on = 'date',
)
sorted_outliers_data.rename(columns={'days_difference':'post_threshold'}, inplace=True)

In [6]:
# Convert dates in dataset to datetime objects
sorted_outliers_data['date'] = pd.to_datetime(sorted_outliers_data['date'])

date_ranges = pd.DataFrame({
    "start_date": sorted_outliers_data.apply(lambda row: row['date'] - BDay(row['pre_threshold']), axis=1),
    "end_date": sorted_outliers_data.apply(lambda row: row['date'] + BDay(row['post_threshold']), axis=1),
    "outlier_date": sorted_outliers_data['date'],
    # "year": sorted_outliers_data['year']
})

In [7]:
def classify_recovery_range(df, start_date, end_date, fast_threshold=4):
    # Filter the data for the specified range
    range_data = df[(df['date'] >= start_date) & (df['date'] <= end_date)]
    if range_data.empty:
        return "no data"
    
    # Calculate cumulative percentage change
    cumulative_change = range_data['pct_change'].sum()
    
    # Classify the recovery based on the cumulative change
    if cumulative_change >= fast_threshold:
        return 'fast recovery'
    else:
        return 'slow recovery'

def calculate_daily_returns(df, prev_close=None):
    if prev_close is not None:
        df.loc[df.index[0], 'prev_close'] = prev_close
    else:
        df['prev_close'] = df['c'].shift(1)
    df['daily_return'] = (df['c'] - df['prev_close']) / df['prev_close']
    df['abs_daily_return'] = df['daily_return'].abs()
    df['pct_change'] = df['c'].pct_change() * 100
    
    df['direction'] = df['pct_change'].apply(lambda x: 'Up' if x > 0 else 'Down')
    df['pct_change'] = df['pct_change'].abs()
    
    return df

In [8]:
def fetch_hourly_data_chunk(symbol, start_date, end_date, api_key):
    formatted_start_date = start_date.strftime('%Y-%m-%d')
    formatted_end_date = end_date.strftime('%Y-%m-%d')

    url = f"https://api.polygon.io/v2/aggs/ticker/{symbol}/range/1/hour/{formatted_start_date}/{formatted_end_date}?apiKey={api_key}"
    response = requests.get(url)
    
    if response.status_code != 200:
        print(f"Failed to fetch data: {response.status_code} - {response.text}")
        return None
    
    response_data = response.json()
    
    if 'results' not in response_data:
        print(f"No 'results' in response: {response_data}")
        return None

    df = pd.DataFrame(response_data['results'])
    df['date'] = pd.to_datetime(df['t'], unit='ms')
    df.drop(columns=['t'], inplace=True)
    
    return df

def fetch_and_process_hourly_data(symbol, start_date, end_date, api_key):
    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_columns', None)
    pd.set_option('display.max_colwidth', None)
    
    # Split the date range into smaller chunks
    chunk_size = 3  # Fetch data in 7-day chunks
    date_ranges = [(start_date + timedelta(days=i*chunk_size), 
                    min(end_date, start_date + timedelta(days=(i+1)*chunk_size - 1)))
                   for i in range((end_date - start_date).days // chunk_size + 1)]

    # print((end_date - start_date).days // chunk_size + 1)
    all_data = []

    for start, end in date_ranges:
        chunk_data = fetch_hourly_data_chunk(symbol, start, end, api_key)
        if chunk_data is not None:
            all_data.append(chunk_data)
    
    if not all_data:
        print("No data fetched")
        return None
    
    df = pd.concat(all_data)
    hourly_data = calculate_daily_returns(df)
    hourly_data['Recovery'] = classify_recovery_range(hourly_data, start_date, end_date)
    hourly_data.set_index('date', inplace=True)
    
    full_index = pd.date_range(start=start_date, end=end_date + timedelta(days=1), freq='H')
    hourly_data = hourly_data.reindex(full_index)
    
    hourly_data.reset_index(inplace=True)
    hourly_data.rename(columns={'index': 'date'}, inplace=True)
    
    return hourly_data

# Creating prior and post data for testing and evaluating

In [9]:
# Convert start_date, end_date, and outlier_date to Timestamp for consistent comparison
start_date_co = pd.Timestamp(date_ranges['start_date'].iloc[-1])
end_date_co = pd.Timestamp(date_ranges['end_date'].iloc[-1]) + pd.Timedelta(days=1)  # Extend the end date by one additional day
outlier_date_co = pd.Timestamp(date_ranges['outlier_date'].iloc[-1])

# Get daily data for the range including days before and after the outlier
daily_data = fetch_and_process_hourly_data(pair, start_date_co, end_date_co, api_key)

# Filter out weekdends
daily_data = daily_data[~daily_data['date'].dt.weekday.isin([5,6])]

# prior_data from start_date to outlier_date inclusive
prior_data = daily_data[(daily_data['date'] >= start_date_co) & (daily_data['date'] < outlier_date_co)]
prior_data["day type"] = "prior day"

# outlier_data is for the hourly data on the day of the outlier
outlier_data = daily_data[(daily_data['date'].dt.date == outlier_date_co.date())]
outlier_data["day type"] = "outlier day"

# post_data from the day after outlier_date to end_date
post_outlier_ts = outlier_date_co + pd.Timedelta(days=1)  # Starting the day after the outlier_date
post_data = daily_data[(daily_data['date'] >= post_outlier_ts) & (daily_data['date'] <= end_date_co)]
post_data["day type"] = "post day"

  full_index = pd.date_range(start=start_date, end=end_date + timedelta(days=1), freq='H')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prior_data["day type"] = "prior day"
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  outlier_data["day type"] = "outlier day"
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  post_data["day type"] = "post day"


In [10]:
post_data

Unnamed: 0,date,v,vw,o,c,h,l,n,prev_close,daily_return,abs_daily_return,pct_change,direction,Recovery,day type
48,2024-07-18 00:00:00,5674.0,0.883,0.88203,0.88362,0.88372,0.882,5674.0,0.88195,0.001894,0.001894,0.189353,Up,fast recovery,post day
49,2024-07-18 01:00:00,5576.0,0.8841,0.88363,0.88433,0.88455,0.8834,5576.0,0.88362,0.000804,0.000804,0.080351,Up,fast recovery,post day
50,2024-07-18 02:00:00,4355.0,0.8844,0.8843,0.88439,0.88457,0.8839,4355.0,0.88433,6.8e-05,6.8e-05,0.006785,Up,fast recovery,post day
51,2024-07-18 03:00:00,4009.0,0.8844,0.8844,0.88465,0.88471,0.8838,4009.0,0.88439,0.000294,0.000294,0.029399,Up,fast recovery,post day
52,2024-07-18 04:00:00,6461.0,0.8845,0.8846,0.88462,0.8848,0.884,6461.0,0.88465,-3.4e-05,3.4e-05,0.003391,Down,fast recovery,post day
53,2024-07-18 05:00:00,7241.0,0.8842,0.88461,0.8835,0.88471,0.8834,7241.0,0.88462,-0.001266,0.001266,0.126608,Down,fast recovery,post day
54,2024-07-18 06:00:00,8892.0,0.884,0.8837,0.88391,0.88432,0.8834,8892.0,0.8835,0.000464,0.000464,0.046406,Up,fast recovery,post day
55,2024-07-18 07:00:00,11086.0,0.8836,0.88391,0.88376,0.88446,0.8824,11086.0,0.88391,-0.00017,0.00017,0.01697,Down,fast recovery,post day
56,2024-07-18 08:00:00,9256.0,0.8839,0.88377,0.8841,0.88454,0.883,9256.0,0.88376,0.000385,0.000385,0.038472,Up,fast recovery,post day
57,2024-07-18 09:00:00,7024.0,0.8841,0.88426,0.88395,0.8845,0.8835,7024.0,0.8841,-0.00017,0.00017,0.016966,Down,fast recovery,post day


# LSTM Model

In [39]:
# # Load the dataset
# data = pd.read_csv('/Users/priyakundu/Downloads/USDCHF_daily.csv')
# data.fillna(method='ffill', inplace=True)
# data.fillna(method='bfill', inplace=True)

# # Create target variables
# data['direction_target'] = data['direction'].apply(lambda x: 1 if x == 'Up' else 0)
# data['recovery_target'] = data['Recovery'].apply(lambda x: 1 if x == 'fast recovery' else 0)
# data.drop(columns=['direction', 'Recovery'], inplace=True)

# # Function to create sequences based on prior day and post day for each outlier ID
# def create_sequences_per_outlier(data):
#     sequences = []
#     direction_targets = []
#     recovery_targets = []
    
#     unique_ids = data['outlier_id'].unique()
    
#     for oid in unique_ids:
#         prior_day_data = data[(data['outlier_id'] == oid) & (data['day type'] == 'prior day')]['c'].values
#         post_day_data = data[(data['outlier_id'] == oid) & (data['day type'] == 'post day')]
#         post_day_direction_target = post_day_data['direction_target'].values[:24]
#         post_day_recovery_target = post_day_data['recovery_target'].values[:24]
        
#         if len(post_day_direction_target) == 24 and len(post_day_recovery_target) == 24:
#             sequences.append(prior_day_data)
#             direction_targets.append(post_day_direction_target)
#             recovery_targets.append(post_day_recovery_target)
    
#     return sequences, np.array(direction_targets), np.array(recovery_targets)

# # Create sequences
# sequences, direction_targets, recovery_targets = create_sequences_per_outlier(data)

# # Find the maximum sequence length
# max_seq_length = max(len(seq) for seq in sequences)

# # Pad sequences to ensure they have the same length
# X = pad_sequences(sequences, maxlen=max_seq_length, dtype='float32', padding='post', value=0.0)

# # Reshape data for LSTM (samples, time steps, features)
# X = X.reshape((X.shape[0], X.shape[1], 1))

# # Convert targets to categorical
# y_direction = to_categorical(direction_targets)
# y_recovery = to_categorical(recovery_targets)

# print(X.shape)
# print(y_direction.shape)
# print(y_recovery.shape)

In [40]:
# # Load the dataset
# data = pd.read_csv('/Users/priyakundu/Downloads/USDCHF_daily.csv')
# data.fillna(method='ffill', inplace=True)
# data.fillna(method='bfill', inplace=True)

# # Create target variables
# data['direction_target'] = data['direction'].apply(lambda x: 1 if x == 'Up' else 0)
# data['recovery_target'] = data['Recovery'].apply(lambda x: 1 if x == 'fast recovery' else 0)
# data.drop(columns=['direction', 'Recovery'], inplace=True)

# grouped = data.groupby('outlier_id')
# for name, group in grouped:
	
# 	prior_day_data = group[group['day type'] == 'prior day']['c'].values
# 	print(prior_day_data)
# 	post_day_data = group[group['day type'] == 'post day']['direction_target'].values
# 	print(post_day_data)


In [None]:
# # Build the LSTM model for direction prediction
# model_direction = Sequential()
# model_direction.add(LSTM(50, return_sequences=True, input_shape=(X.shape[1], 1)))
# model_direction.add(Dropout(0.2))
# model_direction.add(LSTM(50))
# model_direction.add(Dropout(0.2))
# model_direction.add(Dense(2, activation='softmax'))

# # Compile the model
# model_direction.compile(optimizer=Adam(learning_rate=0.005), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# # Train the model
# model_direction.fit(X, y_direction, epochs=50, batch_size=32)

In [None]:
# # Build the LSTM model for recovery prediction
# model_recovery = Sequential()
# model_recovery.add(LSTM(50, return_sequences=True, input_shape=(X.shape[1], 1)))
# model_recovery.add(Dropout(0.2))
# model_recovery.add(LSTM(50))
# model_recovery.add(Dropout(0.2))
# model_recovery.add(Dense(2, activation='softmax'))

# # Compile the model
# model_recovery.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# # Train the model
# model_recovery.fit(X, y_recovery, epochs=50, batch_size=32)

In [None]:
# # Function to prepare sequences for new data
# def prepare_sequences(data):
#     sequences = []
#     # Ensure the data is sorted by date
#     data = data.sort_values(by='date')
#     prior_data = data[data['day type'] == 'prior day']
#     if not prior_data.empty:
#         prior_day_close_prices = prior_data['c'].values
#         sequences.append(prior_day_close_prices)
#     return pad_sequences(sequences, dtype='float32', padding='post', value=0.0)

# # # Load prior day data for evaluation
# # prior_day_data = pd.read_csv('/path/to/prior_day_data.csv')

# # Prepare sequences
# X_eval = prepare_sequences(prior_data)
# X_eval = X_eval.reshape((X_eval.shape[0], X_eval.shape[1], 1))

# # Predict direction and recovery
# y_pred_dir = model_direction.predict(X_eval)
# y_pred_rec = model_recovery.predict(X_eval)

# # Convert predictions to class labels
# y_pred_dir = np.argmax(y_pred_dir, axis=1)
# y_pred_rec = np.argmax(y_pred_rec, axis=1)

# # # Load post day data for evaluation
# # post_day_data = pd.read_csv('/path/to/post_day_data.csv')

# # Prepare true labels
# post_data['direction_target'] = post_data['direction'].apply(lambda x: 1 if x == 'Up' else 0)
# post_data['recovery_target'] = post_data['Recovery'].apply(lambda x: 1 if x == 'fast recovery' else 0)

# # True labels
# y_true_dir = post_data['direction_target'].values
# y_true_rec = post_data['recovery_target'].values

# # Ensure the lengths match for evaluation
# min_len = min(len(y_pred_dir), len(y_true_dir))

# y_pred_dir = y_pred_dir[:min_len]
# y_true_dir = y_true_dir[:min_len]
# y_pred_rec = y_pred_rec[:min_len]
# y_true_rec = y_true_rec[:min_len]

# # Evaluate model performance
# print("Direction Prediction Metrics:")
# print("Accuracy:", accuracy_score(y_true_dir, y_pred_dir))
# print("Precision:", precision_score(y_true_dir, y_pred_dir))
# print("Recall:", recall_score(y_true_dir, y_pred_dir))
# print("F1 Score:", f1_score(y_true_dir, y_pred_dir))

# print("\nRecovery Prediction Metrics:")
# print("Accuracy:", accuracy_score(y_true_rec, y_pred_rec))
# print("Precision:", precision_score(y_true_rec, y_pred_rec))
# print("Recall:", recall_score(y_true_rec, y_pred_rec))
# print("F1 Score:", f1_score(y_true_rec, y_pred_rec))