In [8]:
import pandas as pd
import numpy as np

# Load your OHLC data
df = pd.read_csv('ohlc_1m_data_last_day.csv')

# Ensure 'timestamp' is a datetime type
df['timestamp'] = pd.to_datetime(df['timestamp'])
df.set_index('timestamp', inplace=True)

# Donchian Channel Parameters
donchian_period = 20  # 20-period by default

# Calculate Donchian Channel
df['Donchian_High'] = df['high'].rolling(window=donchian_period).max()
df['Donchian_Low'] = df['low'].rolling(window=donchian_period).min()

# VWAP Calculation
df['cumulative_volume'] = df['volume'].cumsum()
df['cumulative_price_volume'] = (df['close'] * df['volume']).cumsum()
df['VWAP'] = df['cumulative_price_volume'] / df['cumulative_volume']

# Breadth Indicator (Example: Percentage of closes above a moving average)
moving_average_period = 50
df['Moving_Avg'] = df['close'].rolling(window=moving_average_period).mean()
df['Breadth_Indicator'] = np.where(df['close'] > df['Moving_Avg'], 1, 0)

# Generate signals using 1 for 'Buy' and -1 for 'Sell'
df['Signal'] = np.where(
    (df['close'] > df['Donchian_High'].shift(1)) & 
    (df['close'] > df['VWAP']) & 
    (df['Breadth_Indicator'] == 1), 1,
    np.where(
        (df['close'] < df['Donchian_Low'].shift(1)) & 
        (df['close'] < df['VWAP']) & 
        (df['Breadth_Indicator'] == 0), -1, np.nan
    )
)

# Remove rows with NaN signals
df.dropna(subset=['Signal'], inplace=True)

# Convert Signal column to integer type
df['Signal'] = df['Signal'].astype(int)

# Map 1 to 'Buy' and -1 to 'Sell' in a separate 'Signal_Label' column
df['Hybrid_signal'] = df['Signal'].map({1: 'Buy', -1: 'Sell'})

# Resample to 4-hour intervals
df_resampled = df.resample('4H').agg({
    'open': 'first',
    'high': 'max',
    'low': 'min',
    'close': 'last',
    'volume': 'sum',
    'Donchian_High': 'last',
    'Donchian_Low': 'last',
    'VWAP': 'last',
    'Moving_Avg': 'last',
    'Breadth_Indicator': 'last',
    'Signal': lambda x: x.mode()[0] if not x.mode().empty else np.nan,  # Most frequent signal within the period
    'Hybrid_signal': lambda x: x.mode()[0] if not x.mode().empty else np.nan  # Most frequent signal label within the period
})

# Remove rows where the signal is NaN
df_resampled.dropna(subset=['Signal'], inplace=True)

# Convert Signal column to integer type in the resampled DataFrame
df_resampled['Signal'] = df_resampled['Signal'].astype(int)

# Save the results to a new CSV file
df_resampled.to_csv('4h_hybrid_signals.csv', index=True)

print("4-hour resampled data with combined signals generated successfully.")


  df_resampled = df.resample('4H').agg({


4-hour resampled data with combined signals generated successfully.
