In [4]:
import pandas as pd 
import numpy as np

def fut_data(df):

    # ==================التأكد من أن العمود datetime=================================
    
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df['date'] = df['timestamp'].dt.date
    
    # ================(دوال لحساب أعلى/أقل سعر حتى الصف الحالي)======================
    
    def calc_high_so_far(group):
        group['high_today'] = group['high'].cummax()
        idx_high = group['high'].cummax().eq(group['high'])
        group['vol_at_high_today'] = group['volume'].where(idx_high).ffill()
        return group
    
    def calc_low_so_far(group):
        group['low_today'] = group['low'].cummin()
        idx_low = group['low'].cummin().eq(group['low'])
        group['vol_at_low_today'] = group['volume'].where(idx_low).ffill()
        return group
    
    # نخزن عمود date مؤقت
    dates = df['date']
    
    # نستبعد date من groupby.apply لتفادي التحذير
    df = df.drop(columns=['date'])
    df = pd.concat([
        calc_high_so_far(group) 
        for _, group in pd.concat([dates, df], axis=1).groupby(dates)
    ], axis=0)
    
    df = pd.concat([
        calc_low_so_far(group) 
        for _, group in df.groupby(dates)
    ], axis=0)
    
    # نرجع العمود date
    df['date'] = dates
    
    # ===================(حساب بيانات اليوم السابق بدون تسريب)==========================
    
    df['high_prev_day'] = df.groupby('date')['high_today'].shift(1)
    df['vol_at_high_prev_day'] = df.groupby('date')['vol_at_high_today'].shift(1)
    df['low_prev_day'] = df.groupby('date')['low_today'].shift(1)
    df['vol_at_low_prev_day'] = df.groupby('date')['vol_at_low_today'].shift(1)
    
    #===========================(volume fut)============================================
    
    # Relative Volume (RVOL)
    df['RVOL'] = df['volume'] / df['volume'].rolling(window=20).mean()
    
    # Volume Rate of Change (VROC)
    df['VROC'] = (df['volume'] - df['volume'].shift(5)) / df['volume'].shift(5)
    
    # Volume Weighted Spread
    df['spread_vol'] = (df['high'] - df['low']) * df['RVOL']
    
    # VWAP Deviation (بالانحراف المعياري)
    vwap_std = df['vwap'].rolling(window=20).std()
    df['vwap_dev'] = (df['close'] - df['vwap']) / vwap_std
    
    # Volume per Transaction
    df['vol_per_tx'] = df['volume'] / df['transactions']
    
    # Accumulation/Distribution based on VWAP
    df['accum_dist_vwap'] = np.where(df['close'] > df['vwap'], df['volume'], -df['volume'])
    df['accum_dist_vwap'] = df['accum_dist_vwap'].cumsum()
    
    # Volume Spike Detector → بدون تسريب
    df['vol_spike'] = (
        df['volume'] > (df['volume'].rolling(window=10).mean().shift(1) * 2)
    ).astype(int)
    
    # Volume Climax Bar → بدون تسريب وبـ tolerance
    max_vol_threshold = df['volume'].rolling(window=20).max().shift(1) * 0.95
    wide_range_threshold = (df['high'] - df['low']).rolling(window=20).mean().shift(1)
    
    df['climax_bar'] = (
        (df['volume'] >= max_vol_threshold) & 
        ((df['high'] - df['low']) > wide_range_threshold)
    ).astype(int)

    df['vwap_distance'] = df['close'] - df['vwap']
    
    df['vol_change_pct'] = (df['volume'] - df['volume'].shift(1)) / df['volume'].shift(1) * 100

    # Transaction Density (طبيعي بدون تسريب)
    df['tx_density'] = df['transactions'] / df['volume']
    
    # VWAP Pullback Strength - بدون تسريب
    df['vwap_pullback'] = (df['vwap'].shift(1) - df['close'].shift(1)) / df['vwap'].shift(1)
    
    # Volume Shock - بدون تسريب
    vol_avg = df['volume'].rolling(20).mean().shift(1)
    df['vol_shock'] = (df['volume'] > (vol_avg * 2)).astype(int) 
    
    #==================================(price fut)==================================
    
    denominator = (df['high'].rolling(5).max().shift(1) - df['low'].rolling(5).min().shift(1))
    denominator = denominator.replace(0, np.nan)  # استبدال المقام إذا كان صفر بـ NaN
    
    df['micro_trend_slope'] = (
        (df['close'] - df['close'].shift(5)) / denominator
    )
    
    # Price Change per Second (لو عندك بيانات وقت بالثواني)
    # تحويل timestamp إلى datetime
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df['seconds_diff'] = (df['timestamp'] - df['timestamp'].shift(1)).dt.total_seconds()
    df['price_change_per_sec'] = (df['close'] - df['close'].shift(1)) / df['seconds_diff']
    
    # Position in Range (آخر 20 شمعة) - بدون تسريب
    window_range = 20
    low_min = df['low'].rolling(window_range).min().shift(1)
    high_max = df['high'].rolling(window_range).max().shift(1)
    df['pos_in_range'] = (df['close'] - low_min) / (high_max - low_min)
    
    # Micro Support/Resistance Distance - بدون تسريب
    df['dist_to_recent_high'] = df['high'].rolling(window_range).max().shift(1) - df['close']
    df['dist_to_recent_low'] = df['close'] - df['low'].rolling(window_range).min().shift(1)
    
    # Candle Body-to-Range Ratio (ده طبيعي مفيهوش تسريب)
    df['body_to_range'] = abs(df['close'] - df['open']) / (df['high'] - df['low'])

    # الفرق بين سعر الفتح الحالي وسعر الإغلاق السابق
    df['open_vs_prev_close'] = df['open'] - df['close'].shift(1)
       
    #===================================(time fut)===================================
    
    # حساب الفرق بالدقائق بين الصف الحالي والسابق
    df['minutes_diff'] = (df['timestamp'] - df['timestamp'].shift(1)).dt.total_seconds() / 60
    
    # استخراج الساعة
    df['hour'] = df['timestamp'].dt.hour
    
    # استخراج الدقائق
    df['minute'] = df['timestamp'].dt.minute
    
    # تحديد الربع من الساعة
    def get_quarter(minute):
        if minute < 15:
            return 1
        elif minute < 30:
            return 2
        elif minute < 45:
            return 3
        else:
            return 4
    
    df['quarter_hour'] = df['minute'].apply(get_quarter)
    
    #df.dropna(inplace=True)

    #=====================================(y)========================================
    
    n_future = 3  # 6 ساعات لقدّام
    
    df['future_return'] = df['close'].shift(-n_future) / df['close'] - 1
    
    # نحدد الإشارات بناءً على عتبة معينة للحركة
    threshold = 0.0003  
    
    df['Signal'] = 0
    df.loc[df['future_return'] > threshold, 'Signal'] = 2
    df.loc[df['future_return'] < -threshold, 'Signal'] = 1

    #================================(clean data)====================================
    
    df.dropna(inplace=True)

    #================================================================================

    return df


In [5]:
df = pd.read_csv(r"C:\Users\Access\Documents\DATA\AAPL\df-23-8-25-8.csv" )

df_train = df[:70000]
df_test = df[70000:]

df_train = fut_data(df_train)
df_test = fut_data(df_test)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['timestamp'] = pd.to_datetime(df['timestamp'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = df['timestamp'].dt.date
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['timestamp'] = pd.to_datetime(df['timestamp'])
A value is trying to be set on a copy of a slice from a DataFrame.
T

In [11]:
df_test.reset_index(inplace=True)

In [15]:
df_test = df_test[9:]