In [2]:
import pandas as pd
import numpy as np
import ta
from data_resorces import data_source

class alpha_factor:
    def __init__(self):
        pass

    @staticmethod    
    def ta_factor_indcators(df):
        df = pd.DataFrame(df, columns=['close', 'high', 'low', 'open', 'volume'])

        # RSI
        df['rsi'] = ta.momentum.RSIIndicator(df['close']).rsi()

        # Stochastic RSI
        stoch_rsi = ta.momentum.StochRSIIndicator(df['close'])
        df['stoch_rsi'] = stoch_rsi.stochrsi()

        # ROC
        df['roc'] = ta.momentum.ROCIndicator(df['close']).roc()

        # MACD
        macd = ta.trend.MACD(df['close'])
        df['macd_line'] = macd.macd()
        df['macd_signal'] = macd.macd_signal()
        df['macd_diff'] = macd.macd_diff()

        # Moving Averages
        df['sma_20'] = ta.trend.SMAIndicator(df['close'], window=20).sma_indicator()
        df['sma_50'] = ta.trend.SMAIndicator(df['close'], window=50).sma_indicator()

        # Bollinger Bands
        bb = ta.volatility.BollingerBands(df['close'])
        df['bb_bbm'] = bb.bollinger_mavg()
        df['bb_bbh'] = bb.bollinger_hband()
        df['bb_bbl'] = bb.bollinger_lband()
        
        df["returns"] = df["close"].pct_change()
        typical_price = (df["high"] + df["low"] + df["close"]) / 3
        df["vwap"] = (typical_price * df["volume"]).cumsum() / df["volume"].cumsum()


        return df.dropna()


In [3]:
import numpy as np
import pandas as pd
import requests


# Define helper functions for alpha formulas
def rank(series):
    return series.rank(pct=True)

def ts_rank(series, window):
    return series.rolling(int(window)).apply(lambda x: pd.Series(x).rank(pct=True).iloc[-1], raw=False)

def delta(series, period=1):
    return series.diff(period)

def delay(series, period=1):
    return series.shift(period)

def correlation(x, y, window):
    return x.rolling(int(window)).corr(y)

def covariance(x, y, window):
    return x.rolling(int(window)).cov(y)

def signed_power(series, exponent):
    return np.sign(series) * (np.abs(series) ** exponent)

def stddev(series, window):
    return pd.Series(series).rolling(int(window)).std()

def sum_(series, window):
    return series.rolling(int(window)).sum()

def ts_min(series, window):
    return pd.Series(series).rolling(int(window)).min()

def ts_max(series, window):
    return pd.Series(series).rolling(int(window)).max()

def decay_linear(series, window):
    weights = np.arange(1, int(window) + 1)
    return series.rolling(int(window)).apply(lambda x: np.dot(x, weights) / weights.sum(), raw=True)

def scale(series):
    return series / np.sum(np.abs(series))

def product(series):
    return pd.Series(series).prod()

def sign(series):
    return np.sign(series)

def log(series):
    return np.log(series)

def sum_series(series, window):
    return series.rolling(int(window)).sum()

def Ts_Rank(series, window):
    return series.rolling(int(window)).apply(lambda x: pd.Series(x).rank(pct=True).iloc[-1], raw=False)

def IndNeutralize(series, group):
    return series.groupby(group).transform(lambda x: x - x.mean())

def min_(series, window):
    return series.rolling(int(window)).min()

def ts_argmax(series, window):
    return series.rolling(int(window)).apply(np.argmax) / window


# Example alpha formula implementation
# Alpha Factors
class Alpha_Zero:
 def __init__(self):
     super().__init__()

 @staticmethod       
 def alpha_1(df):
    condition = df['returns'] < 0
    expr = np.where(condition, stddev(df['returns'], 20), df['close'])
    ranked = rank(ts_max(signed_power(expr, 2), 5))
    return ranked - 0.5

 @staticmethod
 def alpha_2(df):
    log_volume = np.log(df['volume'].replace(0, np.nan))  # Avoid log(0)
    delta_log_vol = delta(log_volume, 2)
    ranked_delta_log_vol = rank(delta_log_vol)

    price_change = (df['close'] - df['open']) / df['open']
    ranked_price_change = rank(price_change)

    return -1 * correlation(ranked_delta_log_vol, ranked_price_change, 6)

 @staticmethod
 def alpha_3(df):
    return (-1 * correlation(rank(df['close']), rank(df['volume']), 10))

 @staticmethod
 def alpha_4(df):
    return (-1 * ts_rank(rank(df['low']), 9))

 @staticmethod
 def alpha_5(df):
    vwap = df['vwap'].rolling(window=10).mean()
    return (rank((df['open'] - vwap)) * (-1 * rank(df['close'] - df['vwap']).abs()))

 @staticmethod
 def alpha_6(df):
    return (-1 * correlation(df['open'] , df['close'], 10))

 @staticmethod
 def alpha_7(df):
    adv20 = df['volume'].rolling(window=20).mean()
    delat_close_7 = delta(df['close'], 7)
    ts_r = ts_rank(abs(delat_close_7), 60)
    
    return pd.Series( np.where(
            adv20 < df['volume'],
            (-1 * ts_r * np.sign(delat_close_7)),
            -1.0
        ),
        index=df.index
    )

 @staticmethod
 def alpha_8(df):
    term = df['open'].rolling(5).sum() * df['returns'].rolling(5).sum()
    return -1 * rank(term - delay(term, 10))

 @staticmethod
 def alpha_9(df):
    delta_close = delta(df['close'], 1)
    return np.where(
        ts_min(delta_close, 5) > 0,
        delta_close,
        np.where(ts_max(delta_close, 5) < 0, delta_close, -1 * delta_close)
    )

 @staticmethod
 def alpha_10(df):
    delta_close = delta(df['close'], 1)
    cond = np.where(
        ts_min(delta_close, 4) > 0,
        delta_close,
        np.where(ts_max(delta_close, 4) < 0, delta_close, -1 * delta_close)
    )
    return rank(pd.Series(cond, index=df.index))

 @staticmethod
 def alpha_11(df):
    diff = df['vwap'] - df['close']
    return (rank(ts_max(diff, 3)) + rank(ts_min(diff, 3))) * rank(delta(df['volume'], 3))

 @staticmethod
 def alpha_12(df):
    return sign(delta(df['volume'], 1)) * (-1 * delta(df['close'], 1))

 @staticmethod
 def alpha_13(df):
    return -1 * rank(covariance(rank(df['close']), rank(df['volume']), 5))

 @staticmethod
 def alpha_14(df):
    return (-1 * rank(delta(df['returns'], 3))) * correlation(df['open'], df['volume'], 10)

 @staticmethod
 def alpha_15(df):
    corrs = rank(correlation(rank(df['high']), rank(df['volume']), 3))
    return -1 * corrs.rolling(window=3).sum()

 @staticmethod
 def alpha_16(df):
    return -1 * rank(covariance(rank(df['high']), rank(df['volume']), 5))

 @staticmethod
 def alpha_17(df):
    return -1 * rank(covariance(rank(df['close']), rank(df['volume']), 5))

 @staticmethod
 def alpha_18(df):
    close_open = df['close'] - df['open']
    term = stddev(abs(close_open), 5) + close_open
    return -1 * rank(term + correlation(df['close'], df['open'], 10))

 @staticmethod
 def alpha_19(df):
    part1 = -1 * np.sign((df['close'] - df['close'].shift(7)) + df['close'].diff(7))
    rolling_sum = df['returns'].rolling(window=250).sum()  
    ranked = rolling_sum.rank(pct=True)                    
    return part1 * (1 + ranked)  
 
 @staticmethod
 def alpha_20(df):
    term = (df['close'] - delay(df['close'], 7)) + delta(df['close'], 7)
    return -1 * sign(term) * (1 + rank(1 + df['returns'].rolling(250).sum()))

 @staticmethod
 def alpha_21(df):
    avg_8 = df['close'].rolling(8).mean()
    std_8 = stddev(df['close'], 8)
    avg_2 = df['close'].rolling(2).mean()
    vol_ratio = df['volume'] / df['volume'].rolling(20).mean()
    return np.where(
        (avg_8 + std_8) < avg_2, -1,
        np.where(avg_2 < (avg_8 - std_8), 1,
                 np.where((vol_ratio > 1) | (vol_ratio == 1), 1, -1))
    )

 @staticmethod
 def alpha_22(df):
    return -1 * (delta(correlation(df['high'], df['volume'], 5), 5) * rank(stddev(df['close'], 20)))

 @staticmethod
 def alpha_23(df):
    return np.where(
        (df['high'].rolling(20).mean() < df['high']),
        -1 * delta(df['high'], 2),
        0)

 @staticmethod
 def alpha_24(df):
    mean_close_100 = df['close'].rolling(100).mean()
    delta_mean = delta(mean_close_100, 100)
    delay_close = delay(df['close'], 100)
    ratio = delta_mean / delay_close
    cond = (ratio < 0.05) | (ratio == 0.05)
    return np.where(
        cond,
        -1 * (df['close'] - ts_min(df['close'], 100)),
        -1 * delta(df['close'], 3))

 @staticmethod 
 def alpha_25(df):
    adv20 = df['volume'].rolling(window=20).mean()
    return rank(((-1 * df['returns']) * adv20 * df['vwap'] * (df['high'] - df['close'])))

 @staticmethod
 def alpha_26(df):
    corr_series = correlation(rank(df['volume']), rank(df['vwap']), 6)
    sum_corr = sum_(corr_series, 2) / 2.0
    rank_sum_corr = rank(sum_corr)
    return np.where(rank_sum_corr > 0.5, -1, 1)

 @staticmethod
 def alpha_27(df):
    adv20 = df['volume'].rolling(window=20).mean()
    corr_val = correlation(adv20, df['low'], 5)
    middle = (df['high'] + df['low']) / 2
    return scale(corr_val + middle - df['close'])
 
 @staticmethod
 def alpha_28(df):
    return 
 
 @staticmethod
 def alpha_29(df):
    return -1 * ts_max(correlation(ts_rank(df['volume'], 5), ts_rank(df['high'], 5), 5), 3)

 @staticmethod
 def alpha_30(df):
    cond = (sign(df['close'] - delay(df['close'], 1)) +
            sign(delay(df['close'], 1) - delay(df['close'], 2)) +
            sign(delay(df['close'], 2) - delay(df['close'], 3)))
    return ((1.0 - rank(cond)) * sum_(df['volume'], 5)) / sum_(df['volume'], 20)

 @staticmethod
 def alpha_31(df):
    adv20 = df['close'].rolling(window=20).mean()
    part1 = rank(rank(rank(decay_linear(-1 * rank(rank(delta(df['close'], 10))), 10))))
    part2 = rank(-1 * delta(df['close'], 3))
    part3 = sign(scale(correlation(adv20, df['low'], 12)))
    return part1 + part2 + part3

 @staticmethod
 def alpha_32(df):
    part1 = scale((sum_(df['close'], 7) / 7) - df['close'])
    part2 = 20 * scale(correlation(df['vwap'], delay(df['close'], 5), 230))
    return part1 + part2
 
 @staticmethod
 def alpha_33(df):
        # Alpha#33: rank((-1 * ((1 - (open / close))^1)))
        factor = -1 * ((1 - (df['open'] / df['close'])) ** 1)
        return rank(factor)

 @staticmethod
 def alpha_34(df):
        # Alpha#34: rank(((1 - rank((stddev(returns, 2) / stddev(returns, 5)))) + (1 - rank(delta(close, 1)))))
        std_2 = stddev(df['returns'], 2)
        std_5 = stddev(df['returns'], 5)
        delta_1 = delta(df['close'], 1)

        rank_std_ratio = rank(std_2 / std_5)
        rank_delta_close = rank(delta_1)

        factor = (1 - rank_std_ratio) + (1 - rank_delta_close)
        return rank(factor)

 @staticmethod
 def alpha_35(df):
        # Alpha#35: ((Ts_Rank(volume, 32) * (1 - Ts_Rank(((close + high) - low), 16))) * (1 - Ts_Rank(returns, 32)))
        ts_rank_volume = ts_rank(df['volume'], 32)
        ts_rank_price_range = ts_rank((df['close'] + df['high'] - df['low']), 16)
        ts_rank_returns = ts_rank(df['returns'], 32)

        factor = ts_rank_volume * (1 - ts_rank_price_range) * (1 - ts_rank_returns)
        return factor

 @staticmethod
 def alpha_36(df):
    adv20 = df['volume'].rolling(window=20).mean()
    part1 = 2.21 * rank(correlation(df['close'] - df['open'], delay(df['volume'], 1), 15))
    part2 = 0.7 * rank(df['open'] - df['close'])
    part3 = 0.73 * rank(ts_rank(delay(-1 * df['returns'], 6), 5))
    part4 = rank(abs(correlation(df['vwap'], adv20, 6)))
    part5 = 0.6 * rank((sum_(df['close'], 200) / 200 - df['open']) * (df['close'] - df['open']))
    return part1 + part2 + part3 + part4 + part5

 @staticmethod
 def alpha_37(df):
        # Alpha#37: rank(correlation(delay((open - close), 1), close, 200)) + rank(open - close)
        delayed_diff = delay(df['open'] - df['close'], 1)
        corr_val = correlation(delayed_diff, df['close'], 200)
        return rank(corr_val) + rank(df['open'] - df['close'])

 @staticmethod
 def alpha_38(df):
        # Alpha#38: (-1 * rank(ts_rank(close, 10))) * rank(close / open)
        ts_rk = ts_rank(df['close'], 10)
        return (-1 * rank(ts_rk)) * rank(df['close'] / df['open'])


 @staticmethod
 def alpha_39(df):
    adv20 = df['volume'].rolling(window=20).mean()
    part1 = rank(correlation(delay(df['open'] - df['close'], 1), df['close'], 200))
    part2 = rank(df['open'] - df['close'])
    part3 = (-1 * rank(ts_rank(df['close'], 10))) * rank(df['close'] / df['open'])
    part4 = (-1 * rank(delta(df['close'], 7) * (1 - rank(decay_linear(df['volume'] / adv20, 9))))) * (1 + rank(sum_(df['returns'], 250)))
    return part1 + part2 + part3 + part4

 @staticmethod
 def alpha_40(df):
    return (-1 * rank(stddev(df['high'], 10))) * correlation(df['high'], df['volume'], 10)

 @staticmethod
 def alpha_41(df):
    return ((df['high'] * df['low']) ** 0.5) - df['vwap']

 @staticmethod
 def alpha_42(df):
    return rank(df['vwap'] - df['close']) / rank(df['vwap'] + df['close'])

 @staticmethod
 def alpha_43(df):
    adv20 = df['volume'].rolling(window=20).mean()
    
    return ts_rank(df['volume'] / adv20, 20) * ts_rank(-1 * delta(df['close'], 7), 8) 
#####################################################################################################


def get_latest_candles(symbol="BTCUSDT", interval="1m", limit=200):
    url = f"https://api.binance.com/api/v3/klines?symbol={symbol}&interval={interval}&limit={limit}"
    try:
        data = requests.get(url).json()
    except Exception as e:
        print("❌ Error fetching data:", e)
        return None

    df = pd.DataFrame(data, columns=[
        'timestamp', 'open', 'high', 'low', 'close', 'volume',
        'close_time', 'quote_asset_volume', 'number_of_trades',
        'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ignore'
    ])
    df = df.astype({
        'open': float, 'high': float, 'low': float,
        'close': float, 'volume': float
    })
    df["returns"] = df["close"].pct_change()
    typical_price = (df["high"] + df["low"] + df["close"]) / 3
    df["vwap"] = (typical_price * df["volume"]).cumsum() / df["volume"].cumsum()
    
    return   df.dropna()

def complute_all_alpha_zero(df):
    for i in range(1,14):
        func= getattr(Alpha_Zero, f'alpha_{i}')
        df[f'alpha_factors_{i}'] = func(df)
    return df.dropna()

if __name__ == "__main__":
    df = get_latest_candles(symbol="BTCUSDT", interval="1m", limit=200)

    df = complute_all_alpha_zero(df)
    print(df)




         timestamp       open       high        low      close    volume  \
15   1749484980000  107749.36  107790.70  107749.36  107790.70   1.66161   
16   1749485040000  107790.70  107852.16  107779.97  107840.98   7.11895   
22   1749485400000  107867.13  107867.13  107856.33  107860.47   4.04118   
27   1749485700000  107880.00  107894.94  107849.47  107894.93   8.45891   
30   1749485880000  107856.00  107915.98  107856.00  107897.57   9.32119   
..             ...        ...        ...        ...        ...       ...   
194  1749495720000  108453.19  108530.00  108446.58  108530.00  16.97442   
195  1749495780000  108530.00  108584.80  108488.23  108493.52  22.07898   
196  1749495840000  108493.53  108493.53  108477.60  108486.57   4.99809   
197  1749495900000  108486.58  108534.80  108486.57  108534.80   7.10149   
198  1749495960000  108534.79  108538.50  108497.70  108514.61  10.23251   

        close_time quote_asset_volume  number_of_trades  \
15   1749485039999    179081

In [None]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import requests
import time
from datetime import datetime
from sklearn.preprocessing import MinMaxScaler
import ta  # Make sure ta is installed: pip install ta

        
def complute_all_alpha_zero(df):
    for i in range(1,18):
        func= getattr(Alpha_Zero, f'alpha_{i}')
        df[f'alpha_factors_{i}'] = func(df)
    return df.dropna()

FEATURE_COLUMNS = [
    "open", "high", "low", "close", "rsi", "stoch_rsi",
    "macd_line", "macd_signal", "macd_diff", "sma_50", "sma_20",
    "bb_bbm", "bb_bbh", "bb_bbl", "returns", "vwap"
] + [f"alpha_factors_{i}" for i in range(1, 18) if i != 28 and i != 19]  

class genlenDataset(Dataset):
    def __init__(self, df, qen_len=10):
        self.qen_len = qen_len
        self.scaler = MinMaxScaler()
        print("Original df shape:", df.shape)
        print("Missing values per column:\n", df.isna().sum())
        print("Number of inf values:\n", np.isinf(df.select_dtypes(include=[np.number])).sum())
        
        data = df[FEATURE_COLUMNS].replace([np.inf, -np.inf], np.nan).dropna()
        scaled = self.scaler.fit_transform(data)
        self.target = scaled[:, FEATURE_COLUMNS.index("close")]
        self.data = scaled

    def __len__(self):
        return len(self.data) - self.qen_len
    
    def __getitem__(self, idx):
        X = self.data[idx: idx+self.qen_len]
        y = self.target[idx+self.qen_len]
        return torch.tensor(X,dtype= torch.float), torch.tensor(y,dtype= torch.float)     

class LSTM_modul(nn.Module):
    def __init__(self, input_size=14, hiddan_size=50):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hiddan_size, 6)
        self.droup = nn.Dropout(0.2)
        self.fc = nn.Linear(hiddan_size , 1)
    
    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.droup(out[:, -1 ,:])
        return self.fc(out)   

def train_model(dataloader, model, criterion, optimizer, epochs=1):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for x_batch, y_batch in dataloader:
            x_batch, y_batch = x_batch.to(device) , y_batch.to(device)
            optimizer.total_loss = 0
            optimizer.zero_grad()
            output = model(x_batch).squeeze(-1)
            loss = criterion(output, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
    return total_loss / len(dataloader)

def predict_next(df, model, scaler, seq_len=10):
    df = df[FEATURE_COLUMNS].copy()
    df = df.replace([np.inf, - np.inf], np.nan).dropna()
    
    scaled = scaler.transform(df.values[-seq_len:])

    input_tensor = torch.tensor(scaled, dtype=torch.float32).unsqueeze(0).to(next(model.parameters()).device)
    model.eval()
    
    with torch.no_grad():
        predictions =  model(input_tensor).cpu().numpy()
    
    
    dummy = np.zeros((1, len(FEATURE_COLUMNS)))
    close_index = FEATURE_COLUMNS.index("close")
        
    dummy[0, 3] = predictions[0][0]
    inv =scaler.inverse_transform(dummy)
    return inv[0, close_index] 


def get_latest_candles(symbol="BTCUSDT", interval="1m", limit=200):
    url = f"https://api.binance.com/api/v3/klines?symbol={symbol}&interval={interval}&limit={limit}"
    try:
        data = requests.get(url).json()
    except Exception as e:
        print("❌ Error fetching data:", e)
        return None

    df = pd.DataFrame(data, columns=[
        'timestamp', 'open', 'high', 'low', 'close', 'volume',
        'close_time', 'quote_asset_volume', 'number_of_trades',
        'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ignore'
    ])
    df = df.astype({
        'open': float, 'high': float, 'low': float,
        'close': float, 'volume': float
    })

    df['rsi'] = ta.momentum.RSIIndicator(close=df['close'], window=14).rsi()
    df['stoch_rsi'] = ta.momentum.stochrsi(close=df['close'], window=14)
    macd = ta.trend.MACD(close=df['close'])
    df['macd_line'] = macd.macd()
    df['macd_signal'] = macd.macd_signal()
    df['macd_diff'] = macd.macd_diff()

    df['sma_50'] = ta.trend.SMAIndicator(close=df['close'], window=50).sma_indicator()
    df['sma_20'] = ta.trend.SMAIndicator(close=df['close'], window=20).sma_indicator()

    bb_indicator  = ta.volatility.BollingerBands(close=df['close'])
    df['bb_bbm'] = bb_indicator.bollinger_mavg()
    df['bb_bbh'] = bb_indicator.bollinger_hband()
    df['bb_bbl'] = bb_indicator.bollinger_lband()

    df["returns"] = df["close"].pct_change()
    typical_price = (df["high"] + df["low"] + df["close"]) / 3
    df["vwap"] = (typical_price * df["volume"]).cumsum() / df["volume"].cumsum()
 
    return df.dropna()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
modul = LSTM_modul().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(modul.parameters(), lr=0.01)

latest_output = ''

if __name__ == "__main__":
    path = r"C:\Users\User\Documents\clever-trade-bot-ai-main (8)\clever-trade-bot-ai-main\P_project_with_python\Data_sources\Alpha_factors\data.csv"
    #df = get_latest_candles(symbol="BTCUSDT", interval="1m", limit=2000)
    
    df = pd.read_csv(path)
    
    #df = alpha_factor.ta_factor_indcators(df)
    #df = complute_all_alpha_zero(df)
    print(df)
    #df.to_csv('Data.csv')
    df.dropna(inplace=True)  
    print(df)

    input_size = len(FEATURE_COLUMNS)
    modul = LSTM_modul(input_size=input_size).to(device)
    dataset = genlenDataset(df)
    dataloader = DataLoader(dataset, batch_size=64, shuffle=True)
    initial_loss = train_model(dataloader, modul, criterion, optimizer, epochs=3)
    print(f"🧪 Initial training loss: {initial_loss:.4f}")

    while True:
        new_df = get_latest_candles(symbol="BTCUSDT", interval="1m", limit=200)
        if new_df is not None:
            new_df = complute_all_alpha_zero(new_df)
            #new_df["returns"] = new_df["close"].pct_change()
            #typical_price = (new_df["high"] + new_df["low"] + new_df["close"]) / 3
            #new_df["vwap"] = (typical_price * new_df["volume"]).cumsum() / new_df["volume"].cumsum()
            new_df.dropna(inplace=True)

            dataset = genlenDataset(new_df)
            dataloader = DataLoader(dataset, batch_size=64, shuffle=True)
            loss = train_model(dataloader, modul, criterion, optimizer, epochs=1)
            prediction = predict_next(new_df, modul, dataset.scaler, seq_len=10)

            trend = "🔼 Up" if prediction > new_df["close"].iloc[-1] else "🔽 Down"
            print(f"[{datetime.now().strftime('%H:%M:%S')}] 🔮 Close ≈ {prediction:.2f} USDT | Loss: {loss:.4f} | Trend: {trend}")

            torch.save(modul.state_dict(), "model.pth") 
        time.sleep(60)




In [None]:
#df.to_csv('Data.csv')

    

In [None]:
import math
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import requests
import time
from datetime import datetime
from sklearn.preprocessing import MinMaxScaler
import ta  # Make sure ta is installed: pip install ta

        
def complute_all_alpha_zero(df):
    for i in range(1,18):
        func= getattr(Alpha_Zero, f'alpha_{i}')
        df[f'alpha_factors_{i}'] = func(df)
    return df.dropna()

FEATURE_COLUMNS = [
    "open", "high", "low", "close", "rsi", "stoch_rsi",
    "macd_line", "macd_signal", "macd_diff", "sma_50", "sma_20",
    "bb_bbm", "bb_bbh", "bb_bbl", "returns", "vwap"
] + [f"alpha_factors_{i}" for i in range(1, 18) if i != 28 and i != 19]  

class genlenDataset(Dataset):
    def __init__(self, df, qen_len=10):
        self.qen_len = qen_len
        self.scaler = MinMaxScaler()
        print("Original df shape:", df.shape)
        print("Missing values per column:\n", df.isna().sum())
        print("Number of inf values:\n", np.isinf(df.select_dtypes(include=[np.number])).sum())
        
        data = df[FEATURE_COLUMNS].replace([np.inf, -np.inf], np.nan).dropna()
        scaled = self.scaler.fit_transform(data)
        self.target = scaled[:, FEATURE_COLUMNS.index("close")]
        self.data = scaled

    def __len__(self):
        return len(self.data) - self.qen_len
    
    def __getitem__(self, idx):
        X = self.data[idx: idx+self.qen_len]
        y = self.target[idx+self.qen_len]
        return torch.tensor(X,dtype= torch.float), torch.tensor(y,dtype= torch.float)     


class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))

        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

        
    def forward(self, x):
        x = x + self.pe[:, :x.size(1), :]
        return x 

class TimeSeriesTransformer(nn.Module):
    def __init__(self, input_size, seq_len, d_model=128, nhead=8, num_layers=4, dim_feedforward=256, dropout=0.1):
        super().__init__()
        
        self.input_projection = nn.Linear(input_size, d_model)
        
        self.positional_encoding = PositionalEncoding(d_model=d_model, max_len=seq_len)
        
        encoding = nn.TransformerEncoderLayer(
         d_model=d_model, nhead=nhead, dim_feedforward=dim_feedforward,
         dropout=dropout, batch_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(encoding, num_layers=num_layers)

        self.output_layer = nn.Linear(d_model, 1)

    
    def forward(self, x):
        # x shape: (batch_size, seq_len, input_size)
        x = self.input_projection(x)                       # => (batch_size, seq_len, d_model)
        x = self.positional_encoding(x)                    # add positional info
        x = self.transformer_encoder(x)                    # Transformer encoding
        x = x[:, -1, :]                                     # get the last time step
        out = self.output_layer(x)                         # final prediction
        return out


def train_model(dataloader, model, criterion, optimizer, epochs=1):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for x_batch, y_batch in dataloader:
            x_batch, y_batch = x_batch.to(device) , y_batch.to(device)
            optimizer.zero_grad()
            output = model(x_batch).squeeze(-1)
            loss = criterion(output, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
    return total_loss / len(dataloader)

def predict_next(df, model, scaler, seq_len=30):
    df = df[FEATURE_COLUMNS].copy()
    df = df.replace([np.inf, - np.inf], np.nan).dropna()
    
    scaled = scaler.transform(df.values[-seq_len:])

    input_tensor = torch.tensor(scaled, dtype=torch.float32).unsqueeze(0).to(next(model.parameters()).device)
    model.eval()
    
    with torch.no_grad():
        predictions =  model(input_tensor).cpu().numpy()
    
    
    dummy = np.zeros((1, len(FEATURE_COLUMNS)))
    close_index = FEATURE_COLUMNS.index("close")
        
    dummy[0, close_index] = predictions[0][0]
    inv =scaler.inverse_transform(dummy)
    return inv[0, close_index] 


def get_latest_candles(symbol="BTCUSDT", interval="1m", limit=200):
    url = f"https://api.binance.com/api/v3/klines?symbol={symbol}&interval={interval}&limit={limit}"
    try:
        data = requests.get(url).json()
    except Exception as e:
        print("❌ Error fetching data:", e)
        return None

    df = pd.DataFrame(data, columns=[
        'timestamp', 'open', 'high', 'low', 'close', 'volume',
        'close_time', 'quote_asset_volume', 'number_of_trades',
        'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ignore'
    ])
    df = df.astype({
        'open': float, 'high': float, 'low': float,
        'close': float, 'volume': float
    })

    df['rsi'] = ta.momentum.RSIIndicator(close=df['close'], window=14).rsi()
    df['stoch_rsi'] = ta.momentum.stochrsi(close=df['close'], window=14)
    macd = ta.trend.MACD(close=df['close'])
    df['macd_line'] = macd.macd()
    df['macd_signal'] = macd.macd_signal()
    df['macd_diff'] = macd.macd_diff()

    df['sma_50'] = ta.trend.SMAIndicator(close=df['close'], window=50).sma_indicator()
    df['sma_20'] = ta.trend.SMAIndicator(close=df['close'], window=20).sma_indicator()

    bb_indicator  = ta.volatility.BollingerBands(close=df['close'])
    df['bb_bbm'] = bb_indicator.bollinger_mavg()
    df['bb_bbh'] = bb_indicator.bollinger_hband()
    df['bb_bbl'] = bb_indicator.bollinger_lband()

    df["returns"] = df["close"].pct_change()
    typical_price = (df["high"] + df["low"] + df["close"]) / 3
    df["vwap"] = (typical_price * df["volume"]).cumsum() / df["volume"].cumsum()
 
    return df.dropna()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
modul = TimeSeriesTransformer(input_size=len(FEATURE_COLUMNS), seq_len=30).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(modul.parameters(), lr=0.01)

latest_output = ''

if __name__ == "__main__":
    path = r"C:\Users\User\Documents\clever-trade-bot-ai-main (8)\clever-trade-bot-ai-main\P_project_with_python\Data_sources\Alpha_factors\2.csv"
    #df = get_latest_candles(symbol="BTCUSDT", interval="1m", limit=2000)
    
    df = pd.read_csv(path)
    
    df = alpha_factor.ta_factor_indcators(df)
    df = complute_all_alpha_zero(df)
    print(df)
    df.to_csv('Data_00.csv')
    df.dropna(inplace=True)  
    print(df)

    input_size = len(FEATURE_COLUMNS)
    modul = TimeSeriesTransformer(input_size=input_size, seq_len=30).to(device)
    dataset = genlenDataset(df, qen_len=30)
    dataloader = DataLoader(dataset, batch_size=64, shuffle=True)
    initial_loss = train_model(dataloader, modul, criterion, optimizer, epochs=3)
    print(f"🧪 Initial training loss: {initial_loss:.4f}")

    while True:
        new_df = get_latest_candles(symbol="BTCUSDT", interval="1m", limit=200)
        if new_df is not None:
            new_df = complute_all_alpha_zero(new_df)
            #new_df["returns"] = new_df["close"].pct_change()
            #typical_price = (new_df["high"] + new_df["low"] + new_df["close"]) / 3
            #new_df["vwap"] = (typical_price * new_df["volume"]).cumsum() / new_df["volume"].cumsum()
            new_df.dropna(inplace=True)

            dataset = genlenDataset(new_df)
            dataloader = DataLoader(dataset, batch_size=64, shuffle=True)
            loss = train_model(dataloader, modul, criterion, optimizer, epochs=1)
            prediction = predict_next(new_df, modul, dataset.scaler, seq_len=10)

            trend = "🔼 Up" if prediction > new_df["close"].iloc[-1] else "🔽 Down"
            print(f"[{datetime.now().strftime('%H:%M:%S')}] 🔮 Close ≈ {prediction:.2f} USDT | Loss: {loss:.4f} | Trend: {trend}")

            torch.save(modul.state_dict(), "model.pth") 
        time.sleep(60)




NameError: name 'alpha_factor' is not defined

In [None]:
import ccxt
import pandas as pd
from datetime import datetime
import time
import os

exchange = ccxt.binance()

all_data = []
limit = 1000
since = exchange.parse8601('2025-06-09T18:58:00')
symbol = 'BTC/USDT'

path = r'C:\Users\User\Documents\clever-trade-bot-ai-main (8)\message_types.csv'

file_exists = os.path.exists(path)

while since < exchange.milliseconds():
    ohlcv = exchange.fetch_ohlcv(symbol, timeframe='1m', since=since, limit=limit)
    if not ohlcv:
        break

    df = pd.DataFrame(ohlcv, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
    
    df.to_csv(path, mode='a', header=not file_exists, index=False)
    file_exists = True  #   

    since = ohlcv[-1][0] + 60_000  #  
    time.sleep(0.5)
