In [None]:
!pip -q install pandas numpy requests scikit-learn joblib

import time

import joblib
import numpy as np
import pandas as pd
import requests
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

def fetch_yahoo_prices(ticker: str, range_: str="3y", interval: str="1d"):
    """Download OHLCV bars for *ticker* from the Yahoo Finance chart endpoint.

    Both mirror hosts are tried in order, for up to three rounds with a
    one-second pause after each round.  Any non-200 response, missing
    ``result``/``quote`` payload, or exception raised while requesting or
    parsing makes the function move on to the next host.

    Args:
        ticker: Symbol appended to the chart URL.
        range_: Value sent as the ``range`` query parameter.
        interval: Value sent as the ``interval`` query parameter.

    Returns:
        A DataFrame with columns ``open``, ``high``, ``low``, ``close`` and
        ``volume`` (rows with a missing close removed, index reset) as soon as
        a response yields at least 300 rows; ``None`` if no attempt does.
    """
    endpoints = (
        "https://query1.finance.yahoo.com/v8/finance/chart/",
        "https://query2.finance.yahoo.com/v8/finance/chart/",
    )
    request_headers = {
        "User-Agent": "Mozilla/5.0",
        "Accept": "application/json",
        "Accept-Language": "en-US,en;q=0.9",
    }
    query = {"range": range_, "interval": interval}
    columns = ("open", "high", "low", "close", "volume")

    rounds_left = 3
    while rounds_left > 0:
        rounds_left -= 1
        for endpoint in endpoints:
            try:
                response = requests.get(
                    f"{endpoint}{ticker}",
                    params=query,
                    headers=request_headers,
                    timeout=20,
                )
                if response.status_code != 200:
                    continue
                payload = response.json()
                # "result" / "quote" may be absent or null; fall back to a
                # one-element placeholder so indexing [0] is always safe.
                chart = (payload.get("chart", {}).get("result") or [None])[0]
                if not chart:
                    continue
                bars = (chart.get("indicators", {}).get("quote") or [None])[0]
                if not bars:
                    continue
                frame = pd.DataFrame({name: bars.get(name, []) for name in columns})
                frame = frame.dropna(subset=["close"]).reset_index(drop=True)
                if len(frame) >= 300:
                    return frame
            except Exception:
                # Best-effort fetch: any failure just moves on to the next host.
                continue
        time.sleep(1)
    return None

def sma(s, w):
    """Return the simple moving average of *s* over a rolling window of *w* rows."""
    window = s.rolling(w)
    return window.mean()
def ema(s, span):
    """Return the exponentially weighted mean of *s* for the given *span* (``adjust=False``)."""
    smoother = s.ewm(span=span, adjust=False)
    return smoother.mean()

def rsi(close, period=14):
    """Compute a relative strength index from *close*.

    Gains and losses are averaged with a plain rolling mean over *period*
    rows.  The first difference is undefined and is counted as a zero gain
    and a zero loss.  A small epsilon in the denominator avoids division by
    zero when there are no losses in the window.

    Returns:
        A Series on the same index as *close*; the first ``period - 1``
        entries are NaN.
    """
    change = close.diff()
    is_up = change > 0
    is_down = change < 0
    ups = change.where(is_up, 0.0)
    downs = (-change).where(is_down, 0.0)
    mean_up = ups.rolling(period).mean()
    mean_down = downs.rolling(period).mean()
    strength = mean_up / (mean_down + 1e-9)
    return 100 - (100/(1+strength))

def macd(close, fast=12, slow=26, signal=9):
    """Compute MACD components from *close*.

    Returns:
        A tuple ``(macd_line, signal_line, histogram)`` where the MACD line is
        the *fast* EMA minus the *slow* EMA, the signal line is the EMA of the
        MACD line over *signal*, and the histogram is their difference.
    """
    fast_avg = ema(close, fast)
    slow_avg = ema(close, slow)
    line = fast_avg - slow_avg
    trigger = ema(line, signal)
    return line, trigger, line - trigger

def bollinger(close, window=20, num_std=2):
    """Compute Bollinger bands for *close*.

    Returns:
        A tuple ``(upper, mid, lower)``: the rolling mean over *window* rows
        plus/minus *num_std* rolling standard deviations, with the rolling
        mean itself in the middle.
    """
    center = sma(close, window)
    spread = num_std*close.rolling(window).std()
    return center + spread, center, center - spread

def atr(high, low, close, period=14):
    """Compute the average true range as a rolling mean over *period* rows.

    The true range of a row is the largest of: high minus low, the absolute
    gap between the high and the previous close, and the absolute gap between
    the low and the previous close.
    """
    previous = close.shift(1)
    candidates = [
        high - low,
        (high - previous).abs(),
        (low - previous).abs(),
    ]
    true_range = pd.concat(candidates, axis=1).max(axis=1)
    return true_range.rolling(period).mean()

def build_features(df):
    """Return a copy of *df* extended with technical-indicator columns.

    Reads the ``close``, ``high``, ``low`` and ``volume`` columns and adds
    moving averages, RSI, MACD, Bollinger band width, ATR, 1- and 5-row
    returns, 20-row return volatility and volume ratios.  Rows containing a
    NaN in any column are dropped afterwards and the index is reset.  The
    input frame is not modified.
    """
    d = df.copy()
    close = d["close"]
    high = d["high"]
    low = d["low"]
    # Missing volume counts as zero for the volume-based indicators only;
    # the stored "volume" column itself is left as-is.
    volume = d["volume"].fillna(0)

    # Trend: simple and exponential moving averages.
    d["sma10"] = sma(close, 10)
    d["sma20"] = sma(close, 20)
    d["sma50"] = sma(close, 50)
    d["ema10"] = ema(close, 10)
    d["ema20"] = ema(close, 20)
    d["ema50"] = ema(close, 50)

    # Momentum.
    d["rsi14"] = rsi(close, 14)
    d["macd"], d["macd_signal"], d["macd_hist"] = macd(close, 12, 26, 9)

    # Volatility: band width relative to the middle band, and ATR.
    upper, middle, lower = bollinger(close, 20, 2)
    d["bb_width"] = (upper - lower) / (middle + 1e-9)
    d["atr14"] = atr(high, low, close, 14)

    # Returns and their rolling dispersion.
    daily_return = close.pct_change(1)
    d["ret1"] = daily_return
    d["ret5"] = close.pct_change(5)
    d["vol20"] = daily_return.rolling(20).std()

    # Volume relative to its own 20-row average.
    average_volume = volume.rolling(20).mean()
    d["vol_ma20"] = average_volume
    d["vol_ratio"] = volume / (average_volume + 1e-9)

    return d.dropna().reset_index(drop=True)

# Columns fed to the model, in the order the model expects them.
FEATURE_COLS = [
    "sma10",
    "sma20",
    "sma50",
    "ema10",
    "ema20",
    "ema50",
    "rsi14",
    "macd",
    "macd_signal",
    "macd_hist",
    "bb_width",
    "atr14",
    "ret1",
    "ret5",
    "vol20",
    "vol_ratio",
]

def label_tp_sl(close_series, horizon=10, tp=0.05, sl=-0.02):
    """Label each row by whether take-profit is reached before stop-loss.

    For every row the next *horizon* closes are scanned.  The label is ``1``
    when a close at or above ``entry * (1 + tp)`` occurs and no close at or
    below ``entry * (1 + sl)`` occurs earlier; otherwise it is ``0``.  Rows
    that do not have *horizon* future closes available are labelled NaN.

    Args:
        close_series: Series of closing prices in chronological order.
        horizon: Number of future rows to inspect.
        tp: Take-profit threshold as a fractional return (positive).
        sl: Stop-loss threshold as a fractional return (negative).

    Returns:
        A Series of labels carrying the same index as *close_series*, so it
        can be assigned straight back onto the originating frame regardless
        of that frame's index.
    """
    closes = close_series.values
    n = len(closes)
    labels = []
    for i, entry in enumerate(closes):
        if i + horizon >= n:
            # Not enough future data to evaluate the full horizon.
            labels.append(np.nan)
            continue
        future = closes[i + 1:i + horizon + 1]
        tp_hits = np.flatnonzero(future >= entry * (1 + tp))
        sl_hits = np.flatnonzero(future <= entry * (1 + sl))
        if tp_hits.size == 0:
            # Take-profit never reached: a loss or a timeout, both labelled 0.
            labels.append(0)
        elif sl_hits.size == 0 or tp_hits[0] < sl_hits[0]:
            labels.append(1)
        else:
            labels.append(0)
    # Keep the caller's index; a fresh RangeIndex would misalign on assignment
    # whenever the input frame is not 0..n-1 indexed.
    return pd.Series(labels, index=close_series.index)

# Labelling horizon in rows; also used as the gap between train and test so
# that no training label is computed from prices inside the test window.
HORIZON = 10
TRAIN_FRACTION = 0.8

TICKERS = ["1120.SR","2010.SR","2082.SR","1180.SR","2280.SR","2200.SR","1211.SR","1050.SR"]
rows = []
train_parts = []
test_parts = []
for t in TICKERS:
    df = fetch_yahoo_prices(t)
    if df is None:
        print("No data:", t)
        continue
    feat = build_features(df)
    feat["label"] = label_tp_sl(feat["close"], horizon=HORIZON, tp=0.05, sl=-0.02)
    feat = feat.dropna(subset=["label"]).reset_index(drop=True)
    if len(feat) < 200:
        print("Too few:", t)
        continue
    rows.append(feat)

    # Chronological split per ticker (no shuffle).  Splitting the
    # concatenated frame instead would hold out the last tickers rather than
    # the most recent period of every ticker.
    cut = int(len(feat) * TRAIN_FRACTION)
    train_parts.append(feat.iloc[:cut])
    test_parts.append(feat.iloc[cut + HORIZON:])

if not rows:
    raise RuntimeError("No ticker produced enough data to train on")

train_df = pd.concat(train_parts, ignore_index=True)
test_df = pd.concat(test_parts, ignore_index=True)

# Train rows first, then test rows, so `split` still marks the boundary.
data = pd.concat([train_df, test_df], ignore_index=True)
split = len(train_df)

X = data[FEATURE_COLS].astype(float)
y = data["label"].astype(int)

X_train, X_test = X.iloc[:split], X.iloc[split:]
y_train, y_test = y.iloc[:split], y.iloc[split:]

# Features live on very different scales (price levels vs. returns), which
# prevents the solver from converging; standardise inside the pipeline so the
# saved artefact applies the same scaling at prediction time.
model = make_pipeline(StandardScaler(), LogisticRegression(max_iter=2000))
model.fit(X_train, y_train)
prob = model.predict_proba(X_test)[:, 1]

if y_test.nunique() < 2:
    # ROC AUC is undefined when the test labels contain a single class.
    auc = float("nan")
    print("AUC: undefined (test labels contain a single class)")
else:
    auc = roc_auc_score(y_test, prob)
    print("AUC:", auc)

joblib.dump(model, "model.joblib")
print("Saved model.joblib")


AUC: 0.5076514871035419
Saved model.joblib


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
