In [None]:
!pip install ta

In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import TimeSeriesSplit, cross_val_score

# CONFIG -- module-level settings read by main() and train_time_series_model()
TICKER = "TCS.BO"  # symbol passed to yf.download
START_DATE = "2022-01-01"  # passed as the `start` argument of yf.download
FORECAST_HORIZON = 5  # how many rows (trading days) ahead the target looks
GOOD_THRESHOLD = 0.02  # fractional rise (0.02 == 2%) that labels a row as "good"
TEST_SPLITS = 5  # number of TimeSeriesSplit folds used for cross-validation
N_ESTIMATORS = 200  # n_estimators for the RandomForestClassifier
RANDOM_STATE = 42  # random_state for the RandomForestClassifier

# ----------------------------------------------------
# 1. Fetch stock data
# ----------------------------------------------------
def get_stock_data(ticker, start):
    """Download price history for ``ticker`` beginning at ``start``.

    Thin wrapper around ``yf.download``; the frame it produces is handed back
    without any post-processing.

    Args:
        ticker: Symbol forwarded positionally to ``yf.download``.
        start: Start date forwarded as the ``start`` keyword.

    Returns:
        Whatever ``yf.download`` returns. The rest of this file indexes it
        with ``"Close"`` and ``"Volume"``.
    """
    return yf.download(ticker, start=start)

# ----------------------------------------------------
# 2. Feature engineering for technical indicators
# ----------------------------------------------------
def create_features(df):
    """Build the technical-indicator table that feeds the classifier.

    Args:
        df: Price frame exposing ``"Close"`` and ``"Volume"``.

    Returns:
        DataFrame on ``df``'s index with the columns ``SMA_20``, ``SMA_50``,
        ``EMA_200``, ``RSI``, ``MACD``, ``Volatility`` and ``Volume`` (in that
        order). Rows that fall inside a rolling warm-up window hold NaN.
    """
    close = df["Close"]
    daily_return = close.pct_change()

    table = pd.DataFrame(index=df.index)

    # Trend: moving averages of the closing price.
    table["SMA_20"] = close.rolling(20).mean()
    table["SMA_50"] = close.rolling(50).mean()
    table["EMA_200"] = close.ewm(span=200).mean()

    # Momentum oscillators computed by the helpers in this file.
    table["RSI"] = compute_rsi(close, 14)
    table["MACD"] = compute_macd(close)

    # Dispersion of daily returns over the last 20 rows.
    table["Volatility"] = daily_return.rolling(20).std()

    # Note: this column is the 5-row average volume, not the raw volume.
    table["Volume"] = df["Volume"].rolling(5).mean()

    return table

def compute_rsi(series, period=14):
    """Relative Strength Index based on plain rolling means.

    Upward and downward moves are averaged with a simple ``rolling(period)``
    mean (no exponential/Wilder smoothing).

    Args:
        series: Price values to analyse.
        period: Window length for the average gain and average loss.

    Returns:
        RSI values on a 0-100 scale. Entries before a full window is
        available are NaN; a window with no movement at all is also NaN
        (0 / 0), while a window with gains and no losses evaluates to 100.
    """
    change = series.diff()

    # Split the move into its up and down parts; non-matching entries
    # (including the leading NaN from diff) are replaced by 0.
    ups = change.where(change > 0, 0)
    downs = -change.where(change < 0, 0)

    avg_gain = ups.rolling(period).mean()
    avg_loss = downs.rolling(period).mean()

    relative_strength = avg_gain / avg_loss
    return 100 - (100 / (1 + relative_strength))

def compute_macd(series, short=12, long=26, signal=9):
    """Return the MACD line: fast EMA minus slow EMA of ``series``.

    Args:
        series: Price values to analyse.
        short: Span of the fast exponential moving average.
        long: Span of the slow exponential moving average.
        signal: Accepted for signature compatibility but not used; no signal
            line or histogram is computed here.

    Returns:
        The difference ``EMA(short) - EMA(long)``, aligned with ``series``.
    """
    fast = series.ewm(span=short).mean()
    slow = series.ewm(span=long).mean()
    macd_line = fast - slow
    return macd_line

# ----------------------------------------------------
# 3. Target: 1 if price rises ≥ threshold after horizon days
# ----------------------------------------------------
def create_target(df, horizon, threshold):
    """Label each row 1 when the close rises by at least ``threshold``.

    The comparison is made between a row's close and the close ``horizon``
    rows later.

    Args:
        df: Price frame exposing ``"Close"``.
        horizon: Number of rows to look ahead.
        threshold: Minimum fractional return (e.g. 0.02 for 2%) that counts
            as a positive label.

    Returns:
        Integer 0/1 labels on ``df``'s index. The final ``horizon`` rows have
        no future price; their NaN return fails the comparison, so they come
        back as 0 rather than missing.
    """
    close = df["Close"]
    forward_return = (close.shift(-horizon) - close) / close
    reached = forward_return >= threshold
    return reached.astype(int)

# ----------------------------------------------------
# 4. Model training
# ----------------------------------------------------
def train_time_series_model(X, y, n_splits):
    """Cross-validate a random forest, then fit it on all supplied rows.

    The forest is configured from the module constants ``N_ESTIMATORS`` and
    ``RANDOM_STATE``.

    Args:
        X: Feature rows (assumed to be in chronological order, since the
            folds come from ``TimeSeriesSplit``).
        y: Labels aligned with ``X``.
        n_splits: Number of ``TimeSeriesSplit`` folds.

    Returns:
        Tuple ``(model, scores, importances)``: the classifier fitted on all
        of ``X``/``y``, the per-fold accuracy scores, and the fitted model's
        ``feature_importances_``.
    """
    splitter = TimeSeriesSplit(n_splits=n_splits)
    forest = RandomForestClassifier(
        n_estimators=N_ESTIMATORS,
        random_state=RANDOM_STATE,
    )

    # Score first, then train the final model on every row.
    fold_accuracy = cross_val_score(forest, X, y, cv=splitter, scoring="accuracy")
    forest.fit(X, y)

    importances = forest.feature_importances_
    return forest, fold_accuracy, importances

# ----------------------------------------------------
# 5. Main pipeline
# ----------------------------------------------------
def main():
    """Run the pipeline end to end and print a forecast for the latest row.

    Steps: download prices, build features and labels, cross-validate and fit
    the classifier on rows whose outcome is already known, classify the most
    recent row, and print the result together with feature importances.

    Raises:
        ValueError: If no price data was downloaded, or if there is too little
            history to form a usable training set plus a row to predict.
    """
    print(f"=== AI Technical Analysis for {TICKER} ===")
    print(f"Data: {START_DATE} → latest | Horizon: {FORECAST_HORIZON}d | Good if ≥ {GOOD_THRESHOLD*100:.1f}%\n")

    # Load data
    df = get_stock_data(TICKER, START_DATE)
    if df is None or df.empty:
        raise ValueError(f"No price data returned for {TICKER} from {START_DATE}")
    print(df)

    # Create features & target
    feats = create_features(df)
    target = create_target(df, FORECAST_HORIZON, GOOD_THRESHOLD)

    # Merge features + target; keep only rows whose indicators are complete
    # (the rolling windows leave NaN at the start of the history).
    data = feats.copy()
    data["target"] = target
    data.dropna(subset=list(feats.columns), inplace=True)
    if len(data) < 2:
        raise ValueError("Not enough history to build features and a training set")

    X = data.drop(columns=["target"])
    y = data["target"].squeeze()

    # The newest row is the one being forecast; it never goes into training.
    X_latest = X.iloc[[-1]]
    latest_date = X_latest.index[-1].date()

    # Rows near the end of the history have no close FORECAST_HORIZON rows
    # ahead yet. Their label is not a real outcome (a NaN return simply fails
    # the threshold comparison and shows up as 0), so train only on rows whose
    # future close actually exists.
    future_close = df["Close"].shift(-FORECAST_HORIZON)
    labelled_dates = future_close.dropna().index
    X_hist, y_hist = X.iloc[:-1], y.iloc[:-1]
    has_outcome = X_hist.index.isin(labelled_dates)
    X_train, y_train = X_hist.loc[has_outcome], y_hist.loc[has_outcome]
    if len(X_train) <= TEST_SPLITS:
        raise ValueError(
            f"Only {len(X_train)} labelled rows available; need more than "
            f"{TEST_SPLITS} for {TEST_SPLITS}-fold time-series cross-validation"
        )

    # Train model
    model, cv_scores, feat_importance = train_time_series_model(X_train, y_train, TEST_SPLITS)

    # Prediction for latest day
    pred = model.predict(X_latest)[0]
    # Look the positive class up by label: if training saw a single class,
    # predict_proba has one column and a fixed [1] index would fail.
    class_probs = dict(zip(model.classes_, model.predict_proba(X_latest)[0]))
    prob_good = class_probs.get(1, 0.0)
    # Report confidence in the class that was actually predicted.
    confidence = prob_good if pred == 1 else 1.0 - prob_good

    # Output
    print(f"Cross-Validation Accuracy: {cv_scores.mean():.2%} (+/- {cv_scores.std():.2%})")
    print(f"Prediction date: {latest_date}")
    print(f"Prediction: {'✅ GOOD' if pred == 1 else '❌ NOT GOOD'}")
    print(f"Confidence: {confidence:.2%}\n")

    # Feature importance
    feat_df = pd.DataFrame({"Feature": X.columns, "Importance": feat_importance})
    feat_df = feat_df.sort_values("Importance", ascending=False)
    print("Top features influencing decision:")
    print(feat_df.head(10))

In [None]:
# Notebook entry point: run the full pipeline with the CONFIG values above.
main()
