In [1]:
# Cell 1: imports & utility functions
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.svm import SVR
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA, KernelPCA, FastICA
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import mean_squared_error, mean_absolute_error


In [3]:
# Cell 2: TLBO optimizer
def tlbo_optimize(obj_func, bounds, pop_size=15, max_iter=30, verbose=False, seed=None):
    """Minimise ``obj_func`` inside a box with Teaching-Learning-Based Optimization.

    Parameters
    ----------
    obj_func : callable
        Takes a 1-D float array of length ``len(bounds)`` and returns a scalar
        cost; lower is better.  A NaN cost is treated as ``+inf`` so that a
        failed evaluation can never be selected as the best solution.
    bounds : sequence of (low, high)
        One pair per dimension; every candidate is clipped to this box.
    pop_size : int, default 15
        Number of learners.  Must be at least 2.
    max_iter : int, default 30
        Number of teaching + learning rounds.
    verbose : bool, default False
        Print the initial best cost and the running best every 10 iterations.
    seed : int or None, default None
        ``None`` draws from NumPy's global RNG (so ``np.random.seed`` still
        controls the run); an int uses a private ``RandomState`` and leaves the
        global state untouched.

    Returns
    -------
    best : numpy.ndarray
        Best position found.
    best_fit : float
        Cost of ``best``.

    Raises
    ------
    ValueError
        If ``pop_size < 2`` or ``bounds`` is not a sequence of (low, high) pairs.
    """
    if pop_size < 2:
        raise ValueError("pop_size must be at least 2: the learner phase needs a distinct partner")
    box = np.asarray(bounds, dtype=float)
    if box.ndim != 2 or box.shape[1] != 2:
        raise ValueError("bounds must be a non-empty sequence of (low, high) pairs")
    lows, highs = box[:, 0], box[:, 1]
    dim = box.shape[0]

    # Both objects expose rand / randint with the same signatures.
    rng = np.random if seed is None else np.random.RandomState(seed)

    def evaluate(candidate):
        # np.argmin returns the index of a NaN if one is present, and every
        # `<` comparison against NaN is False, so a NaN cost would both win the
        # "best" slot and never be replaced.  Map it to +inf instead.
        cost = float(obj_func(candidate))
        return np.inf if np.isnan(cost) else cost

    pop = lows + rng.rand(pop_size, dim) * (highs - lows)
    fitness = np.array([evaluate(sol) for sol in pop])
    best_idx = np.argmin(fitness)
    best, best_fit = pop[best_idx].copy(), fitness[best_idx]
    if verbose: print(f"Init best: {best_fit:.6f}")

    for it in range(1, max_iter + 1):
        mean_pop = pop.mean(axis=0)
        # Snapshot the teacher: a plain pop[...] row is a view that would change
        # under us as soon as that learner's row is overwritten below.
        teacher = pop[np.argmin(fitness)].copy()
        TF = rng.randint(1, 3)          # teaching factor, 1 or 2

        # Teaching phase: move each learner along (teacher - TF * mean).
        for i in range(pop_size):
            new = np.clip(pop[i] + rng.rand(dim) * (teacher - TF * mean_pop), lows, highs)
            fnew = evaluate(new)
            if fnew < fitness[i]:
                pop[i], fitness[i] = new, fnew

        # Learning phase: move towards a better random partner, away from a worse one.
        for i in range(pop_size):
            j = rng.randint(pop_size - 1)
            if j >= i:                  # skip over i -> uniform over the other learners
                j += 1
            diff = pop[i] - pop[j] if fitness[i] < fitness[j] else pop[j] - pop[i]
            new = np.clip(pop[i] + rng.rand(dim) * diff, lows, highs)
            fnew = evaluate(new)
            if fnew < fitness[i]:
                pop[i], fitness[i] = new, fnew

        idx = np.argmin(fitness)
        if fitness[idx] < best_fit:
            best, best_fit = pop[idx].copy(), fitness[idx]
        if verbose and it % 10 == 0:
            print(f"Iter {it}/{max_iter} best {best_fit:.6f}")
    return best, best_fit

# Cell 2: compute 17 technical indicators
def compute_indicators(df):
    """Build the 17 technical-indicator features from an OHLC frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Open', 'High', 'Low' and 'Close'; rows are
        taken in their existing order.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: MA10, BIAS20, MACD, %K, %D, %SD, WR, ROC, RSI, CCI,
        PSY, BSMI, BSWI, MOM, DIS5, DIS10, MAO.  Rows in which any indicator
        is undefined (rolling-window warm-up, or a zero denominator producing
        NaN / +-inf) are dropped, so the result contains only finite values.
    """
    o, h, l, c = df['Open'], df['High'], df['Low'], df['Close']
    X = pd.DataFrame(index=df.index)

    ma5, ma10, ma20 = (c.rolling(n).mean() for n in (5, 10, 20))
    X['MA10'] = ma10
    X['BIAS20'] = (c - ma20) / ma20
    X['MACD'] = c.ewm(span=12).mean() - c.ewm(span=26).mean()

    # 9-period stochastic oscillator family and Williams %R share one range.
    low9, high9 = l.rolling(9).min(), h.rolling(9).max()
    range9 = high9 - low9
    X['%K'] = (c - low9) / range9 * 100
    X['%D'] = X['%K'].rolling(3).mean()
    X['%SD'] = X['%D'].rolling(3).mean()
    X['WR'] = (high9 - c) / range9 * 100
    X['ROC'] = c.pct_change(10) * 100

    delta = c.diff()
    up, dn = delta.clip(lower=0), -delta.clip(upper=0)
    AU, AD = up.ewm(alpha=1/5).mean(), dn.ewm(alpha=1/5).mean()
    X['RSI'] = AU / (AU + AD) * 100

    # CCI: the mean deviation must be taken around the mean of the *same*
    # 24-row window.  (Looking the average up at the window's first label used
    # a stale value from 23 rows earlier and doubled the warm-up period.)
    tp = (h + l + c) / 3
    MATP = tp.rolling(24).mean()
    MDi = tp.rolling(24).apply(lambda w: np.abs(w - w.mean()).mean(), raw=True)
    X['CCI'] = (tp - MATP) / (0.015 * MDi)

    # Share of up-days in the last 13 changes; raw=True hands the lambda a plain ndarray.
    X['PSY'] = delta.rolling(13).apply(lambda w: (w > 0).sum() / 13 * 100, raw=True)
    X['BSMI'] = (h - o).rolling(26).sum() / (o - l).rolling(26).sum()
    prev_close = c.shift()
    X['BSWI'] = (h - prev_close).rolling(26).sum() / (prev_close - l).rolling(26).sum()
    X['MOM'] = c - c.shift(4)
    X['DIS5'] = c / ma5
    X['DIS10'] = c / ma10
    X['MAO'] = (ma5 - ma10) / ma5

    # dropna() alone keeps +-inf from x/0; treat those as undefined as well.
    return X.replace([np.inf, -np.inf], np.nan).dropna()

# Cell 2: evaluation metrics
def metrics(y_true, y_pred):
    """Return (RMSE, MAE, NMSE, DS) for a forecast.

    Parameters
    ----------
    y_true, y_pred : array-like
        Actual and predicted values of equal length.  Lists, NumPy arrays and
        pandas Series are accepted; values are compared by position, never by
        index label.

    Returns
    -------
    rmse : float
        Root mean squared error.
    mae : float
        Mean absolute error.
    nmse : float
        Sum of squared errors divided by ``(n - 1) * var(y_true, ddof=1)``.
    ds : float
        Percentage of consecutive steps where the sign of the change in
        ``y_pred`` equals the sign of the change in ``y_true``.
        ``nmse`` and ``ds`` are NaN when fewer than two samples are given.

    Raises
    ------
    ValueError
        If the inputs are empty or differ in length.
    """
    # Flatten to plain arrays first: subtracting two pandas Series aligns on
    # labels, which silently yields NaNs when their indexes differ.
    actual = np.asarray(y_true, dtype=float).ravel()
    forecast = np.asarray(y_pred, dtype=float).ravel()
    if actual.shape != forecast.shape:
        raise ValueError(
            f"y_true and y_pred must have the same length, got {actual.size} and {forecast.size}"
        )
    n = actual.size
    if n == 0:
        raise ValueError("y_true and y_pred must not be empty")

    err = actual - forecast
    sq_err = err ** 2
    rmse = np.sqrt(np.mean(sq_err))
    mae = np.mean(np.abs(err))
    if n < 2:
        # variance and step directions are undefined for a single observation
        return rmse, mae, np.nan, np.nan
    nmse = sq_err.sum() / ((n - 1) * np.var(actual, ddof=1))
    ds = np.mean(np.sign(np.diff(actual)) == np.sign(np.diff(forecast))) * 100
    return rmse, mae, nmse, ds


In [19]:
import pandas as pd
from dateutil.parser import parse

def load_mcx_comdex(path: str) -> pd.DataFrame:
    """Load MCX COMDEX OHLC data from a CSV, a real Excel file, or an HTML-wrapped .xls.

    Parameters
    ----------
    path : str
        File to read.  ``.xls`` / ``.xlsx`` is tried as Excel first and, if
        that fails, as an HTML table; any other extension is read as CSV.
        The table must contain 'Date', 'Open', 'High', 'Low' and 'Close' columns.

    Returns
    -------
    pandas.DataFrame
        Numeric 'Open', 'High', 'Low', 'Close' columns indexed by a sorted
        datetime 'Date' index with one row per date (the last occurrence in
        the file wins).  Rows whose Open/High/Low are all zero or all missing
        are removed.
    """
    ext = path.rsplit('.', 1)[-1].lower()

    # 1 - get the raw table
    if ext in ('xls', 'xlsx'):
        try:                           # real Excel
            raw = pd.read_excel(path, engine='openpyxl' if ext == 'xlsx' else None)
        except Exception:              # HTML masquerading as .xls
            raw = pd.read_html(path)[0]
    else:                              # CSV
        raw = pd.read_csv(path)

    def to_timestamp(value):
        text = str(value).strip()
        # Year-first text such as "2019-03-05" (also what str() yields for a
        # Timestamp that read_excel already parsed) is unambiguous, but
        # dateutil with dayfirst=True would read it as 3 May.  Only apply the
        # day-first preference to text that does not start with a 4-digit year.
        year_first = len(text) >= 5 and text[:4].isdigit() and text[4] in '-/.'
        return parse(text, dayfirst=not year_first)

    # 2 - trim to needed columns and parse dates
    table = raw[['Date', 'Open', 'High', 'Low', 'Close']].copy()
    # Plain column assignment + to_datetime guarantees a datetime64 column
    # (and therefore a DatetimeIndex) regardless of the source dtype.
    table['Date'] = pd.to_datetime(table['Date'].map(to_timestamp))
    # Stable sort keeps file order among equal dates, so keep='last' below
    # really means "last occurrence in the file".
    table = table.set_index('Date').sort_index(kind='mergesort')

    # 3 - force numeric (kill commas / blanks)
    price_cols = ['Open', 'High', 'Low', 'Close']
    for col in price_cols:
        cleaned = table[col].astype(str).str.replace(',', '', regex=False)
        table[col] = pd.to_numeric(cleaned, errors='coerce')

    # 4 - drop rows where O/H/L are all zero or NaN
    ohl = table[['Open', 'High', 'Low']]
    unusable = (ohl == 0).all(axis=1) | ohl.isna().all(axis=1)
    table = table[~unusable]
    table = table[~table.index.duplicated(keep='last')]   # keep one row per day
    return table

# --- use it ---
# `path` is resolved relative to the notebook's working directory.
# `df` is the cleaned OHLC frame that the later cells read.
path = "MCXiCOMDEX_HistoricalData.xls"   # or "comdex.csv"
df = load_mcx_comdex(path)
df.head()


Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-12-16,4387.1,4557.71,4383.04,4538.75
2019-12-17,4538.75,4538.75,4413.88,4476.32
2019-12-18,4465.46,4495.32,4367.74,4381.31
2019-12-19,4397.6,4430.17,4321.59,4370.45
2019-12-20,9080.92,9099.43,9057.35,9080.92


In [21]:
# Indicator features for every usable row, plus a min-max scaled copy.
# Note: the scaler is fitted on all rows of `ind`; no train/test split has
# been applied at this point.
ind = compute_indicators(df)
dates = ind.index                      # row labels of the indicator frame
scaler = MinMaxScaler().fit(ind)
X_norm = scaler.transform(ind)


In [None]:
# Cell 5+6 combined: Dim‑red → TLBO‑SVR → metrics + optional plots
from itertools import product

# ── SETTINGS ────────────────────────────────────────────────────────────
horizons     = [1, 3, 5]        # forecast steps
pop_size     = 15               # TLBO learners
generations  = 30               # TLBO iterations
plot_results = True             # set False to skip charts
train_frac   = 0.83             # leading share of samples used for training
cv_splits    = 5                # TimeSeriesSplit folds inside the TLBO objective
svr_bounds   = [(0.01, 35000), (0.0001, 32)]   # search box for (C, gamma)
seed         = 0                # seeds NumPy's global RNG, which TLBO draws from
# ────────────────────────────────────────────────────────────────────────

dr_models = {
    "PCA" :  PCA(n_components=6),
    "KPCA":  KernelPCA(n_components=6, kernel="rbf", gamma=0.01),
    "ICA" :  FastICA(n_components=6, random_state=0)
}

# TLBO is stochastic; fix the seed so Restart & Run All reproduces the numbers.
np.random.seed(seed)

# Loop-invariant inputs.
features      = ind.to_numpy()                       # raw (unscaled) indicators
close_aligned = df['Close'].reindex(ind.index)       # target on the indicator index
tscv          = TimeSeriesSplit(n_splits=cv_splits)
results       = []                                   # one record per (model, horizon)

for name, dr in dr_models.items():
    print(f"\n=== {name} ===")

    for h in horizons:
        # --- align target with indicator rows --------------------------
        y  = close_aligned.shift(-h).iloc[:-h].values    # value h steps ahead; last h rows have no target
        Xh = features[:-h]                               # match length

        split = int(train_frac * len(Xh))
        ytr, yte = y[:split], y[split:]

        # --- scale + reduce, fitted on the training rows only ----------
        # Fitting the scaler / reducer on all rows would let test-period
        # min/max and covariance leak into the training features.
        scaler_h = MinMaxScaler().fit(Xh[:split])
        Xtr = dr.fit_transform(scaler_h.transform(Xh[:split]))
        Xte = dr.transform(scaler_h.transform(Xh[split:]))

        # --- objective for TLBO (CV‑MSE) ------------------------------
        def obj(params):
            C, g = params
            svr = SVR(kernel="rbf", C=C, gamma=g)
            scores = cross_val_score(
                svr, Xtr, ytr,
                cv=tscv,
                scoring="neg_mean_squared_error",
                n_jobs=-1
            )
            return -scores.mean()

        best, _ = tlbo_optimize(obj, svr_bounds,
                                pop_size=pop_size,
                                max_iter=generations,
                                verbose=False)
        Copt, gopt = best

        # --- final fit & predict --------------------------------------
        svr = SVR(kernel="rbf", C=Copt, gamma=gopt)
        svr.fit(Xtr, ytr)
        preds = svr.predict(Xte)

        rmse, mae, nmse, ds = metrics(yte, preds)
        print(f"{h}-day → RMSE={rmse:.4f}, MAE={mae:.4f}, "
              f"NMSE={nmse:.2e}, DS={ds:.2f}%")
        results.append({"model": name, "horizon": h, "C": Copt, "gamma": gopt,
                        "RMSE": rmse, "MAE": mae, "NMSE": nmse, "DS": ds})

        # --- plotting -------------------------------------------------
        if plot_results:
            test_dates = ind.index[split:-h]             # same rows as yte / preds
            fig, ax = plt.subplots(figsize=(8, 3))
            ax.plot(test_dates, yte, label="Actual", linewidth=2)
            ax.plot(test_dates, preds, "--", label="Predicted")
            ax.set(title=f"{name} {h}-Day Forecast", xlabel="Date", ylabel="COMDEX")
            ax.legend()
            fig.tight_layout()

if plot_results:
    plt.show()

# Summary table of every (model, horizon) run.
results_df = pd.DataFrame(results)
results_df
