In [46]:
!pip install numpy pandas scikit-learn ta


Collecting ta
  Downloading ta-0.11.0.tar.gz (25 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: ta
  Building wheel for ta (setup.py): started
  Building wheel for ta (setup.py): finished with status 'done'
  Created wheel for ta: filename=ta-0.11.0-py3-none-any.whl size=29421 sha256=bdaac113d17df7da28bafe5e07b5582f4c41d1041e7ed85322e78df3f108e581
  Stored in directory: c:\users\keert\appdata\local\pip\cache\wheels\a1\d7\29\7781cc5eb9a3659d032d7d15bdd0f49d07d2b24fec29f44bc4
Successfully built ta
Installing collected packages: ta
Successfully installed ta-0.11.0




In [3]:
!pip install ace_tools

Collecting ace_tools
  Downloading ace_tools-0.0-py3-none-any.whl.metadata (300 bytes)
Downloading ace_tools-0.0-py3-none-any.whl (1.1 kB)
Installing collected packages: ace_tools
Successfully installed ace_tools-0.0




In [1]:
import pandas as pd
import numpy as np
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
import ta  # For technical indicators
from datetime import timedelta

# 1. Load & parse
df = pd.read_csv('UTI_Gold_ETF_MF_USD_01012023_31122024_ALL.csv')
df['Date'] = pd.to_datetime(df['All_Date'], format='%m-%d-%Y')
df = df.sort_values('Date').set_index('Date')

# 2. Clean numeric columns
def parse_percent(x):
    if pd.isna(x): return np.nan
    return float(x.strip('%')) / 100

def parse_vol(x):
    if pd.isna(x): return np.nan
    if isinstance(x, str):
        if x.endswith('K'): return float(x[:-1]) * 1e3
        if x.endswith('M'): return float(x[:-1]) * 1e6
    try: return float(x)
    except: return np.nan

df['ETF_Change'] = df['ETF_Change'].apply(parse_percent)
df['USD_Change'] = df['USD_Change'].apply(parse_percent)
df['ETF_Vol'] = df['ETF_Vol'].apply(parse_vol)
df['Gold_Volume'] = df['Gold_Volume'].apply(parse_vol)

# Convert other numerics
num_cols = ['MF_NAV','ETF_Price','ETF_Open','ETF_High','ETF_Low',
            'USD_Price','USD_Open','USD_High','USD_Low',
            'Gold_Open','Gold_High','Gold_Low','Gold_Close']
for col in num_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')

#df = df.fillna(method='ffill').dropna()
from statsmodels.tsa.statespace.structural import UnobservedComponents

# Apply Kalman smoothing to all numeric columns with missing data
for col in df.columns:
    if df[col].isnull().sum() > 0 and df[col].dtype != 'object':
        try:
            model = UnobservedComponents(df[col], level='llevel')
            result = model.fit(disp=False)
            df[col] = df[col].fillna(result.fittedvalues)
        except:
            continue  # fallback to skip if fitting fails


# 3. Add technical indicators
df['SMA_10'] = ta.trend.sma_indicator(df['ETF_Price'], window=10)
df['EMA_10'] = ta.trend.ema_indicator(df['ETF_Price'], window=10)
macd = ta.trend.macd(df['ETF_Price'])
df['MACD'] = macd
bb = ta.volatility.BollingerBands(df['ETF_Price'], window=20)
df['BB_High'] = bb.bollinger_hband()
df['BB_Low'] = bb.bollinger_lband()

# 4. Add lag features
for lag in [1, 3, 7, 14,30]:
    df[f'MF_NAV_lag{lag}'] = df['MF_NAV'].shift(lag)

df = df.dropna()

# 5. Feature set
features = [
    'ETF_Price', 'ETF_Vol', 'USD_Price', 'Gold_Close', 'Gold_Volume',
    'SMA_10', 'EMA_10', 'MACD', 'BB_High', 'BB_Low',
    'MF_NAV_lag1', 'MF_NAV_lag3', 'MF_NAV_lag7', 'MF_NAV_lag14'
]
target = 'MF_NAV'

X = df[features]
y = df[target]

# 6. Scale
scaler_x = StandardScaler().fit(X)
X_scaled = scaler_x.transform(X)
scaler_y = StandardScaler().fit(y.values.reshape(-1,1))
y_scaled = scaler_y.transform(y.values.reshape(-1,1)).flatten()

# 7. Train/test split
X_train = X_scaled[:-11]
y_train = y_scaled[:-11]
X_test = X_scaled[-11:]
y_test = y[-11:].values

# 8. Fit SVR (Cascaded SVM)
svm = SVR(C=1.0, kernel='poly', epsilon=0.1,degree=3,gamma='scale')
svm.fit(X_train, y_train)

# 9. Predict Jan 1–15, 2025 (business days)
future_dates = pd.bdate_range('2025-01-01', '2025-01-15')
last_known = df.iloc[-1]

# Extend dataframe for prediction
forecast_df = pd.DataFrame(index=future_dates, columns=df.columns)
for col in df.columns:
    forecast_df[col] = last_known[col]

df_all = pd.concat([df, forecast_df])
for lag in [1,3,7,14,30]:
    df_all[f'MF_NAV_lag{lag}'] = df_all['MF_NAV'].shift(lag)

# Predict step-by-step
predictions = []
for date in future_dates:
    row = df_all.loc[date]
    x_f = row[features].values.reshape(1, -1)
    x_f_scaled = scaler_x.transform(x_f)
    y_pred_scaled = svm.predict(x_f_scaled)
    y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).item()
    predictions.append(y_pred)
    df_all.at[date, 'MF_NAV'] = y_pred
    # Update lag features for next day
    for lag in [1,3,7,14]:
        prev_date = date - pd.Timedelta(days=lag)
        if prev_date in df_all.index:
            df_all.at[date, f'MF_NAV_lag{lag}'] = df_all.at[prev_date, 'MF_NAV']

# 10. Display
output = pd.DataFrame({
    'Date': future_dates.strftime('%Y-%m-%d'),
    'Predicted_NAV': np.round(predictions, 4)
})
print(output)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


          Date  Predicted_NAV
0   2025-01-01        14.6621
1   2025-01-02        14.5409
2   2025-01-03        14.6754
3   2025-01-06        14.6854
4   2025-01-07        14.7101
5   2025-01-08        14.6977
6   2025-01-09        14.7580
7   2025-01-10        14.8114
8   2025-01-13        14.7403
9   2025-01-14        14.7766
10  2025-01-15        14.7469


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


In [62]:
y_pred_test = scaler_y.inverse_transform(svm.predict(X_test).reshape(-1,1)).flatten()
print("Backtest MAE:", mean_absolute_error(y_test, y_pred_test))
print("Backtest RMSE:", np.sqrt(mean_squared_error(y_test, y_pred_test)))


Backtest MAE: 0.21148356327586415
Backtest RMSE: 0.2384069071193449


In [4]:
import pandas as pd
import numpy as np
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import ta
from datetime import timedelta
from statsmodels.tsa.statespace.structural import UnobservedComponents

# 1. Load data
df = pd.read_csv("UTI_Gold_ETF_MF_USD_01012023_31122024_ALL.csv")
df['Date'] = pd.to_datetime(df['All_Date'], format='%m-%d-%Y')
df = df.sort_values('Date').set_index('Date')

# 2. Clean numeric columns
def parse_percent(x):
    if pd.isna(x): return np.nan
    return float(x.strip('%')) / 100

def parse_vol(x):
    if pd.isna(x): return np.nan
    if isinstance(x, str):
        if x.endswith('K'): return float(x[:-1]) * 1e3
        if x.endswith('M'): return float(x[:-1]) * 1e6
    try: return float(x)
    except: return np.nan

df['ETF_Change'] = df['ETF_Change'].apply(parse_percent)
df['USD_Change'] = df['USD_Change'].apply(parse_percent)
df['ETF_Vol'] = df['ETF_Vol'].apply(parse_vol)
df['Gold_Volume'] = df['Gold_Volume'].apply(parse_vol)

# Convert other numerics
num_cols = ['MF_NAV','ETF_Price','ETF_Open','ETF_High','ETF_Low',
            'USD_Price','USD_Open','USD_High','USD_Low',
            'Gold_Open','Gold_High','Gold_Low','Gold_Close']
for col in num_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Kalman smoothing for missing values
for col in df.columns:
    if df[col].isnull().sum() > 0 and df[col].dtype != 'object':
        try:
            model = UnobservedComponents(df[col], level='llevel')
            result = model.fit(disp=False)
            df[col] = df[col].fillna(result.fittedvalues)
        except:
            continue

# 3. Technical indicators
df['SMA_10'] = ta.trend.sma_indicator(df['ETF_Price'], window=10)
df['EMA_10'] = ta.trend.ema_indicator(df['ETF_Price'], window=10)
df['MACD'] = ta.trend.macd(df['ETF_Price'])
bb = ta.volatility.BollingerBands(df['ETF_Price'], window=20)
df['BB_High'] = bb.bollinger_hband()
df['BB_Low'] = bb.bollinger_lband()

# 4. Lag features
for lag in [1, 3, 7, 14, 30]:
    df[f'MF_NAV_lag{lag}'] = df['MF_NAV'].shift(lag)

# 5. Additional engineered features
df['ETF_Gold_Spread'] = df['ETF_Price'] - df['Gold_Close']
df['ETF_Vol_Pressure'] = df['ETF_Vol'] / (df['ETF_Price'] + 1e-6)
df['NAV_PCT_3'] = df['MF_NAV'].pct_change(3)

# Kalman smoothing after feature engineering
for col in df.columns:
    if df[col].dtype in [np.float64, np.int64] and df[col].isnull().any():
        try:
            model = UnobservedComponents(df[col], level='llevel')
            result = model.fit(disp=False)
            df[col] = df[col].fillna(result.fittedvalues)
        except:
            df[col] = df[col].fillna(method='bfill')

# 6. Final features and target
features = [
    'ETF_Price', 'ETF_Vol', 'USD_Price', 'Gold_Close', 'Gold_Volume',
    'SMA_10', 'EMA_10', 'MACD', 'BB_High', 'BB_Low',
    'MF_NAV_lag1', 'MF_NAV_lag3', 'MF_NAV_lag7', 'MF_NAV_lag14',
    'ETF_Gold_Spread', 'ETF_Vol_Pressure', 'NAV_PCT_3'
]
target = 'MF_NAV'

df = df.dropna()
X = df[features]
y = df[target]

# 7. Scale
scaler_x = StandardScaler().fit(X)
X_scaled = scaler_x.transform(X)
scaler_y = StandardScaler().fit(y.values.reshape(-1,1))
y_scaled = scaler_y.transform(y.values.reshape(-1,1)).flatten()

# 8. Train/test split
X_train = X_scaled[:-11]
y_train = y_scaled[:-11]
X_test = X_scaled[-11:]
y_test = y[-11:].values

# 9. SVR with tuned hyperparameters
svm = SVR(C=50, kernel='poly', epsilon=0.01, degree=3, gamma='scale')
svm.fit(X_train, y_train)

# 10. Forecast Jan 1–15, 2025
future_dates = pd.bdate_range('2025-01-01', '2025-01-15')
last_known = df.iloc[-1]
forecast_df = pd.DataFrame(index=future_dates, columns=df.columns)
for col in df.columns:
    forecast_df[col] = last_known[col]

df_all = pd.concat([df, forecast_df])
for lag in [1, 3, 7, 14, 30]:
    df_all[f'MF_NAV_lag{lag}'] = df_all['MF_NAV'].shift(lag)

# Predict step-by-step
predictions = []
for date in future_dates:
    row = df_all.loc[date]
    x_f = row[features].values.reshape(1, -1)
    x_f_scaled = scaler_x.transform(x_f)
    y_pred_scaled = svm.predict(x_f_scaled)
    y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).item()
    predictions.append(y_pred)
    df_all.at[date, 'MF_NAV'] = y_pred
    # update lags
    for lag in [1, 3, 7, 14, 30]:
        prev_date = date - pd.Timedelta(days=lag)
        if prev_date in df_all.index:
            df_all.at[date, f'MF_NAV_lag{lag}'] = df_all.at[prev_date, 'MF_NAV']

# Output forecast
output_df = pd.DataFrame({
    'Date': future_dates.strftime('%Y-%m-%d'),
    'Predicted_NAV': np.round(predictions, 4)
})
output_df.reset_index(drop=True, inplace=True)

import ace_tools as tools; 
tools.display_dataframe_to_user(name="Predicted NAV (SVR)", dataframe=output_df)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


ModuleNotFoundError: No module named 'ace_tools'

In [5]:
import ace_tools as tools

ModuleNotFoundError: No module named 'ace_tools'