In [None]:
#!git clone https://github.com/triet4p/itapia.git

# 1. Preparing data and library

In [None]:
import sys
import os

# Root folder of the ai_service_quick project; it is put on sys.path so the
# `app.*` imports further down resolve. Override it with the
# ITAPIA_AI_SERVICE_ROOT environment variable instead of editing the notebook;
# the default is the checkout location the notebook was originally written for.
AI_SERVICE_ROOT = os.environ.get("ITAPIA_AI_SERVICE_ROOT", "F:\\ai-ml\\itapia\\ai_service_quick")

# Guard the append so re-running this cell does not pile up duplicate entries.
if AI_SERVICE_ROOT not in sys.path:
    sys.path.append(AI_SERVICE_ROOT)

In [None]:
import os
import warnings


def configure_kaggle_env(env=None):
    """Make sure Kaggle credentials come from the environment, not from the notebook.

    SECURITY: an earlier revision of this cell hard-coded a real Kaggle API key.
    That key must be treated as leaked and revoked in the Kaggle account
    settings. The key is now only ever read from the ``KAGGLE_KEY`` environment
    variable; it is never written by this notebook.

    Args:
        env: Mutable mapping to configure. Defaults to ``os.environ``.

    Returns:
        True when a non-empty ``KAGGLE_KEY`` is present, False otherwise
        (a warning is emitted in that case).
    """
    if env is None:
        env = os.environ

    # The username is not a secret; keep the previous value as a default so
    # existing setups behave the same, but let the environment override it.
    env.setdefault('KAGGLE_USERNAME', 'trietp1253201581')

    if env.get('KAGGLE_KEY'):
        return True

    warnings.warn(
        "KAGGLE_KEY is not set. Export it in the environment that starts the "
        "kernel before loading models from Kaggle; do not paste the key into "
        "the notebook."
    )
    return False


configure_kaggle_env()

In [None]:
#!pip install python-dotenv

In [None]:
from datetime import datetime
import pandas as pd
import numpy as np
import pickle
import json

from numpy.lib.stride_tricks import sliding_window_view

# Machine Learning & Validation
from sklearn.model_selection import TimeSeriesSplit
from sklearn.feature_selection import mutual_info_classif, mutual_info_regression
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC, SVR
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.multioutput import MultiOutputRegressor
from lightgbm import LGBMRegressor, LGBMClassifier 
#from xgboost import XGBRegressor, XGBClassifier
# Hyperparameter Tuning (recommended)
import optuna 

# Explainability
import shap

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

from app.forecasting.training import TrainingOrchestrator
from app.forecasting.task.triple_barrier import TripleBarrierTask, find_triple_barrier_optimal_params
from app.forecasting.task.ndays_distribution import NDaysDistributionTask
from app.forecasting.model import ScikitLearnForecastingModel
from app.forecasting.training.data_split import train_test_split
from app.forecasting.post_processing import NDaysDistributionPostProcessor, RoundingProcessor
from app.forecasting.training.optim import LGBMClassifierObjective, MultiOutLGBMRegressionObjective, get_best_params_for_kernel_model
from app.forecasting.training.data_split import get_walk_forward_splits
from app.core.utils import FORECASTING_TRAINING_BONUS_FEATURES, FORECASTING_TRAINING_SCORE_WEIGHTS
import app.core.config as cfg

# Notebook configuration: show up to 100 columns when rendering DataFrames
# and load the SHAP JavaScript needed for its plots.
pd.options.display.max_columns = 100
shap.initjs()

In [None]:
# Sector code used throughout the notebook: it is formatted into the task ids
# and into the training CSV file name (training_<SECTOR>.csv).
SECTOR: str = 'TECH'

In [None]:
# Build the sector-specific id of the triple-barrier problem from the
# config template, then create the task object for that id.
task1_id = cfg.TASK_ID_SECTOR_TEMPLATE.format(sector=SECTOR, problem=cfg.TRIPLE_BARRIER_PROBLEM_ID)
task1 = TripleBarrierTask(task1_id)

In [None]:
# Model wrapper used for the triple-barrier task. 'LGBM' is the name handed to
# ScikitLearnForecastingModel (presumably identifies a LightGBM kernel — confirm in the class).
model1 = ScikitLearnForecastingModel('LGBM')

In [None]:
# Attach the triple-barrier task to model1; the cells below load and query the model with this task.
model1.assign_task(task1)

In [None]:
# Load model1 through its Kaggle loader, passing the username from app config.
# NOTE(review): presumably requires valid Kaggle credentials in the environment — confirm.
model1.load_model_from_kaggle(cfg.KAGGLE_USERNAME)

In [None]:
# Show the kernel model held by the wrapper (bare expression -> cell output).
model1.kernel_model

In [None]:
# Locate the training CSV under <project root>/local/. The project root can be
# overridden with the ITAPIA_AI_SERVICE_ROOT environment variable; the default
# is the checkout location the notebook was originally written for.
_project_root = os.environ.get("ITAPIA_AI_SERVICE_ROOT", "F:\\ai-ml\\itapia\\ai_service_quick")
training_csv_path = os.path.join(_project_root, "local", f"training_{SECTOR}.csv")

enriched_df = pd.read_csv(training_csv_path, index_col='datetime_utc')
# Parse the index as timezone-aware UTC. Every later lookup in this notebook
# uses pd.to_datetime(..., utc=True); a tz-naive index would make those
# comparisons fail, so the timezone is made explicit here.
enriched_df.index = pd.to_datetime(enriched_df.index, utc=True)
# Work on a copy so the frame as loaded stays available unchanged.
df = enriched_df.copy()

In [None]:
# One-day window for AAPL: slicing .loc with the same UTC timestamp at both
# ends returns the matching row(s) as a DataFrame.
instance_ts = pd.to_datetime("2025-06-03", utc=True)
aapl_rows = df[df['ticker'] == 'AAPL']
X_instance = pd.DataFrame(aapl_rows.loc[instance_ts:instance_ts])

In [None]:
# Restrict X_instance to task1.selected_features (presumably a list of column
# names — confirm). Note that this rebinds X_instance, so the full row is no longer available under that name.
X_instance = X_instance[task1.selected_features]

In [None]:
from app.forecasting.explainer import TreeSHAPExplainer

In [None]:
# Explainer built around model1 (SHAP-based, judging by the class name — confirm).
explainer = TreeSHAPExplainer(model1)

In [None]:
# model1's prediction for the selected AAPL instance (shown as cell output).
model1.predict(X_instance)

In [None]:
# Explanation of model1's prediction for the same instance (shown as cell output).
explainer.explain_prediction(X_instance)

In [None]:
# Build the sector-specific id of the 5-day distribution regression problem
# from the config template, then create the task object for that id.
task2_id = cfg.TASK_ID_SECTOR_TEMPLATE.format(sector=SECTOR, problem=cfg.REG_5D_DIS_PROBLEM_ID)
# The second argument is presumably the horizon in days, matching the "5D" problem id — confirm.
task2 = NDaysDistributionTask(task2_id, 5)

In [None]:
# Post-processors that are attached to model2.post_processors further down.
rnd_prc = RoundingProcessor(4)  # presumably rounds to 4 decimal places — confirm
n5d_prc = NDaysDistributionPostProcessor(task2)

In [None]:
# Model wrapper for the N-days distribution task. 'Multi-LGBM' is the name
# handed to ScikitLearnForecastingModel (presumably a multi-output LightGBM kernel — confirm).
model2 = ScikitLearnForecastingModel('Multi-LGBM')
# Attach the distribution task to model2 before it is loaded and queried below.
model2.assign_task(task2)

In [None]:
# Load model2 through its Kaggle loader, passing the username from app config.
# NOTE(review): presumably requires valid Kaggle credentials in the environment — confirm.
model2.load_model_from_kaggle(cfg.KAGGLE_USERNAME)

In [None]:
# Show the kernel model held by the wrapper (bare expression -> cell output).
model2.kernel_model

In [None]:
from app.forecasting.explainer import MultiOutputTreeSHAPExplainer
# Explainer built around model2 (multi-output SHAP explainer, judging by the class name — confirm).
explainer2 = MultiOutputTreeSHAPExplainer(model2)

In [None]:
# Replace any existing post-processors on model2 with the rounding step
# followed by the N-days distribution step, in that order.
model2.post_processors = [rnd_prc, n5d_prc]

In [None]:
# model2's prediction for the AAPL instance (shown as cell output).
# NOTE(review): X_instance was reduced to task1.selected_features earlier; if
# task2 uses a different feature set this call may see the wrong columns — confirm.
model2.predict(X_instance)

In [None]:
# Explanation of model2's prediction for the same instance (shown as cell output).
# NOTE(review): X_instance holds task1.selected_features only — confirm that is what model2 expects.
explainer2.explain_prediction(X_instance)

In [None]:
# Training orchestrator constructed over the full working frame `df`.
orchestrator = TrainingOrchestrator(df)

In [None]:
# Register each model with its task on the orchestrator, in the original order.
for registered_model, registered_task in ((model1, task1), (model2, task2)):
    orchestrator.register_model_for_task(registered_model, registered_task)

In [None]:
# Ask the orchestrator to prepare targets for the registered tasks; the result
# is read from orchestrator.df_with_targets in the next cell.
orchestrator.prepare_all_targets()

In [None]:
# Copy of the orchestrator's frame with targets, for inspection without touching the orchestrator's own frame.
vdf = orchestrator.df_with_targets.copy()

In [None]:
# Show the last 8 entries of the AAPL record at 2025-06-02 UTC
# (presumably the target columns added by the orchestrator — confirm).
inspect_ts = pd.to_datetime("2025-06-02", utc=True)
vdf.loc[vdf['ticker'] == 'AAPL'].loc[inspect_ts].iloc[-8:]