In [1]:
# notebooks/Stock_AI_Prediction_Interactive_Final.ipynb

# %% [markdown]
# # Phân tích cổ phiếu & Dự đoán giá - Interactive
# 
# Dùng Plotly + ipywidgets để chọn ticker, xem giá thực tế vs dự đoán và trạng thái tăng/giảm.

# %%
# 1. IMPORTS
import sys
import os
import pandas as pd
import joblib
import plotly.graph_objects as go
from sklearn.preprocessing import StandardScaler
import ipywidgets as widgets
from IPython.display import display, clear_output

# Thêm src vào sys.path
sys.path.append(os.path.abspath("../src"))
from ml.load_all_features import load_all_feature_files
from ml.predict import predict_next_day
from ml.prepare import FEATURES

# %%
# 2. LOAD DATA
# Folder holding the per-ticker "*_features.csv" files, made absolute from a
# path relative to the current working directory.
FEATURE_DIR = os.path.abspath(
    os.path.join("..", "data", "processed", "features")
)

def load_features_from_dir(feature_dir):
    """Load every ``*_features.csv`` file in ``feature_dir`` into one DataFrame.

    Files are read in sorted filename order so the result does not depend on
    the arbitrary order returned by ``os.listdir``. When a file has no
    ``Ticker`` column, one is added from the filename with the
    ``_features.csv`` suffix removed, so a ticker that itself contains an
    underscore (``BRK_B_features.csv`` -> ``BRK_B``) is kept intact.

    Parameters
    ----------
    feature_dir : str
        Directory to scan (not recursive).

    Returns
    -------
    pandas.DataFrame
        All matching files concatenated with a fresh integer index. If a
        ``Date`` column is present it is parsed to datetime (unparseable
        values become ``NaT``) and rows are sorted by ``Ticker`` then ``Date``.

    Raises
    ------
    ValueError
        If no file in ``feature_dir`` ends with ``_features.csv``.
    """
    suffix = "_features.csv"
    frames = []
    # sorted(): os.listdir makes no ordering guarantee, and without a Date
    # column the concatenation order is the final row order.
    for file_name in sorted(os.listdir(feature_dir)):
        if not file_name.endswith(suffix):
            continue
        frame = pd.read_csv(os.path.join(feature_dir, file_name))
        if "Ticker" not in frame.columns:
            # Strip the known suffix rather than splitting on "_" so tickers
            # containing underscores are not truncated.
            frame["Ticker"] = file_name[: -len(suffix)]
        frames.append(frame)
    if not frames:
        raise ValueError("No feature files found in " + feature_dir)
    df_all = pd.concat(frames, ignore_index=True)
    if "Date" in df_all.columns:
        df_all["Date"] = pd.to_datetime(df_all["Date"], errors="coerce")
        df_all = df_all.sort_values(["Ticker", "Date"]).reset_index(drop=True)
    return df_all

# Read all feature files once; later cells filter this frame per ticker.
df = load_features_from_dir(FEATURE_DIR)
# Distinct ticker symbols in order of first appearance in the loaded frame.
tickers = df["Ticker"].unique()
print(
    f"Loaded data: {len(df)} rows, {len(tickers)} tickers."
)

# %%
# 3. LOAD THE EXISTING MODELS
# Resolve both model files under ../models to absolute paths.
reg_model_path, clf_model_path = (
    os.path.abspath(os.path.join("..", "models", model_file))
    for model_file in ("LinearReg.pkl", "RandomForestClf.pkl")
)

# NOTE: joblib files are pickle-based; only load model files from a trusted source.
# The regression model is loaded first, then the classifier.
reg_model, clf_model = (
    joblib.load(model_path) for model_path in (reg_model_path, clf_model_path)
)

# %%
# 4. CREATE THE TICKER DROPDOWN
# The first loaded ticker is preselected.
dropdown = widgets.Dropdown(
    description='Ticker:',
    options=tickers,
    value=tickers[0],
    disabled=False,
)

# Output area that the chart callback renders into; shown right below the dropdown.
output = widgets.Output()
display(dropdown, output)

# %%
# 5. GRAPH UPDATE CALLBACK
def update_graph(change):
    """Redraw the chart in ``output`` for the newly selected ticker.

    Filters the module-level ``df`` to the ticker in ``change['new']``, drops
    rows containing any NaN, standardizes the ``FEATURES`` columns, runs both
    loaded models on them, asks ``predict_next_day`` for the next-day price,
    then renders a Plotly figure and prints the forecast inside ``output``.

    Parameters
    ----------
    change : dict
        Change notification as delivered by ``dropdown.observe``; only the
        ``'new'`` key (the selected ticker) is read.

    Returns
    -------
    None
    """
    ticker = change['new']
    # Explicit copy: a column is added below, and assigning on a filtered
    # slice of ``df`` can trigger pandas' SettingWithCopyWarning.
    df_t = df[df['Ticker'] == ticker].dropna().copy()

    if df_t.empty:
        # StandardScaler.fit_transform raises on zero samples; report the
        # situation in the output area instead of failing inside the callback.
        with output:
            clear_output(wait=True)
            print(f"No complete rows available for {ticker}; nothing to plot.")
        return

    X = df_t[FEATURES].values
    # NOTE(review): the scaler is re-fit on the selected ticker's rows on every
    # call. If the models were trained with a different (persisted) scaler,
    # these inputs are not on the training scale - confirm against the
    # training pipeline.
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Regression prediction
    y_pred = reg_model.predict(X_scaled)

    # Classification prediction, stored per row so it can color the markers
    y_pred_clf = clf_model.predict(X_scaled)
    df_t['Pred_Up'] = y_pred_clf

    next_price = predict_next_day(df_t, reg_model_path)

    with output:
        # wait=True keeps the old chart until the new one is ready (less flicker)
        clear_output(wait=True)
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=df_t['Date'], y=df_t['Close'],
                                 mode='lines+markers', name='Actual Close'))
        fig.add_trace(go.Scatter(x=df_t['Date'], y=y_pred,
                                 mode='lines', name='Predicted Close'))
        # Same close prices again, colored by the classifier output
        fig.add_trace(go.Scatter(x=df_t['Date'], y=df_t['Close'],
                                 mode='markers',
                                 marker=dict(color=df_t['Pred_Up'], colorscale='RdBu', size=8),
                                 name='Pred Up/Down'))
        fig.update_layout(title=f"{ticker} - Actual vs Predicted Close",
                          xaxis_title='Date', yaxis_title='Price',
                          legend=dict(y=0.99, x=0.01))
        fig.show()

        print(f"Predicted next day price for {ticker}: {next_price:.2f}")

# %%
# 6. CONNECT THE DROPDOWN
# Re-render whenever the dropdown's 'value' trait changes.
dropdown.observe(update_graph, names='value')

# Initial render: call the callback by hand with a change-like dict, since no
# change event has fired yet for the preselected ticker.
update_graph(dict(new=dropdown.value))


Loaded data: 6185 rows, 5 tickers.


Dropdown(description='Ticker:', options=('AAPL', 'AMZN', 'GOOG', 'MSFT', 'TSLA'), value='AAPL')

Output()