In [55]:
from utils import get_stock_price_data, LIST_STOCK
from sklearn.linear_model import LinearRegression
import numpy as np
from sklearn.metrics import classification_report
from tqdm import tqdm

In [56]:
time_span = 7
history_cols = [f'close_t{i}' for i in range(-time_span+1, 1)]
X = np.array(range(0, len(history_cols))).reshape(-1, 1)
def compute_slope_history(row):
    y = row[history_cols].values.astype(float)
    model = LinearRegression()
    model.fit(X, y)
    return model.coef_[0]

In [57]:
all_preds = []
all_truths = []
reports = []
threshold = 0.03 / time_span # Tăng giảm 3% trong time_span
for symbol in tqdm(LIST_STOCK):
    df = get_stock_price_data(symbol, with_ground_truth=True)
    df.drop(['open', 'high', 'low', 'volume', 'close_t0', 'close_t1', 'close_t2', 'close_t3', 'close_t4', 'close_t5', 'close_t6'], axis=1, inplace=True)
    for i in range(-time_span+1, 1):
        df[f'close_t{i}'] = df['close'].shift(-i)
    df.dropna(inplace=True)
    for i in range(0, -time_span, -1):
        df[f'close_t{i}'] = df[f'close_t{i}']/df[f'close_t{-time_span+1}']

    coef_col_name = f'coef_close_t{-time_span+1}_to_close_t0'
    df[coef_col_name] = df.apply(compute_slope_history, axis=1)
    df[f'predict'] = np.where(
        df[coef_col_name] >= threshold, 'up',
        np.where(df[coef_col_name] <= -threshold, 'down', 'sideways')
    )
    preds = df["predict"].astype(str).str.lower()
    truths = df["ground_truth"].astype(str).str.lower()
    reports.append((symbol, classification_report(preds, truths, output_dict=True)))
    all_preds.extend(preds)
    all_truths.extend(truths)

100%|██████████| 164/164 [00:44<00:00,  3.65it/s]


In [60]:
report = classification_report(all_truths, all_preds, digits=5)
print(report)

              precision    recall  f1-score   support

        down    0.23146   0.23093   0.23120     16425
    sideways    0.60012   0.59876   0.59944     42737
          up    0.28258   0.28452   0.28355     19700

    accuracy                        0.44365     78862
   macro avg    0.37139   0.37140   0.37139     78862
weighted avg    0.44401   0.44365   0.44383     78862



In [65]:
def classification_report_to_latex(report_dict):
    lines = []
    lines.append(r"\begin{table}[H]")
    lines.append(r"\centering")
    lines.append(r"\begin{tabular}{lcccc}")
    lines.append(r"\hline")
    lines.append(r"\textbf{} & \textbf{Precision} & \textbf{Recall} & \textbf{F1-Score} & \textbf{Support} \\")
    lines.append(r"\hline")
    
    # Rows for classes
    for label in ['down', 'sideways', 'up']:
        row = report_dict[label]
        line = f"\\textbf{{{label}}} & {row['precision']:.5f} & {row['recall']:.5f} & {row['f1-score']:.5f} & {int(row['support'])} \\\\"
        lines.append(line)

    lines.append(r"\hline")
    
    # Accuracy
    accuracy = report_dict['accuracy']
    lines.append(f"\\textbf{{accuracy}} & & & {accuracy:.5f} & {int(report_dict['macro avg']['support'])} \\\\")
    
    # Macro avg
    macro = report_dict['macro avg']
    lines.append(f"\\textbf{{macro avg}} & {macro['precision']:.5f} & {macro['recall']:.5f} & {macro['f1-score']:.5f} & {int(macro['support'])} \\\\")
    
    # Weighted avg
    weighted = report_dict['weighted avg']
    lines.append(f"\\textbf{{weighted avg}} & {weighted['precision']:.5f} & {weighted['recall']:.5f} & {weighted['f1-score']:.5f} & {int(weighted['support'])} \\\\")
    
    lines.append(r"\hline")
    lines.append(r"\end{tabular}")
    lines.append(r"\caption{Classification Report}")
    lines.append(r"\end{table}")
    
    latex_code = "\n".join(lines)
    print(latex_code)


In [84]:
classification_report_to_latex(classification_report(all_truths, all_preds, output_dict=True))

\begin{table}[H]
\centering
\begin{tabular}{lcccc}
\hline
\textbf{} & \textbf{Precision} & \textbf{Recall} & \textbf{F1-Score} & \textbf{Support} \\
\hline
\textbf{down} & 0.23146 & 0.23093 & 0.23120 & 16425 \\
\textbf{sideways} & 0.60012 & 0.59876 & 0.59944 & 42737 \\
\textbf{up} & 0.28258 & 0.28452 & 0.28355 & 19700 \\
\hline
\textbf{accuracy} & & & 0.44365 & 78862 \\
\textbf{macro avg} & 0.37139 & 0.37140 & 0.37139 & 78862 \\
\textbf{weighted avg} & 0.44401 & 0.44365 & 0.44383 & 78862 \\
\hline
\end{tabular}
\caption{Classification Report}
\end{table}


In [70]:
reports.sort(key=lambda x: x[1]['weighted avg']['f1-score'], reverse=True)

In [89]:
for x in reports[:10]:
    print(f"{x[0]} ({x[1]['weighted avg']['f1-score']:.5f})", end=', ')

THD (0.92900), SSH (0.89481), DSN (0.85326), ODE (0.80670), KDC (0.79055), SMB (0.78645), VSH (0.68952), EID (0.67432), VJC (0.64413), VNS (0.64403), 

In [90]:
for x in reports[-10:]:
    print(f"{x[0]} ({x[1]['weighted avg']['f1-score']:.5f})", end=', ')

MWG (0.33029), SKG (0.32258), VIF (0.31032), VGI (0.31017), MSR (0.30619), NKG (0.30144), PLC (0.30095), ELC (0.29997), BCG (0.29723), SBD (0.29518), 