In [1]:
import os
import pandas as pd

In [2]:
# Training data is read from a local CSV; per the error message below,
# data_fetcher.py is the script expected to create it.
csv_file = "data/stock_training_2023-01-01_2024-12-31.csv"

# Load the full table when the file is present, otherwise fail fast with a hint.
if os.path.exists(csv_file):
    df_all = pd.read_csv(csv_file)
else:
    raise FileNotFoundError(
        "Please run data_fetcher.py to download the data first.")

In [3]:
from data.data_fetcher import get_stock_df
from data.indicator import add_rsi
import numpy as np

             requires requests_html, which is not installed.
             
             Install using: 
             pip install requests_html
             
             After installation, you may have to restart your Python session.


In [37]:
def add_rsi(df, column='Close', period=14):
    """
    Append an EMA-smoothed Relative Strength Index (RSI) column to ``df``.

    Parameters:
    df (pd.DataFrame): DataFrame containing price data. It is modified in
        place: the RSI column is added to (or overwritten on) this object.
    column (str): Column name for the closing price.
    period (int): Lookback period, used as the EMA span (default=14).

    Returns:
    pd.DataFrame: The same ``df`` object, now carrying an ``RSI_<period>``
        column (``RSI_14`` for the default period).

    Notes:
    The first ``period - 1`` rows have no RSI of their own (the EMA needs
    ``period`` observations). They are back-filled with the first computed
    value, so those rows hold a number derived from later prices. If ``df``
    has fewer than ``period`` rows, the whole column stays NaN.
    """
    delta = df[column].diff(1)  # Row-to-row price change; first entry is NaN

    # Separate gains and losses. Comparisons with NaN are False, so the
    # leading NaN delta counts as neither a gain nor a loss (both become 0).
    gain = np.where(delta > 0, delta, 0)
    loss = np.where(delta < 0, -delta, 0)

    # Use exponential moving average (EMA) for stability
    avg_gain = pd.Series(gain).ewm(span=period, min_periods=period).mean()
    avg_loss = pd.Series(loss).ewm(span=period, min_periods=period).mean()

    # Relative Strength (RS); the epsilon avoids division by zero when
    # there were no losses inside the window.
    rs = avg_gain / (avg_loss + 1e-10)

    # RSI, with the warm-up NaNs back-filled. ``Series.bfill()`` replaces the
    # deprecated ``fillna(method="bfill")`` spelling.
    rsi = (100 - (100 / (1 + rs))).bfill()

    # The helper Series carry a fresh RangeIndex, so assign positionally
    # (raw array) instead of aligning on df's own index.
    df[f"RSI_{period}"] = rsi.to_numpy()

    return df

In [38]:
# Take the GOOGL rows, discard the RSI_14 column they arrive with, and rebuild
# that column with add_rsi so the result can be inspected in the cells below.
df = add_rsi(
    get_stock_df(df_all, "GOOGL").drop(columns="RSI_14"),
    column="Close",
    period=14,
)

In [39]:
# Sanity check: (rows, columns) of the GOOGL frame after RSI_14 was re-added.
df.shape

(500, 57)

In [40]:
# Inspect the recomputed RSI_14 column. The identical leading values are the
# warm-up rows that add_rsi back-fills from the first computed RSI.
df["RSI_14"]

1000    73.308174
1001    73.308174
1002    73.308174
1003    73.308174
1004    73.308174
          ...    
1495    66.314562
1496    68.387570
1497    66.754179
1498    57.872627
1499    53.478638
Name: RSI_14, Length: 500, dtype: float64

In [None]:
# Build evaluation samples for the listed tickers, load the saved model, and
# print its evaluation table on the held-out split.
#
# NOTE(review): this cell uses names that no visible cell imports or defines
# (create_batch_feature, split_train_test_data, PredictionModel, ENCODER_TYPE,
# device, torch, CustomLoss, eval_model) — confirm they are in scope after
# Restart Kernel -> Run All.
stocks = ["TSLA"]

# Collect per-ticker arrays and concatenate once afterwards, so that skipping
# the first ticker cannot leave the accumulators undefined for later ones.
feature_chunks, label_chunks, date_chunks = [], [], []
for stock in stocks:
    print(">>>>>>stock: ", stock)
    try:
        df = get_stock_df(df_all, stock)
        features, labels, dates = create_batch_feature(df)
        if np.isnan(features).any() or np.isnan(labels).any():
            print(f"NaN detected in {stock}")
            continue
        if np.isinf(features).any() or np.isinf(labels).any():
            print(f"INF detected in {stock}")
            continue
    except Exception as exc:
        # Best-effort: skip this ticker, but report why instead of hiding it.
        # (Exception, not a bare except, so Ctrl-C still interrupts the cell.)
        print(f"Error in processing {stock}: {exc!r}")
        continue
    feature_chunks.append(features)
    label_chunks.append(labels)
    date_chunks.append(dates)

if not feature_chunks:
    raise RuntimeError("No stock produced usable samples; nothing to evaluate.")

all_features = np.concatenate(feature_chunks, axis=0)
all_labels = np.concatenate(label_chunks, axis=0)
all_dates = np.concatenate(date_chunks)
print("total # of data samples: ", all_features.shape[0])

train_loader, test_dataset, idx_test = split_train_test_data(
    all_features, all_labels, batch_size=128)

# Axis 1 of the feature array is passed as seq_len, axis 2 as feature_len.
model = PredictionModel(feature_len=all_features.shape[2],
                        seq_len=all_features.shape[1],
                        encoder_type=ENCODER_TYPE).to(device)
# map_location lets a checkpoint saved on a different device load onto `device`.
model.load_state_dict(torch.load('./model/model.pth', map_location=device))
model.eval()
criterion = CustomLoss()

total_params = sum(p.numel() for p in model.parameters())
print("total # of model params: ", total_params)
predicted_labels, pr_table, dates_table = eval_model(
    model, criterion, test_dataset, all_dates[idx_test])
print(pr_table)