In [4]:
from tvDatafeed import TvDatafeed, Interval

In [9]:
tv = TvDatafeed()



In [10]:
# index
comb = tv.get_hist(symbol='COMB.N0000',exchange='CSELK',interval=Interval.in_daily,n_bars=10000)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import mplfinance as mpf
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from ta import add_all_ta_features
from ta.utils import dropna

# Example NumPy array (replace this with your actual data)
data = np.array(comb)  # data.shape - (3030, 6)

# Convert the NumPy array to a DataFrame
columns = ["symbol", "open", "high", "low", "close", "volume"]
df = pd.DataFrame(data, columns=columns)

# Convert relevant columns to numeric
df["open"] = df["open"].astype(float)
df["high"] = df["high"].astype(float)
df["low"] = df["low"].astype(float)
df["close"] = df["close"].astype(float)
df["volume"] = df["volume"].astype(float)

# Ensure the DataFrame index is a DatetimeIndex
df['date'] = pd.date_range(start='2023-01-01', periods=len(df), freq='D')
df.set_index('date', inplace=True)

# Add technical indicators
df = dropna(df)  # Drop NaN values
df = add_all_ta_features(
    df, open="open", high="high", low="low", close="close", volume="volume", fillna=True
)

# Identify support and resistance levels
def identify_support_resistance(df, window=5):
    df['support'] = df['low'].rolling(window=window, center=True).min()
    df['resistance'] = df['high'].rolling(window=window, center=True).max()
    return df

df = identify_support_resistance(df)

# Define features and target
features = [
    "open", "high", "low", "close", "volume",  # Original features
    "trend_macd", "trend_macd_signal", "trend_macd_diff",  # MACD
    "momentum_rsi",  # RSI
    "volatility_bbm", "volatility_bbh", "volatility_bbl",  # Bollinger Bands
    "trend_ema_fast", "trend_ema_slow",  # Exponential Moving Averages
    "support", "resistance"  # Support and Resistance
]
target = "close"

# Preprocess the data
def preprocess_data(df, features, target, time_steps=60):
    if len(df) < time_steps + 1:
        raise ValueError(f"Not enough data to create sequences. Required: {time_steps + 1}, Available: {len(df)}")
    
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(df[features])
    
    X, y = [], []
    for i in range(time_steps, len(scaled_data)):
        X.append(scaled_data[i-time_steps:i, :])
        y.append(scaled_data[i, features.index(target)])
    
    return np.array(X), np.array(y), scaler

# Define time steps and preprocess data
time_steps = 60  # Use 60 time steps for better context
try:
    X, y, scaler = preprocess_data(df, features, target, time_steps)
except ValueError as e:
    print(e)
    exit()

# Split data into training and testing sets
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Build the LSTM model
def build_lstm_model(input_shape):
    model = Sequential([
        Input(shape=input_shape),
        LSTM(150, return_sequences=True),
        Dropout(0.3),
        LSTM(150, return_sequences=False),
        Dropout(0.3),
        Dense(100),
        Dense(1)
    ])
    model.compile(optimizer="adam", loss="mean_squared_error")
    return model

# Create and train the model
model = build_lstm_model((X_train.shape[1], X_train.shape[2]))
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test))

# Plot training and validation loss
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.legend()
plt.title("Training and Validation Loss")
plt.show()

# Function to predict future prices (updated with debug statements)
def predict_future_prices(model, last_sequence, scaler, time_steps, future_days):
    if last_sequence.shape[0] < time_steps:
        raise ValueError(f"last_sequence must have at least {time_steps} time steps. Found: {last_sequence.shape[0]}")
    
    predictions = []
    current_sequence = last_sequence[-time_steps:].copy()
    
    for _ in range(future_days):
        next_prediction = model.predict(current_sequence[np.newaxis, :, :])[0, 0]
        predictions.append(next_prediction)
        
        # Update the sequence with the new prediction
        new_row = np.append(current_sequence[-1, 1:], next_prediction).reshape(1, -1)
        current_sequence = np.vstack([current_sequence, new_row])
        current_sequence = current_sequence[1:]  # Remove the oldest time step
    
    predictions = np.array(predictions).reshape(-1, 1)
    close_scaler = MinMaxScaler()
    close_scaler.min_, close_scaler.scale_ = scaler.min_[features.index(target)], scaler.scale_[features.index(target)]
    predictions = close_scaler.inverse_transform(predictions)
    return predictions

# Predict prices for the next month (30 days)
try:
    last_sequence = X_test[-1]  # Use the last sequence from the test data
    future_days = 30  # Predict for the next 30 days
    predictions = predict_future_prices(model, last_sequence, scaler, time_steps, future_days)
except ValueError as e:
    print(f"Error in predict_future_prices: {e}")
    exit()

# Convert predictions into DataFrame
dates = pd.date_range(start=df.index[-1], periods=future_days + 1, freq='D')[1:]
pred_df = pd.DataFrame(predictions, columns=["close"], index=dates)
pred_df["open"] = pred_df["close"].shift(1)
pred_df["open"].fillna(pred_df["close"].iloc[0], inplace=True)
pred_df["high"] = pred_df[["open", "close"]].max(axis=1)
pred_df["low"] = pred_df[["open", "close"]].min(axis=1)
pred_df["volume"] = 0  # No volume for predictions

# Plot original data as candlestick
mpf.plot(df[-100:], type='candle', style='charles', title="Stock Price History", ylabel="Price")

# Plot future predictions as candlestick
if not pred_df.empty:
    mpf.plot(pred_df.dropna(), type='candle', style='charles', title="Next Month Price Prediction", ylabel="Price")
else:
    print("No predictions to plot. Check the predict_future_prices function.")

Epoch 1/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 206ms/step - loss: nan - val_loss: nan
Epoch 2/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 162ms/step - loss: nan - val_loss: nan
Epoch 3/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 190ms/step - loss: nan - val_loss: nan
Epoch 4/100
[1m51/75[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m2s[0m 116ms/step - loss: nan