In [None]:
import numpy as np
import pandas as pd
import yfinance as yf
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
# Variant: feature-selection fitness is scored with Linear Regression
# Dandelion Optimizer Implementation
class DandelionOptimizer:
    """Minimise an objective over binary feature masks by random sampling.

    The search keeps the best mask seen so far and compares it against
    freshly drawn random masks; no candidate is derived from a previous
    one.  ``population_size`` is stored on the instance but is not read
    by ``optimize``.
    """

    def __init__(self, objective_function, num_features, population_size=20, max_iterations=50):
        """Store the search configuration.

        Args:
            objective_function: Callable taking a 0/1 NumPy vector of
                length ``num_features`` and returning a score where
                lower is better.
            num_features: Length of every sampled mask.
            population_size: Kept for reference only (unused by the search).
            max_iterations: Number of random candidates drawn after the
                initial one.
        """
        self.objective_function = objective_function
        self.num_features = num_features
        self.population_size = population_size
        self.max_iterations = max_iterations

    def _draw_mask(self):
        """Sample one uniformly random 0/1 vector of length ``num_features``."""
        return np.random.randint(0, 2, self.num_features)

    def optimize(self):
        """Return the lowest-scoring mask among the sampled candidates.

        The winning mask is also printed to stdout before being returned.
        """
        incumbent = self._draw_mask()
        incumbent_cost = self.objective_function(incumbent)

        for _ in range(self.max_iterations):
            trial = self._draw_mask()
            trial_cost = self.objective_function(trial)

            # Only a strictly better score displaces the incumbent.
            if not trial_cost < incumbent_cost:
                continue
            incumbent, incumbent_cost = trial, trial_cost

        print("Best Feature Selection Solution:", incumbent)
        return incumbent

def fitness_function(solution, X, y):
    """Score a binary feature mask by the hold-out MAE of a linear regression.

    Args:
        solution: Binary mask with one entry per column of ``X``; columns
            whose entry equals 1 are kept.  Lists are accepted as well as
            NumPy arrays.
        X: 2-D NumPy array of features (rows are samples).
        y: Target values aligned with the rows of ``X``.

    Returns:
        Mean absolute error on a 20% hold-out split (lower is better), or
        ``float("inf")`` when the mask keeps no column or there are fewer
        than two samples to split.
    """
    # Coerce so that a plain list mask yields an element-wise comparison.
    mask = np.asarray(solution) == 1
    selected_features = X[:, mask]
    if selected_features.shape[1] == 0:
        return float("inf")
    # train_test_split raises ValueError rather than returning an empty
    # train set, so the "not enough rows" case has to be caught up front.
    if selected_features.shape[0] < 2:
        return float("inf")

    from sklearn.linear_model import LinearRegression
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import mean_absolute_error

    # Fixed random_state keeps the score of a given mask reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        selected_features, y, test_size=0.2, random_state=42
    )

    model = LinearRegression()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    return mean_absolute_error(y_test, y_pred)

def dandelion_optimizer_optimize(X, y):
    """Search for a binary column mask over ``X`` using DandelionOptimizer.

    Each candidate mask is scored with ``fitness_function`` against the
    given ``X`` and ``y``; the mask returned by the optimizer is passed
    back unchanged.
    """
    def score_mask(candidate_mask):
        return fitness_function(candidate_mask, X, y)

    search = DandelionOptimizer(score_mask, num_features=X.shape[1])
    best_mask = search.optimize()
    return best_mask

# Step 1: Load Stock Data
def load_stock_data(file_path):
    """Read a CSV of stock data and index it by its parsed ``Date`` column.

    Any failure (unreadable file, missing ``Date`` column, unparseable
    dates, ...) is reported on stdout and an empty DataFrame is returned
    instead of raising.
    """
    try:
        raw = pd.read_csv(file_path)
        dated = raw.assign(Date=pd.to_datetime(raw['Date']))
        frame = dated.set_index('Date')
    except Exception as e:
        print(f"Error loading data: {e}")
        frame = pd.DataFrame()
    return frame

# Step 2: Feature Engineering
def create_features(df):
    """Add moving-average, momentum and volatility columns derived from ``Close``.

    The frame is modified in place: six indicator columns are appended,
    every row containing a NaN is dropped, and the same object is
    returned.

    Raises:
        ValueError: If ``df`` is empty.
    """
    if df.empty:
        raise ValueError("Stock data is empty. Cannot proceed.")

    close = df['Close']

    # Simple moving averages over 10 and 50 rows.
    for window in (10, 50):
        df[f'SMA_{window}'] = close.rolling(window=window).mean()

    # Exponential moving averages with matching spans.
    for span in (10, 50):
        df[f'EMA_{span}'] = close.ewm(span=span, adjust=False).mean()

    df['Momentum'] = close.diff(5)
    df['Volatility'] = close.rolling(window=10).std()

    # The rolling/diff columns start with NaNs; drop those warm-up rows.
    df.dropna(inplace=True)
    return df

# Step 3: Feature Selection using Dandelion Optimizer
def feature_selection(X, y):
    """Return the columns of ``X`` chosen by the optimizer's binary mask.

    If the mask selects nothing, a warning is printed and ``X`` is
    returned unchanged so downstream code still has features to use.
    """
    mask = dandelion_optimizer_optimize(X, y)
    if np.sum(mask) != 0:
        return X[:, mask == 1]

    print("Warning: No features selected! Using all features.")
    return X  # Fall back to every feature

# Step 4: Prepare Data for LSTM
def prepare_data(df):
    """Build scaled, LSTM-shaped train/test arrays from an engineered frame.

    The first 80% of rows (in frame order) form the training split and the
    remainder the test split.  Both scalers are fit on the training rows
    only and then applied to the test rows, so test-set minima/maxima do
    not influence the training data; test values may therefore fall
    outside [0, 1].

    Args:
        df: DataFrame containing the columns ``SMA_10``, ``SMA_50``,
            ``EMA_10``, ``EMA_50``, ``Momentum``, ``Volatility`` and
            ``Close``.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test, scaler_X, scaler_y)``.
        The X arrays have shape ``(samples, selected_features, 1)``; the y
        arrays have shape ``(samples, 1)``.

    Raises:
        ValueError: If the frame has too few rows to yield a training split.
    """
    feature_cols = ['SMA_10', 'SMA_50', 'EMA_10', 'EMA_50', 'Momentum', 'Volatility']
    X = df[feature_cols].values
    y = df['Close'].values

    # Validate before anything is fitted so the caller sees this message
    # instead of an error raised from inside feature selection or a scaler.
    train_size = int(0.8 * len(X))
    if train_size == 0:
        raise ValueError("Not enough data for training. Check stock data.")

    # NOTE(review): feature selection still scores masks on every row,
    # including those that end up in the test split.
    X_selected = feature_selection(X, y)
    y_column = y.reshape(-1, 1)

    scaler_X = MinMaxScaler()
    scaler_y = MinMaxScaler()

    # Fit on the training rows only, then reuse that scaling for the test rows.
    X_train = scaler_X.fit_transform(X_selected[:train_size])
    y_train = scaler_y.fit_transform(y_column[:train_size])
    X_test = scaler_X.transform(X_selected[train_size:])
    y_test = scaler_y.transform(y_column[train_size:])

    # Add a trailing axis of length 1 to get the 3-D input the LSTM expects.
    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    return X_train, X_test, y_train, y_test, scaler_X, scaler_y

# Step 5: Build LSTM Model
def build_lstm_model(input_shape):
    """Build and compile the two-layer LSTM regressor.

    Args:
        input_shape: Shape of one sample, excluding the batch axis
            (the caller passes ``(X_train.shape[1], X_train.shape[2])``).

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, MSE loss) with a
        single linear output unit.
    """
    model = Sequential([
        # Declare the input explicitly; passing input_shape= to the first
        # layer makes Keras 3 emit a UserWarning.
        tf.keras.Input(shape=input_shape),
        LSTM(50, return_sequences=True),
        Dropout(0.2),
        LSTM(50, return_sequences=False),
        Dropout(0.2),
        Dense(25, activation='relu'),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

# Step 6: Train and Predict
def train_predict_lstm(X_train, X_test, y_train, y_test, scaler_y, *,
                       epochs=500, batch_size=32, plot_path='figLi5.png'):
    """Train the LSTM, evaluate it on the test split and plot the result.

    Args:
        X_train, X_test: 3-D feature arrays as returned by ``prepare_data``.
        y_train, y_test: Scaled targets matching the feature arrays.
        scaler_y: Fitted target scaler used to map predictions and targets
            back to price units before computing metrics.
        epochs: Number of training epochs (default matches the original 500).
        batch_size: Mini-batch size passed to ``model.fit``.
        plot_path: File the actual-vs-predicted figure is saved to; pass
            ``None`` to skip saving.

    Returns:
        Tuple ``(model, predictions)`` where ``predictions`` are the
        inverse-scaled test-set predictions.
    """
    model = build_lstm_model((X_train.shape[1], X_train.shape[2]))
    # The test split doubles as the validation set reported during training.
    model.fit(
        X_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(X_test, y_test),
        verbose=1,
    )

    # Map both predictions and targets back to the original price scale.
    predictions = scaler_y.inverse_transform(model.predict(X_test))
    y_actual = scaler_y.inverse_transform(y_test)

    mse = mean_squared_error(y_actual, predictions)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_actual, predictions)
    r2 = r2_score(y_actual, predictions)

    print(f"MSE: {mse}, RMSE: {rmse}, MAE: {mae}, R2: {r2}")
    plt.figure(figsize=(12, 6))
    plt.plot(y_actual, label='Actual Price', color='blue')
    plt.plot(predictions, label='Predicted Price', color='red')
    plt.xlabel('Time')
    plt.ylabel('Stock Price')
    plt.title('Actual vs Predicted Stock Prices')
    plt.legend()
    # Save before show(): some backends clear the figure once it is shown.
    if plot_path is not None:
        plt.savefig(plot_path, dpi=300, bbox_inches='tight')
    plt.show()
    return model, predictions

# Main Execution
# Script entry point: load the CSV, engineer features, prepare the splits,
# then train and evaluate the LSTM.
if __name__ == "__main__":
    file_path = 'NSE-TATAGLOBAL.csv'
    df = load_stock_data(file_path)
    # load_stock_data reports failures by returning an empty frame.
    if df.empty:
        raise ValueError("Stock data retrieval failed. Exiting.")
    
    df = create_features(df)
    # scaler_X is returned alongside the splits but not used below.
    X_train, X_test, y_train, y_test, scaler_X, scaler_y = prepare_data(df)
    model, predictions = train_predict_lstm(X_train, X_test, y_train, y_test, scaler_y)
    
    print("Prediction Complete.")

Best Feature Selection Solution: [0 1 1 0 1 0]
Epoch 1/500


  super().__init__(**kwargs)


[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 27ms/step - loss: 0.0636 - val_loss: 0.0154
Epoch 2/500
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 0.0055 - val_loss: 0.0013
Epoch 3/500
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 0.0027 - val_loss: 0.0011
Epoch 4/500
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0021 - val_loss: 0.0012
Epoch 5/500
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 0.0022 - val_loss: 0.0012
Epoch 6/500
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 0.0018 - val_loss: 0.0014
Epoch 7/500
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 0.0019 - val_loss: 8.7645e-04
Epoch 8/500
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 0.0018 - val_loss: 9.0546e-04
Epoch 9/500
[1m50/50[0m [32m━━━━━━━━━━━━━━