In [None]:
pip install yfinance

In [None]:
pip install tensorflow==2.16.1

In [None]:
import tensorflow as tf

In [None]:
print(tf.__version__)

In [None]:
import numpy as np
import pandas as pd
from pandas_datareader.data import DataReader
import yfinance as yf
from pandas_datareader import data as pdr
yf.pdr_override()
from datetime import datetime, timedelta
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score
from keras.models import Sequential
from keras.layers import Input, Dense, LSTM, GRU, Dropout, Conv1D, MaxPooling1D, Flatten
from keras.callbacks import EarlyStopping
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt

In [None]:
# Load the EGX30 history from the Kaggle dataset file and normalise it
data_path = '/kaggle/input/egx30-data1/final.csv'
egx30 = pd.read_csv(data_path)
egx30['Date'] = pd.to_datetime(egx30['Date'])

# Order rows by date and index by it, so that positional slices taken later
# (train/validation/test, "last 60 rows") follow time order.
egx30 = egx30.sort_values('Date').set_index('Date')

# 'Close' may be read as text containing thousands separators or as a column
# that is already numeric; casting to str first keeps the `.str` accessor
# valid in both cases. Entries that cannot be parsed become NaN.
egx30['Close'] = pd.to_numeric(
    egx30['Close'].astype(str).str.replace(',', '', regex=False),
    errors='coerce',
)
# Function to convert volume to a uniform unit
def convert_volume(vol):
    """Convert an abbreviated volume such as ``'1.5M'`` to a plain float.

    Parameters
    ----------
    vol : str or any
        Strings may carry a ``K`` (thousand), ``M`` (million) or ``B``
        (billion) suffix in either case, thousands separators and
        surrounding whitespace. Non-string values are returned unchanged.

    Returns
    -------
    float, or ``vol`` itself when it is not a string
        ``nan`` is returned for an empty string and for the ``'-'``
        placeholder.

    Raises
    ------
    ValueError
        If the numeric part of a non-empty string cannot be parsed.
    """
    if not isinstance(vol, str):
        return vol

    text = vol.strip().replace(',', '')
    if text in ('', '-'):
        return float('nan')

    multipliers = {'K': 1e3, 'M': 1e6, 'B': 1e9}
    suffix = text[-1].upper()
    if suffix in multipliers:
        return float(text[:-1]) * multipliers[suffix]
    return float(text)

# Normalise the abbreviated 'Volume' strings (e.g. "1.5M") to plain numbers
egx30['Volume'] = egx30['Volume'].apply(convert_volume)

# Tickers downloaded through yfinance
# (the index symbols '^IXIC' and 'DJI' are currently left out)
tech_list = ['AAPL', 'GOOG', 'MSFT', 'AMZN']

# Fifteen-year look-back window ending now
end = datetime.now()
start_year = end.year - 15
try:
    start = datetime(start_year, end.month, end.day)
except ValueError:
    # Only reachable on 29 February when the start year is not a leap year
    start = datetime(start_year, end.month, 28)

# One price frame per symbol; the locally loaded EGX30 frame is stored
# alongside the downloaded ones under its own key.
company_data = {}
for stock in tech_list:
    company_data[stock] = yf.download(stock, start, end)
company_data['EGX30'] = egx30

# Function to add features like moving averages
def add_features(df):
    """Add rolling 'Close' statistics to ``df`` in place and return it.

    Columns written: ``MA_10``, ``MA_50`` and ``MA_200`` (rolling means of
    'Close') and ``Volatility`` (10-row rolling standard deviation of
    'Close'). Every row containing a NaN in any column is then dropped from
    ``df`` itself, which removes the warm-up rows of the longest window.
    """
    close = df['Close']
    for span in (10, 50, 200):
        df[f'MA_{span}'] = close.rolling(window=span).mean()
    df['Volatility'] = close.rolling(window=10).std()
    # Self-assignment leaves the values untouched; its only effect is a
    # KeyError when the frame has no 'Volume' column.
    df['Volume'] = df['Volume']
    df.dropna(inplace=True)
    return df

In [None]:


# Function to create dataset
def create_dataset(df, time_step=60):
    """Build sliding-window samples from the min-max scaled 'Close' column.

    Returns ``(X, Y, scaler)``. ``X[i]`` holds ``time_step`` consecutive
    scaled closes (shape ``(time_step, 1)``) and ``Y[i]`` is the scaled close
    of the row that immediately follows that window. When ``df`` has fewer
    than ``time_step + 1`` rows, ``(None, None, None)`` is returned instead.

    Note: the scaler is fitted on every row of ``df``, so any later split of
    ``X``/``Y`` shares the same scaling statistics.
    """
    closes = df[['Close']].values
    scaler = MinMaxScaler(feature_range=(0, 1))

    n_windows = len(closes) - time_step
    if n_windows < 1:
        # Not even one full window plus its target row is available
        return None, None, None

    scaled = scaler.fit_transform(closes)
    starts = range(n_windows)
    X = np.array([scaled[s:s + time_step] for s in starts])
    Y = np.array([scaled[s + time_step, 0] for s in starts])
    return X, Y, scaler

# Build and train a sequence model (stacked LSTM, stacked GRU or Conv1D hybrid)
def build_and_train_model(X_train, Y_train, X_val, Y_val, epochs=10, model_type='LSTM'):
    """Build, compile and fit a Keras regressor with a single output unit.

    Parameters
    ----------
    X_train, X_val : array
        3-D inputs; axes 1 and 2 of ``X_train`` are used as the Keras input
        shape (presumably (time steps, features) -- confirm against caller).
    Y_train, Y_val : array
        Targets for the corresponding inputs.
    epochs : int
        Maximum number of training epochs.
    model_type : {'LSTM', 'GRU', 'Hybrid'}
        ``'LSTM'`` and ``'GRU'`` build two stacked recurrent layers of that
        type with 100 units each; ``'Hybrid'`` builds a
        Conv1D -> MaxPooling1D -> Flatten -> Dense(50) stack.

    Returns
    -------
    The fitted model, carrying the weights of the epoch with the lowest
    validation loss.

    Raises
    ------
    ValueError
        If ``model_type`` is not one of the supported names.
    """
    recurrent_layers = {'LSTM': LSTM, 'GRU': GRU}
    if model_type not in recurrent_layers and model_type != 'Hybrid':
        raise ValueError(
            f"Unsupported model_type {model_type!r}; expected 'LSTM', 'GRU' or 'Hybrid'."
        )

    model = Sequential()
    model.add(Input(shape=(X_train.shape[1], X_train.shape[2])))
    if model_type in recurrent_layers:
        # Both stacked layers use the requested cell type, so 'GRU' yields a
        # GRU-GRU stack rather than GRU followed by LSTM.
        recurrent = recurrent_layers[model_type]
        model.add(recurrent(100, return_sequences=True))
        model.add(Dropout(0.3))
        model.add(recurrent(100, return_sequences=False))
        model.add(Dropout(0.3))
    else:
        model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
        model.add(MaxPooling1D(pool_size=2))
        model.add(Flatten())
        model.add(Dense(50, activation='relu'))
        # The hybrid stack applies Dropout(0.3) twice in a row before the head
        model.add(Dropout(0.3))
        model.add(Dropout(0.3))
    model.add(Dense(50))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')

    # restore_best_weights: without it the returned model keeps the weights of
    # the last epoch run, which may be up to `patience` epochs past the best.
    early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    model.fit(
        X_train,
        Y_train,
        epochs=epochs,
        batch_size=64,
        validation_data=(X_val, Y_val),
        callbacks=[early_stop],
        verbose=2,
    )
    return model

# Split data into training, validation, and testing sets
def split_data(X, Y, train_size=0.8, val_size=0.1, test_size=0.1):
    """Split ``X`` and ``Y`` positionally into train, validation and test parts.

    No shuffling is done: the first ``train_size`` fraction of samples is the
    training part, the next ``val_size`` fraction the validation part and the
    remainder the test part. Boundaries are truncated to whole indices.

    Parameters
    ----------
    X, Y : sliceable sequences
        Split at the same indices; ``len(X)`` determines the boundaries.
    train_size, val_size, test_size : float
        Fractions that must add up to 1.

    Returns
    -------
    tuple
        ``(X_train, X_val, X_test, Y_train, Y_val, Y_test)``.

    Raises
    ------
    AssertionError
        If the three fractions do not add up to 1.
    """
    total = train_size + val_size + test_size
    # Sums such as 0.7 + 0.2 + 0.1 are not exactly 1.0 in binary floating
    # point, so the check uses a tolerance instead of ``==``.
    assert abs(total - 1.0) < 1e-9, (
        f"train_size + val_size + test_size must equal 1 (got {total})"
    )

    n_samples = len(X)
    train_end = int(n_samples * train_size)
    val_end = int(n_samples * (train_size + val_size))

    X_train, X_val, X_test = X[:train_end], X[train_end:val_end], X[val_end:]
    Y_train, Y_val, Y_test = Y[:train_end], Y[train_end:val_end], Y[val_end:]
    return X_train, X_val, X_test, Y_train, Y_val, Y_test

# Hyperparameter tuning: exhaustive search scored on the validation set
def grid_search(X_train, Y_train, X_val, Y_val, param_grid=None):
    """Train one stacked-LSTM model per hyperparameter combination.

    Parameters
    ----------
    X_train, Y_train : array
        Training windows and targets.
    X_val, Y_val : array
        Validation windows and targets; used for early stopping and for
        scoring each combination.
    param_grid : dict, optional
        Maps ``'units'``, ``'dropout'`` and ``'batch_size'`` to lists of
        candidate values. Defaults to the single combination
        ``units=50, dropout=0.2, batch_size=32``.

    Returns
    -------
    (dict, float)
        The combination with the lowest validation RMSE and that RMSE. If no
        combination produces a finite RMSE the result is ``({}, inf)``.
    """
    if param_grid is None:
        param_grid = {'units': [50], 'dropout': [0.2], 'batch_size': [32]}

    best_rmse = float('inf')
    best_params = {}
    for units in param_grid['units']:
        for dropout in param_grid['dropout']:
            for batch_size in param_grid['batch_size']:
                model = Sequential()
                model.add(Input(shape=(X_train.shape[1], X_train.shape[2])))
                model.add(LSTM(units, return_sequences=True))
                model.add(Dropout(dropout))
                model.add(LSTM(units, return_sequences=False))
                model.add(Dropout(dropout))
                model.add(Dense(50))
                model.add(Dense(1))
                model.compile(optimizer='adam', loss='mean_squared_error')

                # Score each candidate with its best validation-loss weights
                early_stop = EarlyStopping(
                    monitor='val_loss', patience=10, restore_best_weights=True
                )
                model.fit(
                    X_train,
                    Y_train,
                    epochs=50,
                    batch_size=batch_size,
                    validation_data=(X_val, Y_val),
                    callbacks=[early_stop],
                    verbose=0,
                )
                predictions = model.predict(X_val)
                rmse = np.sqrt(mean_squared_error(Y_val, predictions))

                if rmse < best_rmse:
                    best_rmse = rmse
                    best_params = {
                        'units': units,
                        'dropout': dropout,
                        'batch_size': batch_size,
                    }

    return best_params, best_rmse


# K-fold cross-validation of a model type
def k_fold_cross_validation(X, Y, k=5, model_type='LSTM'):
    """Return the mean and standard deviation of per-fold validation RMSE.

    The samples are divided into ``k`` folds with ``KFold(n_splits=k)``. For
    each fold a new model of ``model_type`` is trained for up to 100 epochs
    on the remaining folds and scored on the held-out fold.
    """
    def _fold_rmse(fit_idx, holdout_idx):
        # Train on the fit indices, then score on the held-out indices
        X_holdout, Y_holdout = X[holdout_idx], Y[holdout_idx]
        fitted = build_and_train_model(
            X[fit_idx], Y[fit_idx], X_holdout, Y_holdout,
            epochs=100, model_type=model_type,
        )
        return np.sqrt(mean_squared_error(Y_holdout, fitted.predict(X_holdout)))

    splitter = KFold(n_splits=k)
    fold_errors = [_fold_rmse(fit_idx, holdout_idx) for fit_idx, holdout_idx in splitter.split(X)]
    return np.mean(fold_errors), np.std(fold_errors)

# Train one model per symbol and keep the fitted model and its scaler.
# `models` and `scalers` are keyed by symbol and are read later by the
# prediction helpers; symbols skipped below get no entry in either dict.
models = {}
scalers = {}
for stock in ['AAPL', 'GOOG', 'MSFT', 'AMZN' , 'EGX30']:
    df = company_data[stock]
    # Windowed, scaled 'Close' samples (default window length of create_dataset)
    X, Y, scaler = create_dataset(df)
    if X is None or Y is None:
        print(f"Not enough data for {stock} after preprocessing.")
        continue
    # Positional 80/10/10 split; X_test/Y_test are not used inside this loop
    X_train, X_val, X_test, Y_train, Y_val, Y_test = split_data(X, Y, train_size=0.8, val_size=0.1, test_size=0.1)
    # Runs grid_search on the train/validation split; nothing it produces is used below
    grid_search(X_train, Y_train, X_val, Y_val)
    print(f" {stock}")
    model = build_and_train_model(X_train, Y_train, X_val, Y_val, epochs=100, model_type='LSTM')
    # Save the model under a per-symbol file name.
    # NOTE(review): the file name says 'bilstm' although model_type='LSTM' is requested above.
    model.save(f'{stock}_bilstm_model.h5')
    models[stock] = model
    scalers[stock] = scaler


# Function to predict the next 30 days
def predict_next_30_days(stock, last_x_days, time_step=60):
    """Roll the trained model for ``stock`` forward 30 steps.

    Parameters
    ----------
    stock : str
        Key into the module-level ``models`` and ``scalers`` dicts.
    last_x_days : 2-D array
        The most recent ``time_step`` rows in original units, with the same
        columns the scaler for ``stock`` was fitted on. Column 0 is treated
        as the predicted quantity.
    time_step : int
        Window length passed to the model.

    Returns
    -------
    1-D array of 30 predictions for column 0, in original units.

    Each prediction is appended to the window and the oldest row dropped, so
    later steps are based on earlier predictions.
    """
    model = models[stock]
    scaler = scalers[stock]

    window = scaler.transform(last_x_days)
    # Width comes from the data rather than a fixed constant: the model only
    # predicts column 0, so any further columns are zero-filled.
    n_features = window.shape[1]
    n_extra = n_features - 1

    scaled_predictions = []
    for _ in range(30):
        next_pred = model.predict(window.reshape(1, time_step, n_features))
        scaled_predictions.append(next_pred[0])
        next_row = np.concatenate((next_pred, np.zeros((next_pred.shape[0], n_extra))), axis=1)
        window = np.append(window[1:], next_row, axis=0)

    # Pad back to the scaler's width so inverse_transform accepts the array
    scaled_predictions = np.array(scaled_predictions)
    padded = np.concatenate(
        (scaled_predictions, np.zeros((scaled_predictions.shape[0], n_extra))), axis=1
    )
    return scaler.inverse_transform(padded)[:, 0]

# Evaluate a trained model on held-out windows and plot predicted vs actual prices
def predict_and_plot(stock, X_test, Y_test, scaler):
    """Score the model for ``stock`` on a test set and plot the result.

    Parameters
    ----------
    stock : str
        Key into the module-level ``models`` and ``company_data`` dicts.
    X_test : array
        Input windows passed to ``model.predict``.
    Y_test : array
        Scaled targets matching ``X_test``.
    scaler : fitted scaler
        Used to map scaled values back to prices; column 0 is treated as the
        price column.

    RMSE and R^2 are computed and printed on the scaled values. The plot uses
    the last ``len(Y_test)`` index entries of ``company_data[stock]`` as its
    x axis.
    """
    model = models[stock]
    predictions = model.predict(X_test)

    rmse = np.sqrt(mean_squared_error(Y_test, predictions))
    r2 = r2_score(Y_test, predictions)
    print(f'{stock} Test RMSE: {rmse}')
    print(f'{stock} R^2 Score: {r2}')

    # The scaler only accepts as many columns as it was fitted on, so the
    # padding width is read from it instead of being hard-coded.
    n_extra = scaler.n_features_in_ - 1

    def _to_prices(scaled_values):
        """Undo the scaling of a single column of scaled prices."""
        column = np.asarray(scaled_values).reshape(-1, 1)
        padded = np.concatenate((column, np.zeros((column.shape[0], n_extra))), axis=1)
        return scaler.inverse_transform(padded)[:, 0]

    predicted_prices = _to_prices(predictions)
    actual_prices = _to_prices(Y_test)
    dates = company_data[stock].index[-len(actual_prices):]

    plt.figure(figsize=(14, 5))
    plt.plot(dates, actual_prices, color='green', label='Actual Price')
    plt.plot(dates, predicted_prices, color='red', label='Predicted Price')
    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.title(f'{stock} Price Prediction')
    plt.legend()
    plt.show()

# Evaluate every trained model on its held-out test split (the final 10% of windows)
for stock in ['AAPL', 'GOOG', 'MSFT', 'AMZN' , 'EGX30']:
    if stock not in models:
        # Training skips symbols with too little data, so they have no model
        # or scaler to evaluate here.
        print(f"Skipping {stock}: no trained model.")
        continue
    df = company_data[stock]
    # Rebuilding the dataset reproduces the same windows and scaling as in training
    X, Y, scaler = create_dataset(df)
    X_train, X_val, X_test, Y_train, Y_val, Y_test = split_data(X, Y, train_size=0.8, val_size=0.1, test_size=0.1)
    predict_and_plot(stock, X_test, Y_test, scaler)

# Forecast and plot the next 30 steps for the EGX30 index
plt.figure(figsize=(14, 7))
for stock in ['EGX30']:
    history = company_data[stock]
    # Only 'Close' is passed: it is the single column the scaler and model were fitted on
    last_x_days = history[['Close']].values[-60:]
    next_30_days_prediction = predict_next_30_days(stock, last_x_days)
    # Anchor the forecast axis to the last observation instead of the wall
    # clock, so the curve continues from where the data ends. Offsets are
    # calendar days.
    last_date = history.index[-1]
    future_dates = [last_date + timedelta(days=i) for i in range(1, 31)]
    plt.plot(future_dates, next_30_days_prediction, label=f'{stock} Next 30 Days Predictions')

plt.xlabel('Date')
plt.ylabel('Price')
plt.title('Next 30 Days Price Prediction for All indices')
plt.legend()
plt.show()

# Forecast and plot the next 30 steps for every company
plt.figure(figsize=(14, 7))
for stock in ['AAPL', 'GOOG', 'MSFT', 'AMZN' ]:
    history = company_data[stock]
    # The scalers and models are fitted on 'Close' alone, and the moving-average
    # and volatility columns are never added to these frames, so only 'Close'
    # is selected here.
    last_x_days = history[['Close']].values[-60:]
    next_30_days_prediction = predict_next_30_days(stock, last_x_days)
    # Anchor the forecast axis to the last observation instead of the wall
    # clock. Offsets are calendar days.
    last_date = history.index[-1]
    future_dates = [last_date + timedelta(days=i) for i in range(1, 31)]
    plt.plot(future_dates, next_30_days_prediction, label=f'{stock} Next 30 Days Predictions')

plt.xlabel('Date')
plt.ylabel('Price')
plt.title('Next 30 Days Price Prediction for All Companies')
plt.legend()
plt.show()
