In [1]:
import os
current_directory = os.getcwd()
print(current_directory)
dir_base = os.path.join(current_directory,'Dataset')
print(dir_base)

/home/ladans/DNN/Project
/home/ladans/DNN/Project/Dataset


In [2]:
import pandas as pd
import glob
import os
import numpy as np
from sklearn.preprocessing import LabelEncoder
from keras.models import Model
from keras.layers import Embedding, LSTM, Dense, Input, Concatenate
from keras.layers import Flatten, RepeatVector

# Directory containing CSV files
data_dir = dir_base

# Initialize an empty DataFrame
all_data = pd.DataFrame()

# Iterate over each file in the directory
for file_path in glob.glob(os.path.join(data_dir, "*.csv")):
    # Extract stock symbol from the filename
    stock_symbol = os.path.basename(file_path).split('.')[0]
    
    # Load the CSV
    df = pd.read_csv(file_path)
    
    # Add a column for the stock symbol
    df['Stock_Symbol'] = stock_symbol
    
    # Optional: Select only necessary columns
    df = df[['Date', 'Open', 'Close', 'Stock_Symbol']]
    
    # Append to the main DataFrame
    all_data = pd.concat([all_data, df], ignore_index=True)

# Sort by date and stock symbol if needed
all_data = all_data.sort_values(['Stock_Symbol', 'Date']).reset_index(drop=True)

# Encode stock symbols
encoder = LabelEncoder()
all_data['Stock_Index'] = encoder.fit_transform(all_data['Stock_Symbol'])
all_data.head()

2024-11-11 14:44:48.420822: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1731336288.447426   71655 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1731336288.453810   71655 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-11 14:44:48.484319: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Unnamed: 0,Date,Open,Close,Stock_Symbol,Stock_Index
0,2012-09-04,95.108574,96.424286,AAPL,0
1,2012-09-05,96.510002,95.747147,AAPL,0
2,2012-09-06,96.167145,96.610001,AAPL,0
3,2012-09-07,96.864288,97.205711,AAPL,0
4,2012-09-10,97.207146,94.677139,AAPL,0


In [3]:
from sklearn.metrics import mean_squared_error
from pyswarm import pso

test_ratio=0.2
def train_and_evaluate(sequence_length):
    print(f"Evaluating sequence length: {sequence_length}")
    sequence_length = int(sequence_length[0])  # Extract the integer value

    # Re-initialize lists for inputs and outputs
    
    X, y, stock_indices = [], [], []
    
    for stock in all_data['Stock_Symbol'].unique():
        stock_data = all_data[all_data['Stock_Symbol'] == stock]
        prices = stock_data['Close'].values
        normalized_prices = prices / prices[0] - 1  # Normalize
        
        for i in range(len(normalized_prices) - sequence_length):
            X.append(normalized_prices[i: i + sequence_length])
            y.append(normalized_prices[i + sequence_length])
            stock_indices.append(stock_data['Stock_Index'].iloc[0])

    # Convert to numpy arrays and split into train-test sets
    X = np.array(X)
    y = np.array(y)
    stock_indices = np.array(stock_indices)
    train_size = int(len(X) * (1 - test_ratio))
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]
    stock_indices_train, stock_indices_test = stock_indices[:train_size], stock_indices[train_size:]

    #Model:
    # Number of unique stocks and embedding dimensions
    num_stocks = len(all_data['Stock_Symbol'].unique())
    embedding_size = 8
    

    # Define and compile model
    price_input = Input(shape=(sequence_length, 1), name='price_input')
    stock_input = Input(shape=(1,), name='stock_input')
    stock_embedding = Embedding(input_dim=num_stocks, output_dim=embedding_size, input_length=1)(stock_input)
    stock_embedding = Flatten()(stock_embedding)
    stock_embedding = RepeatVector(sequence_length)(stock_embedding)
    merged_input = Concatenate(axis=2)([price_input, stock_embedding])
    lstm_out = LSTM(units=128)(merged_input)
    output = Dense(units=1)(lstm_out)
    model = Model(inputs=[price_input, stock_input], outputs=output)
    model.compile(optimizer='adam', loss='mean_squared_error')
    
    # Train model
    model.fit([X_train, stock_indices_train], y_train, epochs=5, batch_size=1, verbose=0)  # Adjust epochs for quick evaluation
    
    # Predict and calculate RMSE
    predicted_prices = model.predict([X_test, stock_indices_test])
    rmse = mean_squared_error(y_test, predicted_prices, squared=False)
    print(f"Sequence length {sequence_length} gives RMSE: {rmse}")
    return rmse
print('done')

done


In [4]:
# Define bounds for sequence_length as lists
lb, ub = [10], [60]

# Run PSO to find the optimal sequence_length
optimal_sequence_length, optimal_rmse = pso(train_and_evaluate, lb, ub, maxiter=5)

print("Optimal sequence length:", optimal_sequence_length)
print("RMSE for optimal sequence length:", optimal_rmse)


Evaluating sequence length: [16.11315692]


W0000 00:00:1731336290.915687   71655 gpu_device.cc:2344] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step
Sequence length 16 gives RMSE: 0.026498959022569223
Evaluating sequence length: [58.02958729]




[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step
Sequence length 58 gives RMSE: 0.05354387750419304
Evaluating sequence length: [29.30013301]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step
Sequence length 29 gives RMSE: 0.033043175002394766
Evaluating sequence length: [11.93334693]




[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step
Sequence length 11 gives RMSE: 0.023801462609103374
Evaluating sequence length: [48.11895984]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step
Sequence length 48 gives RMSE: 0.023217931202407994
Evaluating sequence length: [43.11568617]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step
Sequence length 43 gives RMSE: 0.02427214203576114
Evaluating sequence length: [32.71286776]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step
Sequence length 32 gives RMSE: 0.023258436422284114
Evaluating sequence length: [44.31809217]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step
Sequence length 44 gives RMSE: 0.03162009518085244
Evaluating sequence length: [48.72167528]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step
Sequence length 48 gives RMSE: 0.03489887232880403
Evaluating sequence length: [22.97179898]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step
Sequence length 22 gives RMSE: 0.02305607826309722
Evaluating sequence length: [45.73436767]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step
Sequence length 45 gives RMSE: 0.028056847343958132
Evaluating sequence length: [30.39917174]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step
Sequence length 30 gives RMSE: 0.0232431220177917
Evaluating sequence length: [43.96432238]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step
Sequence length 43 gives RMSE: 0.022726008201912205
Evaluating sequence length: [29.9056514]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step
Sequence length 29 gives RMSE: 0.026045452589043557
Evaluating sequence length: [20.02879764]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step
Sequence length 20 gives RMSE: 0.03598846074212105
Evaluating sequence length: [22.94852087]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step
Sequence length 22 gives RMSE: 0.0229494280967534
Evaluating sequence length: [32.81987631]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step
Sequence length 32 gives RMSE: 0.0246725686059275
Evaluating sequence length: [20.61900261]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step
Sequence length 20 gives RMSE: 0.022855516332104732
Evaluating sequence length: [42.58832341]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step
Sequence length 42 gives RMSE: 0.04193450446249034
Evaluating sequence length: [27.77174705]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step
Sequence length 27 gives RMSE: 0.02333548279760471
Evaluating sequence length: [11.31273728]




[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
Sequence length 11 gives RMSE: 0.024163136247046697
Evaluating sequence length: [39.90710261]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step
Sequence length 39 gives RMSE: 0.02535127293188883
Evaluating sequence length: [31.43298851]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step
Sequence length 31 gives RMSE: 0.024533090748202133
Evaluating sequence length: [49.44872768]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step
Sequence length 49 gives RMSE: 0.02504557600022374
Evaluating sequence length: [19.46228891]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
Sequence length 19 gives RMSE: 0.040998339679110346
Evaluating sequence length: [10.95584487]




[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step
Sequence length 10 gives RMSE: 0.02785961781198618
Evaluating sequence length: [52.08117393]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step
Sequence length 52 gives RMSE: 0.022902213233937728
Evaluating sequence length: [39.1781826]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step
Sequence length 39 gives RMSE: 0.04429491879409315
Evaluating sequence length: [28.88317035]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step
Sequence length 28 gives RMSE: 0.023907537461367055
Evaluating sequence length: [39.50056755]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step
Sequence length 39 gives RMSE: 0.023012535014197973
Evaluating sequence length: [33.95002393]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step
Sequence length 33 gives RMSE: 0.023092509108980162
Evaluating sequence length: [16.99012345]




[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step
Sequence length 16 gives RMSE: 0.023347954366771603
Evaluating sequence length: [50.48748437]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step
Sequence length 50 gives RMSE: 0.034961183258524785
Evaluating sequence length: [45.52209177]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step
Sequence length 45 gives RMSE: 0.02317621616584704
Evaluating sequence length: [35.30026295]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step
Sequence length 35 gives RMSE: 0.024308946131380113
Evaluating sequence length: [55.10096517]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step
Sequence length 55 gives RMSE: 0.026442714899769647
Evaluating sequence length: [14.50487927]




[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step
Sequence length 14 gives RMSE: 0.023301558333275223
Evaluating sequence length: [29.00784385]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step
Sequence length 29 gives RMSE: 0.04476676960182406
Evaluating sequence length: [36.95680047]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step
Sequence length 36 gives RMSE: 0.022805068926255348
Evaluating sequence length: [40.02868741]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step
Sequence length 40 gives RMSE: 0.023468947600113674
Evaluating sequence length: [18.06789046]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
Sequence length 18 gives RMSE: 0.023191783966626554
Evaluating sequence length: [19.42119485]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
Sequence length 19 gives RMSE: 0.03129176359254146
Evaluating sequence length: [12.41434095]




[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
Sequence length 12 gives RMSE: 0.032920259620312295
Evaluating sequence length: [23.02337126]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step
Sequence length 23 gives RMSE: 0.025399666604662276
Evaluating sequence length: [10.02899271]




[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
Sequence length 10 gives RMSE: 0.026604187044341365
Evaluating sequence length: [20.63995505]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step
Sequence length 20 gives RMSE: 0.028087780614522117
Evaluating sequence length: [46.19885641]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step
Sequence length 46 gives RMSE: 0.023967500920042403
Evaluating sequence length: [22.80298271]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step
Sequence length 22 gives RMSE: 0.022696401792977373
Evaluating sequence length: [46.70882211]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step
Sequence length 46 gives RMSE: 0.022728771624280674
Evaluating sequence length: [40.93806023]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step
Sequence length 40 gives RMSE: 0.022704946308738042
Evaluating sequence length: [28.10728019]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step
Sequence length 28 gives RMSE: 0.029103674153758276
Evaluating sequence length: [28.16912261]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step
Sequence length 28 gives RMSE: 0.03816925077432191
Evaluating sequence length: [38.76478274]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step
Sequence length 38 gives RMSE: 0.03179843884972491
Evaluating sequence length: [32.95461075]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step
Sequence length 32 gives RMSE: 0.03605734993696051
Evaluating sequence length: [37.26055873]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step
Sequence length 37 gives RMSE: 0.038197046049194036
Evaluating sequence length: [18.67192278]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
Sequence length 18 gives RMSE: 0.022902932500653048
Evaluating sequence length: [13.18541437]




[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step
Sequence length 13 gives RMSE: 0.030495227072990436
Evaluating sequence length: [56.13048655]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step
Sequence length 56 gives RMSE: 0.03634689051528027
Evaluating sequence length: [57.13625157]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step
Sequence length 57 gives RMSE: 0.03209073178173356
Evaluating sequence length: [56.6179987]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step
Sequence length 56 gives RMSE: 0.02460560086324757
Evaluating sequence length: [52.54791581]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step
Sequence length 52 gives RMSE: 0.025014913021501335
Evaluating sequence length: [16.07411828]




[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step
Sequence length 16 gives RMSE: 0.023861373115103092
Evaluating sequence length: [24.0360277]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step
Sequence length 24 gives RMSE: 0.023225854204487105
Evaluating sequence length: [51.7166419]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step
Sequence length 51 gives RMSE: 0.027042324884174275
Evaluating sequence length: [16.20606614]




[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step
Sequence length 16 gives RMSE: 0.023502740533089966
Evaluating sequence length: [46.30287287]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step
Sequence length 46 gives RMSE: 0.02384386827721581
Evaluating sequence length: [49.82064422]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step
Sequence length 49 gives RMSE: 0.024961366824049095
Evaluating sequence length: [34.28670781]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step
Sequence length 34 gives RMSE: 0.024267423688244496
Evaluating sequence length: [32.50262109]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step
Sequence length 32 gives RMSE: 0.031884527665465075
Evaluating sequence length: [57.44506485]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step
Sequence length 57 gives RMSE: 0.023020761556010835
Evaluating sequence length: [26.22924102]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step
Sequence length 26 gives RMSE: 0.02284258586641402
Evaluating sequence length: [54.02544126]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step
Sequence length 54 gives RMSE: 0.02587973333890972
Evaluating sequence length: [58.75065774]




[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step
Sequence length 58 gives RMSE: 0.031772994105748825
Evaluating sequence length: [29.37461695]




[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step
Sequence length 29 gives RMSE: 0.025078504769185828
Evaluating sequence length: [58.82009095]




[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step
Sequence length 58 gives RMSE: 0.02877740189893393
Evaluating sequence length: [13.6837397]




[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
Sequence length 13 gives RMSE: 0.02443991654664586
Evaluating sequence length: [25.60946678]




KeyboardInterrupt: 