In [None]:
import pickle

def read_pickle_file(file_path):
    """Load the object serialized in a pickle file.

    Problems are reported on stdout instead of being raised, so callers
    must check the result for None before using it.

    Args:
        file_path (str): The path to the pickle file.

    Returns:
        The unpickled object, or None if the file does not exist or any
        other error occurs while opening or unpickling it.
    """
    try:
        # Unpickling can execute code stored in the file; only load trusted files.
        with open(file_path, 'rb') as handle:
            contents = pickle.load(handle)
    except FileNotFoundError:
        print(f"Error: Pickle file '{file_path}' not found.")
    except Exception as err:
        print(f"Error reading pickle file: {err}")
    else:
        return contents
    return None

# Example usage: load the pickled history and keep the rows of one ticker.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
file_path = r"C:\Users\nisha\Documents\PythonProjects\TradingAnalysis\src\data\pickle\historical_data_hour.pkl"
symbol = 'AMD'
data = read_pickle_file(file_path)

if data is not None:
    # Filter only once the load is known to have succeeded: read_pickle_file
    # returns None on failure, and indexing None would raise a TypeError
    # before the failure message below could ever be printed.
    data = data[data['symbol'] == symbol]
    print(f'Number of rows in dataset is {len(data)}')
else:
    print("Failed to read the pickle file.")

In [None]:
# Row count per symbol; after the symbol filter in the loading cell only the selected ticker should remain.
data['symbol'].value_counts()

In [None]:
# Display the filtered frame for a quick visual check.
data

In [1]:
import tensorflow as tf
# Report the installed TensorFlow version and how many GPUs it can see.
print("TensorFlow version:", tf.__version__)
# Same device query through the non-experimental name of the API.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))


TensorFlow version: 2.10.1
Num GPUs Available:  1


In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (7,4.5) # Make the default figures a bit bigger
import numpy as np
import random
import pandas as pd
from datetime import datetime
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, losses
from tensorflow.keras.models import Model
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf

In [None]:
# List the available columns before the 'date' column is derived in a later cell.
data.columns

In [None]:
# Move the current index into regular column(s) and replace it with a default integer index.
# NOTE(review): presumably this exposes 'timestamp' as a column for the next cell — confirm.
# Not idempotent: running this cell again inserts yet another index column.
data=data.reset_index()

In [None]:
# Reduce each timestamp to its calendar day, stored as a 'YYYY-MM-DD' string.
# NOTE(review): the source file name suggests hourly bars, in which case several rows share one date — confirm that is intended.
data['date'] = pd.to_datetime(data['timestamp']).dt.strftime('%Y-%m-%d')

In [None]:
# Keep only the columns used from here on. `.copy()` yields an independent
# frame, so the column assignment below does not operate on a selection of the
# previous frame (which triggers pandas' SettingWithCopyWarning).
data = data[['date', 'close']].copy()
# 'date' was stored as 'YYYY-MM-DD' strings; convert it to datetime64 so that
# min/max and plotting treat it as dates.
data['date'] = pd.to_datetime(data['date'])
data.dtypes

In [None]:
# First and last date present in the selected data.
data['date'].min(), data['date'].max()

In [None]:
import plotly.graph_objects as go
# Title bounds: earliest and latest date in the data, formatted as YYYY-MM-DD.
min_date = data['date'].min().strftime('%Y-%m-%d')
max_date = data['date'].max().strftime('%Y-%m-%d')

# Single-trace line chart of the close price over time.
fig = go.Figure(data=[go.Scatter(x=data['date'], y=data['close'], name='Close price')])
fig.update_layout(showlegend=True, title=f'{symbol} between {min_date}  and {max_date}')
fig.show()

In [None]:
# Split by row order (no shuffling): the first 70% of rows train the model,
# the remainder is held out for testing.
train_size = int(data.shape[0]*0.7)
# `.copy()` makes both partitions independent frames. They are scaled in place
# later, and doing that on plain slices of `data` would write into a slice and
# raise pandas' SettingWithCopyWarning.
train = data.iloc[:train_size].copy()
test = data.iloc[train_size:].copy()
train.shape, test.shape

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics of 'close' from the training split only, so the
# test split does not influence them.
scaler = StandardScaler().fit(train[['close']])

# Standardize 'close' in both splits with the training statistics.
# Note: this overwrites the raw prices in place, so re-running this cell would
# re-fit the scaler on already-scaled values.
train.loc[:, 'close'] = scaler.transform(train[['close']])
test.loc[:, 'close'] = scaler.transform(test[['close']])

In [None]:
# Number of consecutive rows in each model input window.
TIME_STEPS = 30

def create_sequences(X, y, time_steps=TIME_STEPS):
    """Cut a series into overlapping fixed-length windows with next-step targets.

    Args:
        X: pandas object supporting ``.iloc`` slicing; each window is the
            ``.values`` of ``time_steps`` consecutive rows.
        y: pandas object supporting ``.iloc``; the target of a window is the
            element located right after the window's last row.
        time_steps (int): Window length. Defaults to TIME_STEPS.

    Returns:
        tuple: ``(windows, targets)`` as NumPy arrays, one entry per window
        start position. Both are empty when ``len(X) <= time_steps``.
    """
    window_count = len(X) - time_steps
    windows = [X.iloc[start:start + time_steps].values for start in range(window_count)]
    targets = [y.iloc[start + time_steps] for start in range(window_count)]
    return np.array(windows), np.array(targets)

# Build sliding windows of TIME_STEPS scaled closes for each split; every
# target is the close value that immediately follows its window.
X_train, y_train = create_sequences(train[['close']], train['close'])
X_test, y_test = create_sequences(test[['close']], test['close'])

In [None]:
# Sanity-check the window arrays; the expected layout is (samples, time steps, features).
print(f'Training shape: {X_train.shape}')

print(f'Testing shape: {X_test.shape}')

In [5]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout, RepeatVector, TimeDistributed
# LSTM autoencoder: compress each window into one vector, repeat that vector
# for every time step, and decode it back into a sequence shaped like the input.
timesteps, n_features = X_train.shape[1], X_train.shape[2]
model = Sequential([
    LSTM(128, input_shape=(timesteps, n_features)),  # encoder: one 128-unit vector per window
    Dropout(rate=0.2),
    RepeatVector(timesteps),                         # present that vector to every decoder step
    LSTM(128, return_sequences=True),                # decoder: one output per time step
    Dropout(rate=0.2),
    TimeDistributed(Dense(n_features)),              # map each step back to the feature dimension
])
model.compile(optimizer='adam', loss='mae')
model.summary()

ModuleNotFoundError: No module named 'tensorflow'

In [None]:
from tensorflow.keras.callbacks import EarlyStopping
# Train the autoencoder to reconstruct its own input. The network emits a full
# (time steps, features) sequence per window, and the anomaly score computed
# further down is |model.predict(X) - X|, so the training target has to be
# X_train itself rather than the scalar next-step values in y_train.
# shuffle=False keeps the windows in order; validation_split holds out the
# last 10% of the training windows, and training stops early once val_loss
# has not improved for 3 epochs.
history = model.fit(X_train, X_train, epochs=100, batch_size=32, validation_split=0.1,
                    callbacks=[EarlyStopping(monitor='val_loss', patience=3, mode='min')], shuffle=False)

In [None]:
# Learning curves: training and validation loss per epoch.
for metric, label in (('loss', 'Training loss'), ('val_loss', 'Validation loss')):
    plt.plot(history.history[metric], label=label)
plt.legend();

In [None]:
# Reconstruction MAE on the held-out windows. The model outputs a whole
# sequence and is scored below by how well it reproduces its input, so the
# reference passed here is X_test itself rather than the next-step values in y_test.
model.evaluate(X_test, X_test)

In [None]:
# Reconstruct the training windows and score each window by its mean absolute
# error along the time axis.
X_train_pred = model.predict(X_train, verbose=0)
train_mae_loss = np.abs(X_train_pred - X_train).mean(axis=1)

plt.hist(train_mae_loss, bins=50)
plt.xlabel('Train MAE loss')
plt.ylabel('Number of Samples');

# Anomaly cut-off: mean + 1 standard deviation of the training errors
# (a stricter alternative would be np.max(train_mae_loss)).
threshold = train_mae_loss.mean() + train_mae_loss.std()
print(f'Reconstruction error threshold: {threshold}')

In [None]:
# Score the test windows the same way as the training windows: mean absolute
# reconstruction error along the time axis.
X_test_pred = model.predict(X_test, verbose=0)
test_mae_loss = np.abs(X_test_pred - X_test).mean(axis=1)

plt.hist(test_mae_loss, bins=50)
plt.xlabel('Test MAE loss')
plt.ylabel('Number of samples');

In [None]:
# One row per test window, taken from the test rows after the first TIME_STEPS
# (those first rows have no complete window in front of them). `.copy()` makes
# the score table independent of `test`; it already carries 'date' and 'close'.
test_score_df = test.iloc[TIME_STEPS:].copy()
# test_mae_loss has one column per feature (a single one, 'close', here);
# flatten it so 'loss' is stored as a plain 1-D column.
test_score_df['loss'] = np.ravel(test_mae_loss)
test_score_df['threshold'] = threshold
# A window is anomalous when its reconstruction error exceeds the threshold.
test_score_df['anomaly'] = test_score_df['loss'] > test_score_df['threshold']

In [None]:
# Preview the last rows of the score table (date, close, loss, threshold, anomaly).
test_score_df.tail()

In [None]:
# Per-window test error plotted against the constant anomaly threshold.
fig = go.Figure(data=[
    go.Scatter(x=test_score_df['date'], y=test_score_df['loss'], name='Test loss'),
    go.Scatter(x=test_score_df['date'], y=test_score_df['threshold'], name='Threshold'),
])
fig.update_layout(showlegend=True, title='Test loss vs. Threshold')
fig.show()

In [None]:
# Keep the rows flagged as anomalous; the boolean 'anomaly' column is used directly as the row mask.
anomalies = test_score_df[test_score_df['anomaly']]
anomalies.head()

In [None]:
# Number of test rows flagged as anomalous.
len(anomalies)

In [None]:
# Plot the close price in its original units with the anomalous rows marked.
# The scaler receives the single-column 2-D frame it was fitted on and its
# output is flattened, so every trace has exactly one y value per x value
# (a (1, n) array would not line up with the n dates on the x axis).
fig = go.Figure()
fig.add_trace(go.Scatter(x=test_score_df['date'],
                         y=scaler.inverse_transform(test_score_df[['close']]).ravel(),
                         name='Close price'))
fig.add_trace(go.Scatter(x=anomalies['date'],
                         y=scaler.inverse_transform(anomalies[['close']]).ravel(),
                         mode='markers', name='Anomaly'))
fig.update_layout(showlegend=True, title='Detected anomalies')
fig.show()

In [None]:
# Scaled close values over the test period, without anomaly markers.
x = test_score_df['date']
y = test_score_df['close']
fig = go.Figure(data=[go.Scatter(x=x, y=y)])
fig.show()

In [None]:
import streamlit as st
# Show which streamlit version is importable from this kernel.
st.__version__

In [4]:
import sys
# Print the path of the Python interpreter backing this kernel, to verify
# which environment the notebook is actually running in.
print(f"Current Python interpreter: {sys.executable}")


Current Python interpreter: C:\Users\nisha\Documents\PythonProjects\TradingAnalysis\venv\Scripts\python.exe
