In [8]:
import yfinance as yf
import pandas as pd
from prettytable import PrettyTable
from datetime import datetime, timedelta

# Calendar overrides used by is_working_day. Dates are 'YYYY-MM-DD' strings.
# Weekend dates on which the market is nevertheless treated as open
# (both entries below fall on a Saturday).
SPECIAL_WORKING_DAYS = frozenset({"2024-01-27", "2024-03-02"})

# Weekday dates on which the market is treated as closed.
HOLIDAYS = frozenset({
    "2023-01-26", "2023-03-07", "2023-03-30", "2023-04-04", "2023-04-07",
    "2023-04-14", "2023-05-01", "2023-06-29", "2023-07-29", "2023-08-15",
    "2023-09-19", "2023-10-02", "2023-10-24", "2023-11-14", "2023-11-27",
    "2023-12-25", "2024-01-22",
    "2024-01-26", "2024-03-08", "2024-03-25", "2024-03-29", "2024-04-11",
    "2024-04-17", "2024-05-01", "2024-06-17", "2024-07-17", "2024-08-15",
    "2024-10-02", "2024-11-01", "2024-11-15", "2024-12-25",
})


# Function to check if a given date is a working day
def is_working_day(date_str):
    """Return True when ``date_str`` is a working day.

    Precedence of the rules, highest first:
      1. a date listed in ``SPECIAL_WORKING_DAYS`` is always a working day;
      2. a date listed in ``HOLIDAYS`` is never a working day;
      3. Saturdays and Sundays are not working days;
      4. every other date is a working day.

    Args:
        date_str: Calendar date in ``'%Y-%m-%d'`` format.

    Returns:
        bool: Whether the date is a working day under the rules above.

    Raises:
        ValueError: If ``date_str`` cannot be parsed as ``'%Y-%m-%d'``.
    """
    # Parse first so malformed input fails loudly regardless of the branch taken.
    date_obj = datetime.strptime(date_str, '%Y-%m-%d')

    # Normalise to zero-padded form: strptime also accepts e.g. '2024-3-8',
    # which would otherwise miss the string lookups below.
    date_key = date_obj.strftime('%Y-%m-%d')

    # Special sessions must be checked BEFORE the weekend test; they are
    # weekend dates, so the weekend rule would otherwise always win.
    if date_key in SPECIAL_WORKING_DAYS:
        return True

    if date_key in HOLIDAYS:
        return False

    # weekday(): Monday == 0 ... Saturday == 5, Sunday == 6
    return date_obj.weekday() < 5

# Function to get the next working date if the provided date is not a working day
def get_next_working_date(date_str):
    """Return the first working day on or after ``date_str``.

    Args:
        date_str: Calendar date in ``'%Y-%m-%d'`` format.

    Returns:
        datetime: The parsed input date when ``is_working_day`` accepts it,
        otherwise the earliest later date that ``is_working_day`` accepts.

    Raises:
        ValueError: If ``date_str`` cannot be parsed as ``'%Y-%m-%d'``.
    """
    candidate = datetime.strptime(date_str, '%Y-%m-%d')
    one_day = timedelta(days=1)

    # The input itself is tested using the caller's original string.
    if not is_working_day(date_str):
        # Walk forward one calendar day at a time until a working day is hit.
        candidate += one_day
        while not is_working_day(candidate.strftime('%Y-%m-%d')):
            candidate += one_day

    return candidate

# Define start and end dates
# Date range requested from Yahoo Finance
start_date = '2018-01-14'
end_date = '2024-02-17'

# Download historical stock data for the YESBANK.NS ticker
data = yf.download("YESBANK.NS", start=start_date, end=end_date)

# # Resample the data to monthly frequency
# data = data.resample('M').agg({
#     'Open': 'first',
#     'High': 'max',
#     'Low': 'min',
#     'Close': 'last',
#     'Adj Close': 'last',
#     'Volume': 'sum'
# })

# Remove rows corresponding to holidays / non-working days.
# One boolean mask is built in a single pass instead of re-filtering the
# whole frame once per removed date.
working_day_mask = [
    is_working_day(day) for day in data.index.strftime('%Y-%m-%d')
]
# .copy() gives an independent frame, so the column assignments that follow
# do not write into a slice of the downloaded data.
data = data.loc[working_day_mask].copy()

# VWAP, accumulated from the first row of `data` onwards.
# Each row contributes volume * (High + Low + Close) / 3.
data['Volume_Price'] = data['Volume'].mul(data['High'] + data['Low'] + data['Close']).div(3)
data['Cumulative_Volume_Price'] = data['Volume_Price'].cumsum()
data['Cumulative_Volume'] = data['Volume'].cumsum()
data['VWAP'] = data['Cumulative_Volume_Price'].div(data['Cumulative_Volume'])

# Rolling mean of volume over the current row and the two rows before it;
# the first two rows have no full window and are therefore NaN.
data['Three_Day_Avg_Volume'] = data['Volume'].rolling(3).mean()

# Whole-range averages, used as the baseline for the "relative" columns
average_close = data['Close'].mean()
average_volume = data['Volume'].mean()

def _format_pct_change(current, previous):
    """Format the percentage change from ``previous`` to ``current``.

    Returns "-" when the change is undefined: there is no previous value,
    or the previous value is NaN or zero.
    """
    if previous is None or pd.isna(previous) or previous == 0:
        return "-"
    return f"{((current - previous) / previous) * 100:.2f}%"


def _format_ratio(numerator, denominator):
    """Format ``numerator / denominator`` to two decimals, or "-" if undefined."""
    if pd.isna(denominator) or denominator == 0:
        return "-"
    return f"{numerator / denominator:.2f}"


# Create a PrettyTable to store the data
table = PrettyTable()
table.field_names = ["Date", "Close", "Volume", "Price Change (%)", "Volume Change (%)", "VWAP", "Relative Volume","Relative Close", "Three-Day Relative Volume"]

# Add each row of data to the table; the percentage changes compare every
# row with the row before it, so the first row shows "-".
prev_close = None
prev_volume = None
for index, row in data.iterrows():
    date_str = index.strftime("%Y-%m-%d")
    close = row['Close']
    volume = row['Volume']

    table.add_row([
        date_str,
        f"{close:.2f}",
        # iterrows() upcasts the integer volume to float; ",.0f" keeps the
        # thousands separators without a trailing ".0".
        f"{volume:,.0f}",
        _format_pct_change(close, prev_close),
        _format_pct_change(volume, prev_volume),
        f"{row['VWAP']:.2f}",
        # Volume / close relative to the averages over the whole range
        _format_ratio(volume, average_volume),
        _format_ratio(close, average_close),
        # Undefined for the first two rows (rolling window not yet full)
        _format_ratio(volume, row['Three_Day_Avg_Volume']),
    ])

    # Update previous close and volume
    prev_close = close
    prev_volume = volume

# Print the table
print(table)


[*********************100%%**********************]  1 of 1 completed

  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')



+------------+--------+-----------------+------------------+-------------------+--------+-----------------+----------------+---------------------------+
|    Date    | Close  |      Volume     | Price Change (%) | Volume Change (%) |  VWAP  | Relative Volume | Relative Close | Three-Day Relative Volume |
+------------+--------+-----------------+------------------+-------------------+--------+-----------------+----------------+---------------------------+
| 2018-01-15 | 336.00 |   7,142,164.0   |        -         |         -         | 338.27 |       0.06      |      4.19      |            nan            |
| 2018-01-16 | 334.85 |   7,296,505.0   |      -0.34%      |       2.16%       | 336.04 |       0.06      |      4.17      |            nan            |
| 2018-01-17 | 342.40 |   7,985,222.0   |      2.25%       |       9.44%       | 337.13 |       0.06      |      4.27      |            1.07           |
| 2018-01-18 | 341.20 |   35,465,087.0  |      -0.35%      |      344.13%      | 

In [9]:
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
from prettytable import PrettyTable

# Preprocessing: relies on the `data` DataFrame built in the earlier cell.
# Close and Volume are each rescaled into the [0, 1] range.
feature_columns = ['Close', 'Volume']
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data[feature_columns].to_numpy())

# Function to create sequences for LSTM input
def create_sequences(data, seq_length):
    """Cut ``data`` into sliding windows and the element following each window.

    Args:
        data: Sliceable sequence (e.g. a NumPy array indexed along axis 0).
        seq_length: Number of consecutive elements in each window.

    Returns:
        tuple: ``(X, y)`` as NumPy arrays, where ``X[i]`` is
        ``data[i:i + seq_length]`` and ``y[i]`` is ``data[i + seq_length]``.
        Both are empty when ``len(data) <= seq_length``.
    """
    window_starts = range(len(data) - seq_length)
    windows = [data[start:start + seq_length] for start in window_starts]
    targets = [data[start + seq_length] for start in window_starts]
    return np.array(windows), np.array(targets)

# Number of consecutive rows fed to the model for each prediction
sequence_length = 10

# Sliding windows (X) and the row that follows each window (y)
X, y = create_sequences(scaled_data, sequence_length)

# Two stacked LSTM layers followed by a dense head with one output per
# feature (Close and Volume)
model = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=X.shape[1:]),
    LSTM(units=50),
    Dense(units=2),
])
model.compile(optimizer='adam', loss='mse')

# Fit the model to predict the next row from the preceding window
model.fit(X, y, epochs=50, batch_size=32)

# Predict the next (scaled) Close/Volume row for every input window
predictions = model.predict(X)

# Per-sample prediction error: the model is trained to output `y`, so each
# prediction is compared with its actual target row, not with the rows of
# the input window. Averaging over the two features gives one error value
# per sample (the name is kept from the earlier version of this cell).
reconstruction_errors = np.mean(np.abs(predictions - y), axis=1)

# Sample i predicts row `sequence_length + i` of `data`; the mask below
# relies on that alignment.
assert len(reconstruction_errors) == len(data) - sequence_length

# Threshold for anomaly detection: mean error plus five standard deviations
threshold = np.mean(reconstruction_errors) + 5 * np.std(reconstruction_errors)

# Identify rows corresponding to shifts
anomaly_mask = reconstruction_errors > threshold
shift_rows = data.iloc[sequence_length:].loc[anomaly_mask]

# Create a PrettyTable to store the shift rows
shift_table = PrettyTable()
shift_table.field_names = ["Date"] + list(shift_rows.columns)

# Add each row of shift_rows to the PrettyTable
for index, row in shift_rows.iterrows():
    shift_table.add_row([index.strftime("%Y-%m-%d")] + list(row))

# Print the shift table
print(shift_table)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
+------------+--------------------+--------------------+--------------------+--------------------+--------------------+-------------+--------------------+-------------------------+-------------------+--------------------+----------------------+
|    Date    |        Open        |        High        |        Low         |       Close        |     Adj Close      |    Volume   |    Volume_Price    | Cumulativ

In [None]:
from datetime import datetime, timedelta
from prettytable import PrettyTable

# Reference point for the look-back window: the moment this cell is run.
# NOTE(review): `shift_rows` comes from data downloaded up to a fixed
# end_date, so this window only overlaps the data while that date is recent.
current_date = datetime.now()

# Start of the 30-day look-back window
thirty_days_ago = current_date - timedelta(days=30)

# Label-based slice on the date index of shift_rows: keeps the flagged rows
# dated between thirty_days_ago and current_date
last_30_days_data = shift_rows.loc[thirty_days_ago:current_date]

if last_30_days_data.empty:
    print("No data for the last 30 days from the shift_rows.")
else:
    # One table row per flagged date inside the window
    shift_table_last_30_days = PrettyTable()
    shift_table_last_30_days.field_names = ["Date", *last_30_days_data.columns]

    for date_index, row in last_30_days_data.iterrows():
        shift_table_last_30_days.add_row([date_index.strftime("%Y-%m-%d"), *row])

    print(shift_table_last_30_days)


In [3]:
# Iterate over each stock's data.
# NOTE(review): this cell depends on names that are not defined in the cells
# shown in this notebook: `stock_data` (presumably a dict mapping stock code
# to a DataFrame with 'Close' and 'Volume' columns; confirm) and `plt`
# (matplotlib.pyplot). Both must be defined in an earlier cell before this
# one can run.
# The loop variable is deliberately NOT called `stock_data`: rebinding that
# name to a single DataFrame would make the cell fail when re-run.
for stock_code, stock_frame in stock_data.items():
    print("Processing stock:", stock_code)

    # Preprocessing: rescale Close and Volume into [0, 1] for this stock
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(stock_frame[['Close', 'Volume']].values)

    # Create sequences
    X, y = create_sequences(scaled_data, sequence_length)

    # Debugging: Print shapes of X and y
    print("Shape of X:", X.shape)
    print("Shape of y:", y.shape)

    # Define and compile LSTM model (a fresh model is trained per stock)
    model = Sequential()
    model.add(LSTM(units=50, return_sequences=True, input_shape=(X.shape[1], X.shape[2])))
    model.add(LSTM(units=50))
    model.add(Dense(units=2))  # Output layer with 2 neurons for Close and Volume
    model.compile(optimizer='adam', loss='mse')

    # Train LSTM model
    history = model.fit(X, y, epochs=50, batch_size=32)

    # Debugging: Plot training loss over epochs
    plt.plot(history.history['loss'])
    plt.title('Model Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.show()

    # Predict the next (scaled) Close/Volume row for every input window
    predictions = model.predict(X)

    # Per-sample prediction error: compare each prediction with its actual
    # target row `y` (what the model was trained on), averaged over the two
    # features, giving one error value per sample.
    reconstruction_errors = np.mean(np.abs(predictions - y), axis=1)

    # Debugging: Plot the per-sample errors
    plt.plot(reconstruction_errors)
    plt.title('Reconstruction Errors')
    plt.xlabel('Sample')
    plt.ylabel('Error')
    plt.show()

NameError: name 'stock_data' is not defined