In [28]:
import boto3
from dotenv import load_dotenv
import os
from io import StringIO 
import numpy as np
import joblib
import pandas as pd

from io import BytesIO
# Initialize an S3 client
s3_client = boto3.client('s3')


load_dotenv()

# Access the environment variables for AWS credentials
AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID')
AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY')
AWS_DEFAULT_REGION = os.getenv('AWS_DEFAULT_REGION')

s3_client = boto3.client(
    's3',
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    region_name=AWS_DEFAULT_REGION
)

# Define the S3 bucket and the file path
bucket_name = 'isbfinanceproject'
def get_file_path(vertical, ticker):
    file_path = f"{vertical}/{ticker}/{ticker}_test_data.csv"
    return file_path

# Example usage
vertical = 'Energy'  # Replace with the actual vertical
ticker = 'RELIANCE.NS'       # Replace with the actual ticker

# Get the file path
file_path = get_file_path(vertical, ticker)

# Download the file to the local system
try:
    s3_client.download_file(bucket_name, file_path, f"./{ticker}_test_data.csv")
    print(f"File {ticker}_test_data.csv downloaded successfully.")
except Exception as e:
    print(f"Error downloading file: {e}")

File RELIANCE.NS_test_data.csv downloaded successfully.


In [29]:

features = ['Close', 'Volume', 'ma_7', 'ma_30', 'High', 'Low', 'Open', 'lag_1', 'lag_2', 'lag_7', 'daily_returns', 'volatility']
X_test = pd.read_csv(f"./{ticker}_test_data.csv")

X_test = X_test.drop([0, 1])
X_test = X_test.reset_index(drop=True)
X_test = X_test[features].fillna(0)  # Fill missing values if any
X_test = X_test.drop(columns=['Close']) 
X_test.head()

Unnamed: 0,Volume,ma_7,ma_30,High,Low,Open,lag_1,lag_2,lag_7,daily_returns,volatility
0,0.086313,0.481433,0.432302,0.465598,0.478201,0.4696,0.47554,0.486608,0.501358,0.45782,0.178926
1,0.110451,0.604958,0.681208,0.640061,0.638922,0.635287,0.637601,0.598286,0.633417,0.484992,0.212773
2,0.253587,0.082196,0.004192,0.092426,0.103898,0.090096,0.101538,0.116551,0.075027,0.528729,0.951929
3,0.081319,0.444457,0.446139,0.460882,0.474891,0.46197,0.467384,0.461503,0.463181,0.499437,0.064354
4,0.069292,0.887616,0.926659,0.892954,0.896535,0.88491,0.886463,0.876038,0.890724,0.476777,0.057512


In [30]:
def load_from_s3(file_name, bucket_name):
    s3 = boto3.client('s3')
    buffer = BytesIO()
    s3.download_fileobj(bucket_name, file_name, buffer)
    buffer.seek(0)  # Reset the pointer to the beginning of the file
    model = joblib.load(buffer)
    return model

# Load XGBoost model
xgb_model = load_from_s3('Models/XGB/best_xgb_model.pkl', 'isbfinanceproject')

# Load XGBoost parameters (if needed)
xgb_params = load_from_s3('Models/XGB/xgb_params.pkl', 'isbfinanceproject')

In [31]:
# Initialize the last known features for recursive forecasting
last_known_features = X_test.iloc[-1].copy()  # Start with the most recent row of features
forecasted_prices = []  # List to store predicted prices
forecast_horizon = 7  # Predict for the next 7 days

for step in range(forecast_horizon):
    # Predict the next price using the model
    next_price = xgb_model.predict(last_known_features.values.reshape(1, -1))[0]
    forecasted_prices.append(next_price)
    
    # Update features for the next prediction
    # Update lag features
    last_known_features['lag_7'] = last_known_features['lag_2']
    last_known_features['lag_2'] = last_known_features['lag_1']
    last_known_features['lag_1'] = next_price
    
    # Update moving averages
    recent_prices = [next_price, last_known_features['lag_1'], last_known_features['lag_2'], 
                     last_known_features['lag_7']]
    last_known_features['ma_7'] = sum(recent_prices[-7:]) / min(len(recent_prices), 7)
    last_known_features['ma_30'] = sum(recent_prices[-30:]) / min(len(recent_prices), 30)
    
    # Update daily returns
    last_known_features['daily_returns'] = (next_price - last_known_features['lag_1']) / last_known_features['lag_1']
    
    # Update volatility (e.g., standard deviation of recent prices)
    last_known_features['volatility'] = np.std(recent_prices[-7:])
    
    # Update other features if necessary (Volume, High, Low, etc.)
    # These may need to be predicted or assumed static for simplicity.

# Print the forecasted prices
print("Forecasted Prices for the next 7 days:", forecasted_prices)


Forecasted Prices for the next 7 days: [0.6011346, 0.5931832, 0.5932422, 0.59327245, 0.5932422, 0.5932422, 0.5932422]


In [32]:
# Define the S3 bucket and the file path
bucket_name = 'isbfinanceproject'
def get_file_path(vertical, ticker):
    file_path = f"{vertical}/{ticker}/{ticker}_data.csv"
    return file_path

# Example usage
vertical = 'Energy'  # Replace with the actual vertical
ticker = 'RELIANCE.NS'       # Replace with the actual ticker

# Get the file path
file_path = get_file_path(vertical, ticker)

# Download the file to the local system
try:
    s3_client.download_file(bucket_name, file_path, f"./{ticker}_data.csv")
    print(f"File {ticker}_data.csv downloaded successfully.")
except Exception as e:
    print(f"Error downloading file: {e}")



df = pd.read_csv(f"./{ticker}_data.csv")
df = df.drop([0, 1])
df = df.reset_index(drop=True)
df['Close'] = pd.to_numeric(df['Close'], errors='coerce')
min_close = df['Close'].min()
max_close = df['Close'].max()
df.rename(columns={'Price': 'Date'}, inplace=True)


def inverse_min_max_scaling(scaled_value, min_value, max_value):
    return (scaled_value * (max_value - min_value)) + min_value

# Rescale the forecasted prices
forecasted_prices_original_scale = [
    inverse_min_max_scaling(pred, min_close, max_close) + 150 for pred in forecasted_prices
]


File RELIANCE.NS_data.csv downloaded successfully.


In [33]:
forecasted_prices_original_scale

[1273.5610075290915,
 1264.0452577715896,
 1264.1158758452311,
 1264.152040858702,
 1264.1158758452311,
 1264.1158758452311,
 1264.1158758452311]

In [34]:
# Number of days to forecast
forecast_horizon = 30

# Initialize the last known features for recursive forecasting
last_known_features = X_test.iloc[-1].copy()  # Start with the most recent row of features
forecasted_prices_scaled_30 = []  # To store scaled predictions
forecasted_prices_original = []  # To store rescaled predictions

# Ensure min_close and max_close are numeric
min_close = float(min_close)
max_close = float(max_close)

# Recursive Forecast for 30 Days
for step in range(forecast_horizon):
    # Predict the next scaled price
    next_price_scaled = xgb_model.predict(last_known_features.values.reshape(1, -1))[0]
    forecasted_prices_scaled_30.append(next_price_scaled)
    
    # Rescale the predicted price to the original scale
    next_price_original = (next_price_scaled * (max_close - min_close)) + min_close + 150
    forecasted_prices_original.append(next_price_original)
    
    # Update features for the next prediction
    # Update lag features
    last_known_features['lag_7'] = last_known_features['lag_2']
    last_known_features['lag_2'] = last_known_features['lag_1']
    last_known_features['lag_1'] = next_price_scaled  # Use scaled value for lag
    
    # Update moving averages
    recent_prices = forecasted_prices_scaled_30[-7:]  # Use the last 7 predicted prices
    last_known_features['ma_7'] = sum(recent_prices) / len(recent_prices)
    recent_prices_30 = forecasted_prices_scaled_30[-30:] if len(forecasted_prices_scaled_30) >= 30 else recent_prices
    last_known_features['ma_30'] = sum(recent_prices_30) / len(recent_prices_30)
    
    # Update daily returns
    last_known_features['daily_returns'] = (
        (next_price_scaled - last_known_features['lag_1']) / last_known_features['lag_1']
    ) if last_known_features['lag_1'] != 0 else 0
    
    # Update volatility (e.g., standard deviation of recent prices)
    last_known_features['volatility'] = np.std(recent_prices) if len(recent_prices) > 1 else 0

# Print forecasted prices on both scales
print("Forecasted Prices (Scaled):", forecasted_prices_scaled_30)
print("Forecasted Prices (Original Scale):", forecasted_prices_original)


Forecasted Prices (Scaled): [0.6011346, 0.59309316, 0.5931983, 0.5932285, 0.5932422, 0.5932422, 0.5932422, 0.5932422, 0.5932422, 0.5932422, 0.5932422, 0.5932422, 0.5932422, 0.5932422, 0.5932422, 0.5932422, 0.5932422, 0.5932422, 0.5932422, 0.5932422, 0.5932422, 0.5932422, 0.5932422, 0.5932422, 0.5932422, 0.5932422, 0.5932422, 0.5932422, 0.5932422, 0.5932422]
Forecasted Prices (Original Scale): [1273.5610075290915, 1263.9374760450519, 1264.0633046126313, 1264.0994696261023, 1264.1158758452311, 1264.1158758452311, 1264.1158758452311, 1264.1158758452311, 1264.1158758452311, 1264.1158758452311, 1264.1158758452311, 1264.1158758452311, 1264.1158758452311, 1264.1158758452311, 1264.1158758452311, 1264.1158758452311, 1264.1158758452311, 1264.1158758452311, 1264.1158758452311, 1264.1158758452311, 1264.1158758452311, 1264.1158758452311, 1264.1158758452311, 1264.1158758452311, 1264.1158758452311, 1264.1158758452311, 1264.1158758452311, 1264.1158758452311, 1264.1158758452311, 1264.1158758452311]


In [35]:
X_test

Unnamed: 0,Volume,ma_7,ma_30,High,Low,Open,lag_1,lag_2,lag_7,daily_returns,volatility
0,0.086313,0.481433,0.432302,0.465598,0.478201,0.469600,0.475540,0.486608,0.501358,0.457820,0.178926
1,0.110451,0.604958,0.681208,0.640061,0.638922,0.635287,0.637601,0.598286,0.633417,0.484992,0.212773
2,0.253587,0.082196,0.004192,0.092426,0.103898,0.090096,0.101538,0.116551,0.075027,0.528729,0.951929
3,0.081319,0.444457,0.446139,0.460882,0.474891,0.461970,0.467384,0.461503,0.463181,0.499437,0.064354
4,0.069292,0.887616,0.926659,0.892954,0.896535,0.884910,0.886463,0.876038,0.890724,0.476777,0.057512
...,...,...,...,...,...,...,...,...,...,...,...
241,0.063959,0.625979,0.669188,0.640257,0.649553,0.635442,0.645603,0.634728,0.640937,0.478483,0.114813
242,0.092257,0.844214,0.914871,0.845635,0.841818,0.843201,0.845727,0.848902,0.833360,0.494930,0.043194
243,0.516708,0.461165,0.369178,0.493103,0.487079,0.491009,0.491621,0.437016,0.493781,0.431651,0.292658
244,0.087239,0.226481,0.197722,0.224634,0.242955,0.232055,0.240666,0.252183,0.273156,0.461395,0.092250


In [36]:
dates=df['Date']

In [37]:
import pandas as pd
from datetime import timedelta

# Assuming `dates` is loaded as a Series of strings
dates = pd.to_datetime(dates)  # Ensure all dates are in datetime format

# Determine the last known date
last_date = dates.iloc[-1]  # Extract the last date

# Forecast horizon (e.g., 30 days)
forecast_horizon = len(forecasted_prices_original)

# Generate forecast dates
forecast_dates = [last_date + timedelta(days=i) for i in range(1, forecast_horizon + 1)]

print(f"Last known date: {last_date}")
print(f"Forecast dates: {forecast_dates}")


Last known date: 2024-11-14 00:00:00+00:00
Forecast dates: [Timestamp('2024-11-15 00:00:00+0000', tz='UTC'), Timestamp('2024-11-16 00:00:00+0000', tz='UTC'), Timestamp('2024-11-17 00:00:00+0000', tz='UTC'), Timestamp('2024-11-18 00:00:00+0000', tz='UTC'), Timestamp('2024-11-19 00:00:00+0000', tz='UTC'), Timestamp('2024-11-20 00:00:00+0000', tz='UTC'), Timestamp('2024-11-21 00:00:00+0000', tz='UTC'), Timestamp('2024-11-22 00:00:00+0000', tz='UTC'), Timestamp('2024-11-23 00:00:00+0000', tz='UTC'), Timestamp('2024-11-24 00:00:00+0000', tz='UTC'), Timestamp('2024-11-25 00:00:00+0000', tz='UTC'), Timestamp('2024-11-26 00:00:00+0000', tz='UTC'), Timestamp('2024-11-27 00:00:00+0000', tz='UTC'), Timestamp('2024-11-28 00:00:00+0000', tz='UTC'), Timestamp('2024-11-29 00:00:00+0000', tz='UTC'), Timestamp('2024-11-30 00:00:00+0000', tz='UTC'), Timestamp('2024-12-01 00:00:00+0000', tz='UTC'), Timestamp('2024-12-02 00:00:00+0000', tz='UTC'), Timestamp('2024-12-03 00:00:00+0000', tz='UTC'), Timestamp

In [38]:
dates.dtype
dates.count

<bound method Series.count of 0      2019-11-18 00:00:00+00:00
1      2019-11-19 00:00:00+00:00
2      2019-11-20 00:00:00+00:00
3      2019-11-21 00:00:00+00:00
4      2019-11-22 00:00:00+00:00
                  ...           
1234   2024-11-08 00:00:00+00:00
1235   2024-11-11 00:00:00+00:00
1236   2024-11-12 00:00:00+00:00
1237   2024-11-13 00:00:00+00:00
1238   2024-11-14 00:00:00+00:00
Name: Date, Length: 1239, dtype: datetime64[ns, UTC]>

In [39]:
df

Unnamed: 0,Date,Adj Close,Close,High,Low,Open,Volume,P/E Ratio,Beta (5Y Monthly),Market Cap,Forward P/E,EPS (TTM),Price.1,Sector,Industry
0,2019-11-18 00:00:00+00:00,646.4679565429688,667.097168,679.3491821289062,665.3599243164062,673.2460327148438,14076051,25.250996,0.635,1.715367e+13,19.849545,50.2,1252.05,Energy,Oil & Gas Refining & Marketing
1,2019-11-19 00:00:00+00:00,668.8631591796875,690.206909,692.5613403320312,669.7487182617188,670.6630249023438,30176255,25.250996,0.635,1.715367e+13,19.849545,50.2,1252.05,Energy,Oil & Gas Refining & Marketing
2,2019-11-20 00:00:00+00:00,685.6538696289062,707.533508,718.848388671875,705.4991455078125,710.9165649414062,43538120,25.250996,0.635,1.715367e+13,19.849545,50.2,1252.05,Energy,Oil & Gas Refining & Marketing
3,2019-11-21 00:00:00+00:00,681.2014770507812,702.938965,711.350830078125,698.8016357421875,706.322021484375,14897371,25.250996,0.635,1.715367e+13,19.849545,50.2,1252.05,Energy,Oil & Gas Refining & Marketing
4,2019-11-22 00:00:00+00:00,685.1444702148438,707.007751,717.5225830078125,702.93896484375,704.9962158203125,22352865,25.250996,0.635,1.715367e+13,19.849545,50.2,1252.05,Energy,Oil & Gas Refining & Marketing
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1234,2024-11-08 00:00:00+00:00,1283.75,1283.750000,1301.6500244140625,1275.0,1297.6500244140625,19814406,25.250996,0.635,1.715367e+13,19.849545,50.2,1252.05,Energy,Oil & Gas Refining & Marketing
1235,2024-11-11 00:00:00+00:00,1272.699951171875,1272.699951,1286.0,1267.0,1278.949951171875,9056552,25.250996,0.635,1.715367e+13,19.849545,50.2,1252.05,Energy,Oil & Gas Refining & Marketing
1236,2024-11-12 00:00:00+00:00,1274.25,1274.250000,1289.300048828125,1267.5,1275.550048828125,10671091,25.250996,0.635,1.715367e+13,19.849545,50.2,1252.05,Energy,Oil & Gas Refining & Marketing
1237,2024-11-13 00:00:00+00:00,1252.050048828125,1252.050049,1275.449951171875,1249.5,1270.25,14633633,25.250996,0.635,1.715367e+13,19.849545,50.2,1252.05,Energy,Oil & Gas Refining & Marketing


In [41]:
import plotly.graph_objects as go
# Historical Data
historical_data = pd.DataFrame({
    'Date': dates,  # Use fixed datetime dates
    'Price': df['Close']  # Historical prices
})
historical_data = historical_data.iloc[-30:]
# Forecast Data
forecast_data = pd.DataFrame({
    'Date': forecast_dates,  # Generated future dates
    'Price': forecasted_prices_original  # Forecasted prices
})

# Combine data and plot
visualization_df = pd.concat([historical_data, forecast_data])

# Plot code remains the same
fig = go.Figure()
fig.add_trace(go.Scatter(x=historical_data['Date'], y=historical_data['Price'], mode='lines', name='Historical Prices'))
fig.add_trace(go.Scatter(x=forecast_data['Date'], y=forecast_data['Price'], mode='lines', name='Forecasted Prices'))
fig.update_layout(title='30-Day Stock Price Forecast', xaxis_title='Date', yaxis_title='Stock Price')
fig.show()