## Loading Dataset from S3 bucket

In [25]:
import boto3
import pandas as pd
from io import StringIO

# AWS S3 Configuration
s3 = boto3.client('s3')
bucket_name = "stock-prediction-data-01"
file_key = "google_stock_price_full.csv" 

# Load dataset from S3
response = s3.get_object(Bucket=bucket_name, Key=file_key)
data = response['Body'].read().decode('utf-8')

# Convert CSV to DataFrame
df = pd.read_csv(StringIO(data), parse_dates=['Date'])
df.set_index('Date', inplace=True)

df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2004-08-19,2.490664,2.591785,2.390042,2.499133,2.499133,897427216
2004-08-20,2.51582,2.716817,2.503118,2.697639,2.697639,458857488
2004-08-23,2.758411,2.826406,2.71607,2.724787,2.724787,366857939
2004-08-24,2.770615,2.779581,2.579581,2.61196,2.61196,306396159
2004-08-25,2.614201,2.689918,2.587302,2.640104,2.640104,184645512


## Data Preprocessing using AWS Glue

In [26]:
from sklearn.preprocessing import MinMaxScaler

# Select relevant columns
features = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']
df = df[features]

# Normalize data
scaler = MinMaxScaler(feature_range=(0, 1))
df_scaled = scaler.fit_transform(df)
df_scaled = pd.DataFrame(df_scaled, columns=features, index=df.index)

# Split dataset
train = df_scaled.loc["2019-01-01":"2023-06-30"]
val = df_scaled.loc["2023-07-01":"2023-12-31"]
test = df_scaled.loc["2024-01-01":"2024-04-04"]

# Save processed data to S3
train.to_csv("train.csv")
val.to_csv("val.csv")
test.to_csv("test.csv")
s3.put_object(Bucket=bucket_name, Key="processed/train.csv", Body=train.to_csv(index=False))
s3.put_object(Bucket=bucket_name, Key="processed/val.csv", Body=val.to_csv(index=False))
s3.put_object(Bucket=bucket_name, Key="processed/test.csv", Body=test.to_csv(index=False))

{'ResponseMetadata': {'RequestId': 'N9Q4V0XDCZA2ZP0C',
  'HostId': 'm+elyw3s/aTxhchFIUelzlF4EE9v7hMPvDEPNJ0lCYVJ2OArrQISbtEc2WrgYQq9yrB/brjDXEueDDRl0QL4kBvUUQ/dsmiO',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'm+elyw3s/aTxhchFIUelzlF4EE9v7hMPvDEPNJ0lCYVJ2OArrQISbtEc2WrgYQq9yrB/brjDXEueDDRl0QL4kBvUUQ/dsmiO',
   'x-amz-request-id': 'N9Q4V0XDCZA2ZP0C',
   'date': 'Sun, 23 Mar 2025 08:24:57 GMT',
   'x-amz-server-side-encryption': 'AES256',
   'etag': '"9e65c7404eb2eeeea8fff3727eba3983"',
   'x-amz-checksum-crc32': 'PYFdMA==',
   'x-amz-checksum-type': 'FULL_OBJECT',
   'content-length': '0',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'ETag': '"9e65c7404eb2eeeea8fff3727eba3983"',
 'ChecksumCRC32': 'PYFdMA==',
 'ChecksumType': 'FULL_OBJECT',
 'ServerSideEncryption': 'AES256'}

## Building model with LSTM

In [27]:
!pip install numpy pandas tensorflow boto3 sagemaker matplotlib



In [28]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import numpy as np

# Load data from S3
train_df = pd.read_csv(f"s3://{bucket_name}/processed/train.csv")
val_df = pd.read_csv(f"s3://{bucket_name}/processed/val.csv")
test_df = pd.read_csv(f"s3://{bucket_name}/processed/test.csv")

# Convert data into sequences
def create_sequences(data, seq_length=60):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data.iloc[i:i+seq_length].values)
        y.append(data.iloc[i+seq_length]['Close'])  
    return np.array(X), np.array(y)

X_train, y_train = create_sequences(train_df)
X_val, y_val = create_sequences(val_df)

# Define LSTM Model
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dropout(0.2),
    LSTM(50, return_sequences=False),
    Dropout(0.2),
    Dense(25),
    Dense(1)
])

model.compile(optimizer='adam', loss='mse')

# Train Model
model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=20,batch_size=32)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tf_keras.src.callbacks.History at 0x7f08d8a6a740>

## Saving the model

In [29]:
# Saving model
model.save("lstm_model.h5")

# Upload to S3
s3.upload_file("lstm_model.h5", bucket_name, "models/lstm_model.h5")

  saving_api.save_model(


## Storing Predictions in DynamoDB 

In [30]:
import boto3
from datetime import datetime, timedelta

# Load test data
X_test, y_test = create_sequences(test_df)

# Make predictions
predictions = model.predict(X_test)

# Initialize DynamoDB
dynamodb = boto3.resource('dynamodb')
table = dynamodb.Table('StockPredictions')

# Store predictions in DynamoDB
for i, pred in enumerate(predictions):
    date_str = (datetime(2024, 1, 1) + timedelta(days=i)).strftime('%Y-%m-%d')  # Convert to string
    table.put_item(
        Item={
            'Date': date_str,  # Store date as string
            'Predicted_Close': str(pred[0])  # Convert prediction to string if needed
        }
    )



## Visualization with Power BI

In [32]:
import boto3
import pandas as pd

# Initialize DynamoDB client
dynamodb = boto3.resource('dynamodb')
table = dynamodb.Table('StockPredictions')  # Replace with your table name

# Scan the table to get all data
response = table.scan()
data = response.get('Items', [])

# Convert to DataFrame
df = pd.DataFrame(data)

# Save as CSV
df.to_csv("dynamodb_data.csv", index=False)
print("Data exported successfully!")


Data exported successfully!


## Store the predictions in Azure from DynamoDB

In [3]:
!pip install azure-storage-blob

Collecting azure-storage-blob
  Downloading azure_storage_blob-12.25.0-py3-none-any.whl.metadata (26 kB)
Collecting azure-core>=1.30.0 (from azure-storage-blob)
  Downloading azure_core-1.32.0-py3-none-any.whl.metadata (39 kB)
Collecting isodate>=0.6.1 (from azure-storage-blob)
  Downloading isodate-0.7.2-py3-none-any.whl.metadata (11 kB)
Downloading azure_storage_blob-12.25.0-py3-none-any.whl (406 kB)
Downloading azure_core-1.32.0-py3-none-any.whl (198 kB)
Downloading isodate-0.7.2-py3-none-any.whl (22 kB)
Installing collected packages: isodate, azure-core, azure-storage-blob
Successfully installed azure-core-1.32.0 azure-storage-blob-12.25.0 isodate-0.7.2


In [4]:
import boto3
import json
from azure.storage.blob import BlobServiceClient

# AWS DynamoDB connection
dynamodb = boto3.resource('dynamodb', region_name="us-east-1")
table = dynamodb.Table("StockPredictions")

# Fetch ML model predictions from DynamoDB
response = table.scan()
items = response["Items"]

# Azure Blob Storage connection
AZURE_STORAGE_CONNECTION_STRING = "DefaultEndpointsProtocol=https;AccountName=stockpred;AccountKey=a3U7KDBDz2j7fxIq01eAJHOOTKVkiv+odFo+rVDY12psnE/uWdVlCYbIpofTJv43ilk96tBK76z1+AStSbbgEQ==;EndpointSuffix=core.windows.net"
blob_service_client = BlobServiceClient.from_connection_string(AZURE_STORAGE_CONNECTION_STRING)
blob_client = blob_service_client.get_blob_client(container="stockmarket", blob="predictions.json")

# Upload AWS ML data to Azure
blob_client.upload_blob(json.dumps(items), overwrite=True)
print("Data synced to Azure Blob Storage!")

Data synced to Azure Blob Storage!
