#### Notebook that performs real-time inference: it fetches data, processes it synchronously, and calls the model endpoint for results

In [25]:
import pandas as pd
import json
import numpy as np
import os
import sys
import boto3

In [26]:
# Put the parent directory of the notebook's working directory on the import
# path (once), so the `src.*` imports in the following cell can be resolved.
project_root = os.path.abspath(os.pardir)
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)

In [27]:
from importlib import reload
from src.data.collector import DataCollector
from src.data.processor import DataProcessor

# Collector that supplies the raw rows for one inference request.
# NOTE(review): the meaning of `days`, `num_rows` and `outputpath` is defined by
# DataCollector (src/data/collector.py), not visible here. `num_rows=31` is
# presumably `lag + 1` for the `lag = 30` used when preparing features — confirm.
collector = DataCollector(
    filename="data-inference.csv",  # plain literal: the f-prefix had no placeholders
    days=3,
    ticker="^GSPC",
    num_rows=31,
    outputpath="../data/raw/",
)

# Processor whose methods transform the collected frame into model features.
processor = DataProcessor()

In [28]:
# Fetch the raw frame, copy its index into a regular "Datetime" column, then
# discard the index in favour of a default positional one.
raw_frame = collector.get_data()
raw_frame["Datetime"] = raw_frame.index
df = raw_frame.reset_index(drop=True)

[*********************100%%**********************]  1 of 1 completed


In [29]:
# Value passed to `processor.prepare_data` below.
# NOTE(review): presumably the number of lagged rows per sample — confirm in DataProcessor.
lag = 30

# Run the frame through the processor steps, each one consuming the previous
# step's output. The order is significant and matches the training-side usage
# as written in this notebook.
for step in (
    processor.drop_columns,
    processor.sort_by_datetime,
    processor.extract_date_features,
    processor.one_hot_encode_day_of_week,
    processor.convert_datetime_to_iso_8601,
):
    df = step(df)

# The "Datetime" column is removed before the data is prepared with `lag`.
df = df.drop(columns=["Datetime"])
df = processor.prepare_data(df, lag)

# Normalise column names via the processor's mapping function, then remove the
# "close_target" column so that only input columns are sent to the endpoint.
df = df.rename(columns=processor.convert_col_name).drop(columns=["close_target"])

# Request payload: CSV text without index or header row.
csv_input = df.to_csv(index=False, header=False)

In [30]:
# SageMaker runtime client; credentials and region come from the ambient
# boto3 configuration.
runtime_client = boto3.client("sagemaker-runtime")

# Endpoint to invoke. Defaults to the deployment this notebook was written
# against; set the SAGEMAKER_ENDPOINT_NAME environment variable to target a
# different endpoint without editing the notebook.
endpoint_name = os.environ.get(
    "SAGEMAKER_ENDPOINT_NAME", "sagemaker-xgboost-2024-06-22-15-43-24-818"
)

In [32]:
# Send the CSV payload to the endpoint and decode the response body as JSON.
response = runtime_client.invoke_endpoint(
    EndpointName=endpoint_name, ContentType="text/csv", Body=csv_input
)

result = json.loads(response["Body"].read().decode())
predictions = np.array(result)

print(predictions)

# `predictions` is a 0-d array when the endpoint returns a single number, but
# an n-element array when several rows were sent. Comparing a multi-element
# array directly in an `if` raises "truth value of an array is ambiguous", so
# flatten to 1-D and report one verdict per prediction. For a single value the
# printed output is unchanged.
for probability in np.atleast_1d(predictions).ravel():
    # 0.5 is the decision threshold between the "up" and "down" verdicts.
    if probability > 0.5:
        print("The market will go up")
    else:
        print("The market will go down")

0.5090364813804626
The market will go up
