### DATA GENERATION

In [None]:
%store -r
import pandas as pd
from io import StringIO
import io,random,datetime,boto3,sagemaker,os,time,json

In [None]:
# Export the credentials and region as the standard AWS environment variables
# so that clients created without an explicit session (for example
# ``boto3.client(...)``) can resolve them as well.
# ACCESS_KEY, SECRET_KEY and REGION_NAME are presumably restored by the
# ``%store -r`` magic in the first cell -- verify before running.
os.environ.update(
    {
        "AWS_ACCESS_KEY_ID": ACCESS_KEY,
        "AWS_SECRET_ACCESS_KEY": SECRET_KEY,
        "AWS_DEFAULT_REGION": REGION_NAME,
    }
)

# Explicit boto3 session built from the same credentials; the SageMaker
# session is layered on top of it.
boto_session = boto3.session.Session(
    aws_access_key_id=ACCESS_KEY,
    aws_secret_access_key=SECRET_KEY,
    region_name=REGION_NAME,
)
sagemaker_session = sagemaker.Session(boto_session=boto_session)

In [None]:
def save_dataset_to_s3(data, monitor_name, name, typeofdata):
    """Upload ``data`` as a CSV object to the ``BUCKET`` S3 bucket.

    The object key is ``<monitor_name>/<typeofdata>/<name>.csv``. The result
    of the upload is reported with ``print``; nothing is returned.

    Args:
        data: Object exposing ``to_csv(buffer, index=False)`` (a pandas
            DataFrame in this notebook's usage). The index is not written.
        monitor_name: First component of the object key.
        name: File name without the ``.csv`` extension.
        typeofdata: Second component of the object key.
    """
    # S3 keys always use "/" as the delimiter. os.path.join would insert
    # backslashes on Windows, so join with POSIX rules regardless of the host.
    import posixpath

    file_path = posixpath.join(monitor_name, typeofdata, name + ".csv")

    # Serialize to an in-memory buffer; nothing is written to local disk.
    with io.StringIO() as csv_buffer:
        data.to_csv(csv_buffer, index=False)
        response = boto3.client("s3").put_object(
            Bucket=BUCKET, Key=file_path, Body=csv_buffer.getvalue()
        )

    status = response.get("ResponseMetadata", {}).get("HTTPStatusCode")
    if status == 200:
        print(f"Successful S3 put_object response. Status - {status}")
        print(file_path)
    else:
        print(f"Unsuccessful S3 put_object response. Status - {status}")


### Invoke Endpoint

In [None]:
# Periodically sample records from insurance.csv, send them to the endpoint,
# and upload the features, predictions and ground-truth labels to S3.
# The loop runs until the kernel is interrupted.

# Loop-invariant setup: the source data and the predictor do not change
# between iterations, so load/build them once instead of on every pass.
dataframe = pd.read_csv('insurance.csv')
# NOTE(review): read_csv already consumes the header line by default, so
# iloc[1:] drops the first data record -- confirm that this is intended.
dataframe = dataframe.iloc[1:, 0:7]

predictor = sagemaker.predictor.Predictor(
    endpoint_name,
    sagemaker_session=sagemaker_session,
    serializer=sagemaker.serializers.CSVSerializer(),
    ContentType="text/csv",
)

count = 1
while True:
    # Draw a random batch of 10-15 rows and split off the target column.
    no_of_samples = random.randint(10, 15)
    sample_df = dataframe.sample(no_of_samples)
    labels = sample_df["charges"]
    sample_df = sample_df.drop(["charges"], axis=1)

    # The payload is sent as comma-separated rows without header or index.
    csv_file = io.StringIO()
    sample_df.to_csv(csv_file, sep=",", header=False, index=False)
    my_payload_as_csv = csv_file.getvalue()

    print("Invoking endpoint....")
    response = predictor.predict(my_payload_as_csv)

    # Spread evenly spaced timestamps over a 10-second window, one per record.
    start = datetime.datetime.utcnow()
    end = start + datetime.timedelta(seconds=10)
    # Assumes the endpoint returns a JSON array with one prediction per
    # submitted row -- TODO confirm against the deployed model.
    predictions = json.loads(response.decode("utf-8"))
    timestamps = pd.date_range(start, end, periods=len(labels))

    # ``labels`` is a Series, so the frame takes its index from it; that
    # keeps the rows aligned with ``sample_df`` for the inner concat below.
    labelled_df = pd.DataFrame({
        "timestamp": timestamps,
        "charges": predictions,
        "GT_Target": labels,
    })

    gtfilename = str(count) + "_GTdata"
    gt_data = pd.concat([sample_df, labelled_df], axis=1, join='inner')
    save_dataset_to_s3(gt_data, DEPLOYMENT_NAME, gtfilename, "groundtruth")

    count = count + 1
    # Wait a minute before sending the next batch.
    time.sleep(60)