### DATA GENERATION

In [None]:
# Restore variables previously persisted with IPython's %store magic.
# NOTE(review): `sgmkr_config`, used throughout this notebook, is presumably
# one of the restored variables - confirm against the notebook that stores it.
%store -r
# Total duration, in minutes, of the endpoint invocation loop further below.
minutes=60
import pandas as pd
from io import StringIO
import io,random,datetime,boto3,sagemaker,os,time,json
import time
from IPython.display import clear_output
from sklearn import preprocessing as skpreprocessing

In [None]:
# Export the AWS credentials and region from sgmkr_config as the standard
# AWS environment variables, one (environment variable, config key) pair at
# a time and in the same order as before.
for env_name, config_key in (
    ("AWS_ACCESS_KEY_ID", 'ACCESS_KEY'),
    ("AWS_SECRET_ACCESS_KEY", 'SECRET_KEY'),
    ("AWS_DEFAULT_REGION", 'REGION_NAME'),
):
    os.environ[env_name] = sgmkr_config[config_key]

## Waiting for the SageMaker endpoint to become ready

In [None]:
# Poll the SageMaker endpoint until it leaves the "Creating" state, then make
# sure it ended up "InService"; any other final state aborts the notebook.
client = boto3.client(service_name="sagemaker", region_name=sgmkr_config['REGION_NAME'])
st = 1  # poll counter, shown in the progress message
endpoint_name = sgmkr_config['ENDPOINT_NAME']
describe_endpoint_response = client.describe_endpoint(EndpointName=endpoint_name)
while describe_endpoint_response["EndpointStatus"] == "Creating":
    print(f"{st} Sagemaker is creating endpoint, wait ...")
    # wait=True defers clearing until the next output arrives, so the
    # progress line stays visible while we sleep.
    clear_output(wait=True)
    time.sleep(15)
    st += 1
    # Refresh the status *after* sleeping so the loop condition always sees
    # the latest state and no extra sleep happens once creation has finished.
    describe_endpoint_response = client.describe_endpoint(EndpointName=endpoint_name)
if describe_endpoint_response["EndpointStatus"] == "InService":
    print("Endpoint is ready")
else:
    # Print the hint before raising; after the raise it would never run.
    print("Try creating the endpoint again")
    raise Exception(f"Endpoint is in {describe_endpoint_response['EndpointStatus']} state")

In [None]:
def save_dataset_to_s3(data, monitor_name, name, typeofdata):
    """Upload ``data`` as a CSV object to the bucket in ``sgmkr_config['BUCKET']``.

    The object key is ``<monitor_name>/<typeofdata>/<name>.csv``. The outcome
    of the upload is reported with ``print``; nothing is returned.

    Args:
        data: Object exposing ``to_csv`` (a pandas DataFrame in this notebook).
        monitor_name: First component of the S3 key.
        name: File name without the ``.csv`` extension.
        typeofdata: Second component of the S3 key (e.g. ``"groundtruth"``).
    """
    # S3 keys always use "/" as separator; os.path.join would produce
    # backslashes on Windows, so join with the POSIX flavour explicitly.
    import posixpath

    file_path = posixpath.join(monitor_name, typeofdata, name + ".csv")

    # With no target given, to_csv returns the CSV text directly.
    csv_body = data.to_csv(index=False)

    response = boto3.client("s3").put_object(
        Bucket=sgmkr_config['BUCKET'], Key=file_path, Body=csv_body
    )
    status = response.get("ResponseMetadata", {}).get("HTTPStatusCode")

    if status == 200:
        print(f"Successful S3 put_object response. Status - {status}")
        print(file_path)
    else:
        print(f"Unsuccessful S3 put_object response. Status - {status}")


In [None]:
# Create a boto3 session from the explicit credentials and region held in
# sgmkr_config, then hand it to the SageMaker SDK session used for inference.
boto_session = boto3.Session(
    aws_access_key_id=sgmkr_config['ACCESS_KEY'],
    aws_secret_access_key=sgmkr_config['SECRET_KEY'],
    region_name=sgmkr_config['REGION_NAME'],
)
sagemaker_session = sagemaker.Session(boto_session=boto_session)

In [None]:
# Load the insurance dataset and replace the categorical text columns with
# integer codes produced by scikit-learn's LabelEncoder.
dataframe = pd.read_csv('https://storage.googleapis.com/insurance-data/insurance/insurance.csv')
for col in ['sex', 'smoker', 'region']:
    # Comparing the dtype to 'object' alone misses columns that pandas stores
    # with a dedicated string dtype, which would then silently stay
    # un-encoded. Accept both representations of text data.
    if pd.api.types.is_object_dtype(dataframe[col]) or pd.api.types.is_string_dtype(dataframe[col]):
        # fit() returns the encoder itself, so it can be chained.
        le = skpreprocessing.LabelEncoder().fit(dataframe[col])
        dataframe[col] = le.transform(dataframe[col])
        print('Completed Label encoding on',col)

### Invoke Endpoint

In [None]:
# For `minutes` minutes, repeatedly send a random sample of rows to the
# endpoint and store the features, the returned predictions and the true
# "charges" values as a ground-truth CSV in S3, pausing between rounds.
count = 1  # running number used in the ground-truth file names
t_end = time.time() + 60 * minutes

# The predictor does not depend on the sampled data, so build it once instead
# of on every iteration.
# NOTE(review): CSVSerializer already declares a CSV content type; confirm
# whether the extra ContentType keyword has any effect on Predictor.
predictor = sagemaker.predictor.Predictor(
    endpoint_name,
    sagemaker_session=sagemaker_session,
    serializer=sagemaker.serializers.CSVSerializer(),
    ContentType="text/csv")

while time.time() < t_end:
    no_of_samples = random.randint(10,15)
    sample_df = dataframe.sample(no_of_samples)
    # Keep the true target aside and send only the features to the endpoint.
    labels = sample_df["charges"]
    sample_df = sample_df.drop(["charges"],axis=1)
    # by default sagemaker expects comma separated values; with no target
    # given, to_csv returns the CSV text directly.
    my_payload_as_csv = sample_df.to_csv(sep=",", header=False, index=False)
    print("Invoking endpoint....")
    response=predictor.predict(my_payload_as_csv)

    # Spread one timestamp per sampled row evenly over a 10 second window
    # starting now.
    start = datetime.datetime.utcnow()
    end = start + datetime.timedelta(seconds=10)
    predictions = json.loads(response.decode("utf-8"))
    timestamps = pd.date_range(start, end, periods=len(labels))
    # `labels` is a Series, so the resulting frame takes over its index,
    # which is the same index as sample_df.
    labelled_df = pd.DataFrame({
        "timestamp": timestamps,
        "charges": predictions,
        "GT_target": labels
    })
    gtfilename = str(count) +"_GTdata"
    # Join features and labelled columns on their shared row index.
    gt_data = pd.concat([sample_df, labelled_df], axis=1, join='inner')
    save_dataset_to_s3(gt_data, sgmkr_config['MONITOR_NAME'], gtfilename, "groundtruth")
    count += 1
    # Wait six minutes before sending the next batch.
    time.sleep(360)