In [24]:
import mlflow
from mlflow.tracking import MlflowClient
from sklearn.preprocessing import LabelEncoder, StandardScaler

import pickle
import pandas as pd
from flask import Flask, request, jsonify
import boto3
import os
import awsgi

In [9]:
mlflow.set_tracking_uri("http://MLflow-MLFLO-QpxAlKAMoAEb-7f0b3c0531d6ae08.elb.us-east-1.amazonaws.com")
client = MlflowClient()


In [10]:
client

<mlflow.tracking.client.MlflowClient at 0x13672cfd0>

In [20]:
model_name = "customer-satisfaction-classifier"
filter_string = f"name='{model_name}'"
results = client.search_registered_models(filter_string=filter_string)
print("-" * 80)
for res in results:
    for mv in res.latest_versions:
        s3_uri = mv.tags['s3']


--------------------------------------------------------------------------------


In [26]:
AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")


In [78]:
def read_data(s3_bucket, key):
    s3_client = boto3.client(
        "s3",
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    )
    response = s3_client.get_object(Bucket=s3_bucket, Key=key)
    df = pd.read_csv(response.get("Body"))
    df["Gender"] = df.Gender.astype("category")
    df["VisitFrequency"] = df.VisitFrequency.astype("category")
    df["PreferredCuisine"] = df.PreferredCuisine.astype("category")
    df["TimeOfVisit"] = df.TimeOfVisit.astype("category")
    df["DiningOccasion"] = df.DiningOccasion.astype("category")
    df["MealType"] = df.MealType.astype("category")
    df["DiningOccasion"] = df.DiningOccasion.astype("category")
    df["Income_per_AverageSpend"] = df["Income"] / df["AverageSpend"]
    df["AverageSpend_per_GroupSize"] = df["AverageSpend"] / df["GroupSize"]
    df["Income_per_GroupSize"] = df["Income"] / df["GroupSize"]

    for col in df.columns:
        if df[col].dtype != "category":
            df[col] = df[col].astype(float)

    columns_to_dummy = [
        "VisitFrequency",
        "PreferredCuisine",
        "TimeOfVisit",
        "DiningOccasion",
    ]
    df = pd.get_dummies(df, columns=columns_to_dummy, dtype=int, drop_first=True)

    return df


def predict(s3_bucket, key, df):
    s3_client = boto3.client(
        "s3",
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    )
    response = s3_client.get_object(Bucket=s3_bucket, Key=key)
    scaler, labelencoder, model = pickle.load(response.get("Body"))
    print(df)
    num_columns = [
        "Age",
        "Income",
        "AverageSpend",
        "GroupSize",
        "WaitTime",
        "ServiceRating",
        "FoodRating",
        "AmbianceRating",
    ]
    columns_to_encode = ["Gender", "MealType"]
    labelencoder = LabelEncoder()

    for col in columns_to_encode:
        df[col] = labelencoder.fit_transform(df[col])
    x_scaled = scaler.transform(df[num_columns])
    df[num_columns] = pd.DataFrame(x_scaled, columns=num_columns)
    return model.predict(df)


def save_results(df, y_pred):
    df_result = pd.DataFrame()
    df_result["CustomerID"] = df["CustomerID"]
    df_result["predicted_rating"] = y_pred
    return df_result.loc[:, "predicted_rating"].mean()

In [79]:
df_data = read_data(
            "customer-satisfaction-823124982163",
            "predict/restaurant_customer_satisfaction 2.csv",
        )

In [80]:
df_data

Unnamed: 0,CustomerID,Age,Gender,Income,AverageSpend,GroupSize,MealType,OnlineReservation,DeliveryOrder,LoyaltyProgramMember,...,VisitFrequency_Rarely,VisitFrequency_Weekly,PreferredCuisine_Chinese,PreferredCuisine_Indian,PreferredCuisine_Italian,PreferredCuisine_Mexican,TimeOfVisit_Dinner,TimeOfVisit_Lunch,DiningOccasion_Casual,DiningOccasion_Celebration
0,654.0,35.0,Male,83380.0,27.829142,3.0,Takeaway,0.0,1.0,1.0,...,0,1,1,0,0,0,0,0,0,0
1,655.0,19.0,Male,43623.0,115.408622,1.0,Dine-in,0.0,0.0,0.0,...,1,0,0,0,0,0,1,0,1,0
2,656.0,41.0,Female,83737.0,106.693771,6.0,Dine-in,0.0,1.0,0.0,...,0,1,0,0,0,0,1,0,0,1
3,657.0,43.0,Male,96768.0,43.508508,1.0,Dine-in,0.0,0.0,0.0,...,1,0,0,1,0,0,0,1,0,1
4,658.0,55.0,Female,67937.0,148.084627,1.0,Takeaway,0.0,0.0,1.0,...,0,0,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1495,2149.0,39.0,Male,114857.0,163.015254,2.0,Dine-in,0.0,1.0,1.0,...,0,0,0,0,0,0,0,1,0,0
1496,2150.0,37.0,Female,133506.0,190.991911,4.0,Takeaway,0.0,0.0,0.0,...,0,1,0,0,1,0,0,1,1,0
1497,2151.0,46.0,Male,119159.0,150.088604,4.0,Dine-in,0.0,1.0,0.0,...,0,0,0,0,0,0,0,1,1,0
1498,2152.0,24.0,Male,27970.0,196.363626,6.0,Dine-in,1.0,1.0,0.0,...,0,1,0,0,1,0,1,0,1,0


In [81]:
predict('mlflow-artifacts-823124982163', '7/9561bb473140477b826cff3d1b825c0c/artifacts/models_pickle/lin_xbg.bin', df_data)

      CustomerID   Age  Gender    Income  AverageSpend  GroupSize  MealType  \
0          654.0  35.0    Male   83380.0     27.829142        3.0  Takeaway   
1          655.0  19.0    Male   43623.0    115.408622        1.0   Dine-in   
2          656.0  41.0  Female   83737.0    106.693771        6.0   Dine-in   
3          657.0  43.0    Male   96768.0     43.508508        1.0   Dine-in   
4          658.0  55.0  Female   67937.0    148.084627        1.0  Takeaway   
...          ...   ...     ...       ...           ...        ...       ...   
1495      2149.0  39.0    Male  114857.0    163.015254        2.0   Dine-in   
1496      2150.0  37.0  Female  133506.0    190.991911        4.0  Takeaway   
1497      2151.0  46.0    Male  119159.0    150.088604        4.0   Dine-in   
1498      2152.0  24.0    Male   27970.0    196.363626        6.0   Dine-in   
1499      2153.0  51.0    Male  148333.0    171.119498        4.0   Dine-in   

      OnlineReservation  DeliveryOrder  LoyaltyProg

ValueError: y contains previously unseen labels: 'Male'