In [1]:
from fastapi import FastAPI
from ml.data import process_data
from model.model import inference
from pydantic import BaseModel, Field
import os
import pandas as pd
import pickle

In [2]:
# Create the FastAPI application object; the route decorators in later cells
# (@app.get / @app.post) register their endpoints on this instance.
app = FastAPI()

In [3]:
# Load the trained model and the fitted preprocessing artifacts from disk.
def _load_pickle(path):
    """Deserialize a single pickled artifact and close its file handle.

    Args:
        path: Location of the pickle file, relative to the working directory.

    Returns:
        The unpickled Python object.
    """
    # NOTE(security): pickle.load can execute arbitrary code while
    # deserializing; only point this at artifacts produced by a trusted
    # training run.
    with open(path, "rb") as artifact_file:
        return pickle.load(artifact_file)


result_model = _load_pickle("ml/finalized_model.sav")
result_encoder = _load_pickle("ml/OneHotEncoder.sav")
result_lb = _load_pickle("ml/LabelBinarizer.sav")

In [4]:
# Request body accepted by the prediction endpoint: one record of input features.
# Fields whose JSON key contains a hyphen are declared under a Python-friendly
# attribute name and mapped to the hyphenated key through `alias`.
# Keep the declaration order as-is; do not reorder fields when editing.
class TaggedItem(BaseModel):
    age: int
    workclass: str
    fnlgt: int
    education: str
    education_num: int = Field(alias="education-num")
    marital_status: str = Field(alias="marital-status")
    occupation: str
    relationship: str
    race: str
    sex: str
    capital_gain: int = Field(alias="capital-gain")
    capital_loss: int = Field(alias="capital-loss")
    hours_per_week: int = Field(alias="hours-per-week")
    native_country: str = Field(alias="native-country")
    

In [None]:
    # NOTE(review): this cell is indented as if it lived inside a class body, but
    # it sits alone in a cell that was never executed (no execution count).
    # Presumably it was meant to be nested in the request model to provide a
    # sample payload for the generated API docs — confirm, and if so move it
    # there using the configuration hook the installed pydantic version expects
    # (a class simply named `Example` is not known to be picked up — TODO verify).
    class Example:
        # Sample record keyed by the hyphenated names used in the JSON payload.
        example_schema = {
            "example": {
                "age": 20,
                "workclass": "Private",
                "fnlgt": 168187,
                # NOTE(review): this value starts with a space while the other
                # string values do not — confirm whether that is intentional.
                "education": " Some-college",
                "education-num": 10,
                "marital-status": "Never-married",
                "occupation": "Other-service",
                "relationship": "Other-relative",
                "race": "White",
                "sex": "Female",
                "capital-gain": 4416,
                "capital-loss": 0,
                "hours-per-week": 25,
                "native-country": "United-States"
            }
        }

In [5]:
def convert_input_data(record):
    """Convert one request record into the feature matrix passed to the model.

    Args:
        record: Either a pydantic model instance (such as the request body of
            the predict endpoint) or a plain mapping keyed by the hyphenated
            feature names ("education-num", "marital-status", ...).

    Returns:
        The first element returned by ``process_data`` (the processed
        features for the single record).
    """
    # These names select columns of the one-row frame built below, so they
    # must equal the frame's column names. Those come from the request's
    # aliases and have no leading whitespace; the previous " workclass"-style
    # names could not match them.
    cat_features = [
        "workclass",
        "education",
        "marital-status",
        "occupation",
        "relationship",
        "race",
        "sex",
        "native-country",
    ]

    # Iterating a pydantic model yields (field name, value) pairs, so handing
    # the model straight to pd.DataFrame produces one row per field instead of
    # one row per record. Export it to a dict keyed by alias first; the
    # method is `model_dump` on pydantic v2 and `dict` on v1.
    export = getattr(record, "model_dump", None) or getattr(record, "dict", None)
    payload = export(by_alias=True) if callable(export) else dict(record)

    # Wrapping the dict in a list gives exactly one row with one column per feature.
    df_record = pd.DataFrame([payload])

    # NOTE(review): process_data is called without any argument selecting
    # inference mode. If it defaults to fitting fresh encoders, the loaded
    # encoder/lb passed here would be ignored — confirm against ml.data.
    X, _y, _encoder, _lb = process_data(
        df_record,
        categorical_features=cat_features,
        encoder=result_encoder,
        lb=result_lb,
    )

    return X


In [6]:
# Root endpoint: a GET on "/" answers with a fixed greeting string.
@app.get("/")
async def initial_message():
    # No intermediate variable needed; the greeting is a constant.
    return "Greetings, Welcome to the api for module 3 of course Machine learning devops engineer"

In [9]:
@app.post("/predict/")
async def predict_salary(item: TaggedItem):
    """Predict the salary category for one submitted record."""
    # Build the model input from the validated request body.
    X = convert_input_data(item)
    prediction = inference(result_model, X)

    # The original code read an undefined name (`pred`), which raised
    # NameError on every request. Decode the prediction array back to its
    # label(s) and take the single entry for this one-record request.
    category = result_lb.inverse_transform(prediction)

    # str() keeps the concatenation valid even if the decoded label is not a
    # plain Python string.
    final_result = "The prediction of Salary according to the features is:" + str(category[0])
    return final_result

In [None]:
import requests
import json

def test_get_method():
    """Smoke-test the root endpoint of a locally running server.

    The API must already be served at http://127.0.0.1:8000 for this to pass.
    """
    path = "http://127.0.0.1:8000/"
    # Without a timeout the call can block indefinitely on an unresponsive server.
    response = requests.get(url=path, timeout=10)

    # Check the status before touching the body, so a failing endpoint is
    # reported as a bad status rather than as a JSON decoding error.
    assert response.status_code == 200

    # The root endpoint returns a string, which arrives JSON-encoded.
    body = json.loads(response.text)
    assert isinstance(body, str) and body

In [21]:
# Sample record keyed by the hyphenated feature names used in the JSON payload.
example_schema = {
    "age": 20,
    "workclass": "Private",
    "fnlgt": 168187,
    "education": " Some-college",
    "education-num": 10,
    "marital-status": "Never-married",
    "occupation": "Other-service",
    "relationship": "Other-relative",
    "race": "White",
    "sex": "Female",
    "capital-gain": 4416,
    "capital-loss": 0,
    "hours-per-week": 25,
    "native-country": "United-States",
}

# orient="index" makes every dict key a row label, so the result is a
# single-column frame with one row per feature.
df_record = pd.DataFrame.from_dict(example_schema, orient="index")
df_record

Unnamed: 0,0
age,20
workclass,Private
fnlgt,168187
education,Some-college
education-num,10
marital-status,Never-married
occupation,Other-service
relationship,Other-relative
race,White
sex,Female


In [18]:
# NOTE(review): the saved output below lists one dtype per feature column,
# which does not match a frame built with orient='index' (features as rows,
# a single column). The execution counts are out of order (In [18] here,
# In [21] on the cell before), so this output presumably predates the current
# df_record — restart the kernel and run all cells to confirm.
df_record.dtypes

age                int64
workclass         object
fnlgt              int64
education         object
education-num      int64
marital-status    object
occupation        object
relationship      object
race              object
sex               object
capital-gain       int64
capital-loss       int64
hours-per-week     int64
native-country    object
dtype: object

In [24]:
# Nest the sample record under a single outer key so that it can be laid out
# as one row (labelled "position") instead of one row per feature.
nuevo_arreglo = {
    "position": example_schema,
}

In [26]:
# With a dict of dicts and orient="index", the outer key becomes the row label
# and the inner keys become the columns: one row, one column per feature.
df_record = pd.DataFrame.from_dict(
    nuevo_arreglo,
    orient="index",
)
df_record

Unnamed: 0,age,workclass,fnlgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country
position,20,Private,168187,Some-college,10,Never-married,Other-service,Other-relative,White,Female,4416,0,25,United-States
