In [1]:
import tensorflow as tf
# Record the TensorFlow version this notebook was executed with.
tf_version = tf.__version__
print(tf_version)

2.5.0


# Deployment package

In [11]:
def dynamic_padding(inp, min_size=100):
    """Right-pad a 1-D string tensor with single spaces up to ``min_size`` entries.

    A space is used as the fill value so the result lines up with the
    ``default_value=" "`` padding produced by ``to_tensor`` in ``x_preprocess``.

    Args:
        inp: 1-D string tensor; its length may only be known at run time.
        min_size: Minimum number of entries in the returned tensor.

    Returns:
        ``inp`` followed by as many ``" "`` entries as needed to reach
        ``min_size``. An input that already has ``min_size`` or more entries is
        returned with no padding added.
    """
    # https://stackoverflow.com/questions/42334646/tensorflow-pad-unknown-size-tensor-to-a-specific-size
    # Clamp at zero: tf.pad rejects negative padding amounts, which is what the
    # plain subtraction yields for inputs longer than min_size.
    pad_size = tf.maximum(min_size - tf.shape(inp)[0], 0)
    paddings = [[0, pad_size]]  # pad only behind the existing entries
    return tf.pad(inp, paddings, mode="CONSTANT", constant_values=" ")

def x_preprocess(x, filter_size=100):
    """Encode a batch of names as fixed-width float32 rows of letter indices.

    Each name is lower-cased, split into characters, right-padded with spaces
    or truncated to exactly ``filter_size`` characters, and every character is
    then mapped to a number: ``a`` -> 1 ... ``z`` -> 26, anything else -> 0.

    Args:
        x: Batch of names (list of strings/string tensors, or a 1-D string tensor).
        filter_size: Number of characters kept per name. Defaults to 100, the
            width that was previously hard-coded here.

    Returns:
        float32 tensor of shape ``(batch, filter_size)``.
    """
    x_processed = tf.strings.lower(x)
    x_processed = tf.strings.unicode_split(x_processed, input_encoding="UTF-8").to_tensor(default_value=" ")

    # Bring every row to exactly filter_size characters: pad short batches with
    # spaces, truncate long ones. The truncation uses filter_size as well, so
    # both branches always agree on the output width.
    x_processed = tf.cond(
        tf.less(tf.shape(x_processed)[1], filter_size),
        true_fn=lambda: tf.map_fn(lambda inp_name: dynamic_padding(inp_name, filter_size), x_processed),
        false_fn=lambda: x_processed[:, :filter_size],
    )

    # Convert to number. Every entry holds exactly one character, so the decoded
    # ragged tensor has a single code point per entry; index 0 selects it.
    codes = (tf.strings.unicode_decode(x_processed, "UTF-8") - 96).to_tensor()[:, :, 0]  # make a=1

    # Anything outside a..z (including the space padding, 32 - 96 = -64) becomes 0.
    # Done with one vectorized tf.where instead of a nested per-element map_fn
    # with a Python conditional, which is slow and only works in eager mode.
    codes = tf.where((codes < 0) | (codes > 26), tf.zeros_like(codes), codes)

    return tf.cast(codes, tf.float32)

def to_tensor_format(input_name):
    """Convert a sequence of names into the numeric tensor fed to the model.

    Args:
        input_name: Sequence of name strings.

    Returns:
        float32 tensor holding one ``x_preprocess([name])`` result per name,
        stacked along a new leading axis (each name is preprocessed as its own
        batch of one).
    """
    # Convert name to number
    names = tf.constant(input_name)

    # The elements are strings but the mapped result is float32, so the output
    # signature must be given explicitly. `fn_output_signature` replaces the
    # deprecated `dtype` argument of tf.map_fn.
    return tf.map_fn(
        lambda name: x_preprocess([name]),
        names,
        fn_output_signature=tf.float32,
    )

_MODEL_DIR = "gs://leo-us-name-gender/model/1/"
_LOADED_MODELS = {}


def _get_serving_fn(model_dir=_MODEL_DIR):
    """Return the ``serving_default`` signature of the SavedModel, loading it once per path."""
    if model_dir not in _LOADED_MODELS:
        # Keep the loaded object itself in the cache, not just the signature,
        # so the model stays referenced for as long as the signature is used.
        _LOADED_MODELS[model_dir] = tf.saved_model.load(model_dir)
    return _LOADED_MODELS[model_dir].signatures["serving_default"]


def predict(instances, **kwargs):
    """Predict the gender class and its probability for each name.

    Args:
        instances: Sequence of name strings.
        **kwargs: Accepted for interface compatibility; not used.

    Returns:
        dict with two parallel lists:
            "gender": "F" or "M" for every input name.
            "probability": probability of the predicted class (always >= 0.5).
    """
    # Loading the SavedModel from GCS is expensive, so it happens only on the
    # first call and is reused afterwards.
    f = _get_serving_fn()

    # Input Pre-Process
    x_processed = to_tensor_format(instances)

    # Predict: the signature is invoked once per name.
    predictions = tf.map_fn(lambda x: f(x)["output"], x_processed)
    # Flatten to one score per name.
    # NOTE(review): assumes the model emits a single sigmoid value per name - confirm.
    predictions = tf.reshape(predictions, [-1])

    # Classes
    class_names = tf.constant(["F", "M"], dtype=tf.string)

    # Predictions are sigmoid outputs (float32 in range 0 -> 1).
    # Round to integers for the predicted class, then look up the class name.
    prediction_integers = tf.cast(tf.math.round(predictions), tf.int32)
    predicted_classes = tf.gather(class_names, prediction_integers)

    # Convert the sigmoid output into the probability of the predicted class:
    # 1 (male) keeps the sigmoid output p, 0 (female) becomes 1.0 - p.
    class_probability = tf.where(predictions < 0.5, 1.0 - predictions, predictions)

    return {
        "gender": [gender.decode("utf-8") for gender in predicted_classes.numpy().tolist()],
        "probability": class_probability.numpy().tolist()
    }

In [13]:
# Smoke test: run the local prediction routine on two sample names.
sample_names = ["stephen", "stephanie"]
reply = predict(sample_names)
reply

{'gender': ['M', 'F'], 'probability': [0.5780652761459351, 0.9435783624649048]}

# ----- To be Updated -----

# HTTP Request

In [1]:
from google.auth.transport import requests
from google.oauth2 import service_account

# Construct service account credentials using the service account key file.
credentials = service_account.Credentials.from_service_account_file('../credentials/ds-api-user.json')
credentials = credentials.with_scopes(['https://www.googleapis.com/auth/cloud-platform'])

# Create a requests Session object with the credentials.
session = requests.AuthorizedSession(credentials)

# Make an authenticated API request
url = "https://ml.googleapis.com/v1/projects/leo-gcp-sandbox/models/name_gender_prediction:predict"
# Bound to `payload` rather than `json`: a variable called `json` shadows the
# stdlib module of the same name, which this notebook imports further down.
payload = {"instances":[{"name":"stephen leo"}, {"name":"marie stephen leo"}]}
response = session.post(url, json=payload)

# Surface HTTP failures explicitly, keeping the response body for diagnosis,
# instead of printing an error document as if it were a prediction.
if not response.ok:
    raise RuntimeError(f"Prediction request failed with HTTP {response.status_code}: {response.text}")
print(response.json())

{'predictions': {'probability': [0.9067956805229187, 0.5227343440055847], 'gender': ['m', 'f']}}


# Create a JSON file with 100 names (50 female, 50 male) for scalability testing

In [1]:
import pandas as pd

In [2]:
# Load the training names; later cells read its "gender" and "name" columns.
TRAIN_CSV_PATH = "../data/train.csv"
names_df = pd.read_csv(TRAIN_CSV_PATH)

In [11]:
# Draw a balanced sample (equal numbers of "F" and "M" rows) and shuffle it.
# A fixed seed makes the sample, and the test file derived from it,
# reproducible across kernel restarts.
SAMPLE_SEED = 42
N_PER_GENDER = 50  # .sample raises ValueError if a gender has fewer rows than this

f_df = names_df[names_df["gender"]=="F"].sample(n=N_PER_GENDER, random_state=SAMPLE_SEED)
m_df = names_df[names_df["gender"]=="M"].sample(n=N_PER_GENDER, random_state=SAMPLE_SEED)
names_sampled_df = pd.concat([f_df, m_df]).sample(frac=1, random_state=SAMPLE_SEED)

In [39]:
import json

# Build the request body in the {"instances": [{"name": ...}, ...]} layout
# and write it to disk.
name_records = names_sampled_df[["name"]].to_dict(orient="records")
names_dict = {"instances": name_records}

with open("../data/names_100.json", mode="w") as out_file:
    json.dump(names_dict, out_file)