In [None]:
from pprint import pprint
from typing import Dict, Text, Tuple

from models import RetrievalModel, RankingModel

import numpy as np
import pandas as pd
import tensorflow as tf

import tensorflow_datasets as tfds
import tensorflow_ranking as tfr
import tensorflow_recommenders as tfrs

In [None]:
# Load the cleaned ticket data and collect the distinct operator names.
# Note: despite its name, `operators_df` is the array returned by `.unique()`.
customer_ratings_df = pd.read_csv(filepath_or_buffer="clean_data.csv")
operators_df = customer_ratings_df.loc[:, "operator"].unique()

In [None]:
# Work with the first 8000 rows only; `.copy()` detaches the slice from the full frame.
customer_ratings_df = customer_ratings_df.head(8000).copy()

In [None]:
# One dataset element per DataFrame row (a dict keyed by column name),
# and one dataset element per distinct operator name.
customer_ratings = tf.data.Dataset.from_tensor_slices(
    {column: customer_ratings_df[column] for column in customer_ratings_df.columns}
)
operators = tf.data.Dataset.from_tensor_slices(tensors=operators_df)

In [None]:
# Keep only the three features used below, renaming 'operator' to 'operator_name'.
# NOTE(review): this rebinds `customer_ratings`, so re-running the cell on its own
# output would fail because the 'operator' key no longer exists.
customer_ratings = customer_ratings.map(
    lambda row: dict(
        operator_name=row['operator'],
        customer_name=row['customer_name'],
        ticket_subject=row['ticket_subject'],
    )
)

In [None]:
# Reproducible 80/20 train/test split of the retrieval examples.
tf.random.set_seed(42)

# Derive the split sizes from the dataset itself instead of hard-coding
# 8_000 / 6_400 / 1_600: with fewer rows than expected the fixed numbers would
# silently produce a short (or empty) test split. `len()` raises if the dataset
# cardinality is unknown, which is preferable to a silently wrong split.
n_examples = len(customer_ratings)
n_train = n_examples * 4 // 5  # 80% for training (6_400 when there are 8_000 rows)

# The buffer covers the whole dataset so the shuffle is a full permutation;
# `reshuffle_each_iteration=False` keeps the order fixed so that `take` and
# `skip` below always see the same order and the two splits never overlap.
shuffled = customer_ratings.shuffle(
    max(n_examples, 1),  # shuffle() requires a positive buffer size
    seed=42,
    reshuffle_each_iteration=False,
)

train = shuffled.take(n_train)
test = shuffled.skip(n_train).take(n_examples - n_train)

In [None]:
# Batched views of single features; these are used below to build vocabularies
# and are passed to the retrieval model.
operator_names = operators.batch(2_000)
customer_names = customer_ratings.map(lambda row: row["customer_name"]).batch(4_000)
ticket_subjects = customer_ratings.map(lambda row: row['ticket_subject']).batch(200)

In [None]:
# Flatten every batch into one array and keep the sorted distinct values.
unique_operator_names = np.unique(
    np.concatenate([batch for batch in operator_names.as_numpy_iterator()])
)
unique_customer_names = np.unique(
    np.concatenate([batch for batch in customer_names.as_numpy_iterator()])
)

In [None]:
# Sanity check: pretty-print the first ten training examples.
preview = train.take(10)
for example in preview.as_numpy_iterator():
    pprint(example)

---

# Модель пошуку

In [None]:
# Build the retrieval model from the vocabularies, the ticket-subject batches and
# the operator candidates. `layer_sizes` is passed through to RetrievalModel as-is.
retrieval_model = RetrievalModel(
    unique_customer_names=unique_customer_names,
    unique_operator_names=unique_operator_names,
    ticket_subjects=ticket_subjects,
    operators=operators,
    layer_sizes=[1024, 512, 128, 64, 32],
)

In [None]:
# Adagrad with a fixed learning rate of 0.02.
retrieval_model.compile(
    optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.02),
)

In [None]:
# Batch the splits and cache them so later epochs reuse the batched elements.
# NOTE(review): caching after shuffle presumably freezes the shuffled order for
# subsequent epochs — confirm this is intended.
cached_train = (
    train
    .shuffle(6_000)
    .batch(10000)
    .cache()
)
cached_test = (
    test
    .batch(4000)
    .cache()
)

In [None]:
# Train silently for 100 epochs, validating on the test split every 5th epoch.
retrieval_model.fit(
    x=cached_train,
    validation_data=cached_test,
    validation_freq=5,
    epochs=100,
    verbose=0,
)

In [None]:
# Report the test-split metrics as a name -> value dict.
retrieval_model.evaluate(
    x=cached_test,
    return_dict=True,
)

In [None]:
# Brute-force top-k index that embeds queries with the trained query model
# and returns the 20 best candidates per query.
index = tfrs.layers.factorized_top_k.BruteForce(
    query_model=retrieval_model.query_model,
    k=20,
)

In [None]:
# Populate the index with (operator name, candidate embedding) pairs,
# processed 100 operators at a time.
index.index_from_dataset(
    operators.batch(100).map(
        lambda names: (names, retrieval_model.candidate_model(names))
    )
)

# Задаємо ім'я клієнта та тему звернення

In [160]:
# Single-example query: each feature is a string tensor with a batch dimension of 1.
customer_test = dict(
    customer_name=tf.constant(['Heather Love']),
    ticket_subject=tf.constant(['Product setup']),
)

In [161]:
# The index returns a pair; only the second item (the suggested operator
# names) is used here, the first is discarded.
_, operator_suggestions = index(customer_test)
print(
    f"Recommendations for customer {customer_test['customer_name']}: "
    f"{operator_suggestions[0, :]}"
)

Recommendations for customer [b'Heather Love']: [b'Alexandra Aguilar' b'Sheila Miranda' b'Tracy Green' b'Johnny Phelps'
 b'Brandi Guerrero' b'Mr. Michael Wilcox III' b'Michael Young'
 b'Jennifer Jones' b'Stephanie Green' b'Brenda Boyle' b'Devin Murillo'
 b'Isaiah Martinez' b'Stephen Turner' b'Edward Jones' b'Wayne George'
 b'Justin Walter' b'Kelsey Olson' b'Breanna Melton' b'Anthony Pearson'
 b'Cody Fisher']


---

# Модель ренкінгу

In [None]:
# Ranking examples: (operator, customer) pairs labelled with the satisfaction rating.
ratings = tf.data.Dataset.from_tensor_slices(
    dict(
        operator_name=customer_ratings_df['operator'],
        customer_name=customer_ratings_df['customer_name'],
        rating=customer_ratings_df['customer_satisfaction_rating'],
    )
)

In [None]:
# Reproducible 80/20 train/test split of the ranking examples.
# NOTE: this rebinds `shuffled`, `train` and `test`, which previously held the
# retrieval splits.
tf.random.set_seed(42)

# Derive the split sizes from the dataset itself instead of hard-coding
# 8_000 / 6_400 / 1_600: with fewer rows than expected the fixed numbers would
# silently produce a short (or empty) test split. `len()` raises if the dataset
# cardinality is unknown, which is preferable to a silently wrong split.
n_examples = len(ratings)
n_train = n_examples * 4 // 5  # 80% for training (6_400 when there are 8_000 rows)

# The buffer covers the whole dataset so the shuffle is a full permutation;
# `reshuffle_each_iteration=False` keeps the order fixed so that `take` and
# `skip` below always see the same order and the two splits never overlap.
shuffled = ratings.shuffle(
    max(n_examples, 1),  # shuffle() requires a positive buffer size
    seed=42,
    reshuffle_each_iteration=False,
)

train = shuffled.take(n_train)
test = shuffled.skip(n_train).take(n_examples - n_train)

In [None]:
# The ranking model receives the same customer/operator vocabularies
# that were built for the retrieval model.
ranking_model = RankingModel(
    unique_operator_names=unique_operator_names,
    unique_customer_names=unique_customer_names,
)

In [None]:
# Adagrad with a fixed learning rate of 0.02.
ranking_model.compile(
    optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.02),
)

In [None]:
# Batch the ranking splits and cache them so later epochs reuse the batched elements.
# NOTE: this rebinds `cached_train` / `cached_test`.
# NOTE(review): caching after shuffle presumably freezes the shuffled order for
# subsequent epochs — confirm this is intended.
cached_train = (
    train
    .shuffle(6_000)
    .batch(10000)
    .cache()
)
cached_test = (
    test
    .batch(4000)
    .cache()
)

In [None]:
# Train the ranking model for 500 epochs on the cached training batches.
ranking_model.fit(
    x=cached_train,
    epochs=500,
)

In [None]:
# Report the test-split metrics as a name -> value dict.
ranking_model.evaluate(
    x=cached_test,
    return_dict=True,
)

# Беремо список рекомендованих операторів, отриманих з моделі пошуку, та передаємо їх до моделі ренкінгу

In [162]:
# Operator names suggested for the first (and only) query, as a NumPy array.
suggested_titles = operator_suggestions.numpy()[0]

In [163]:
# Display the retrieved operator names (the recorded output shows byte strings).
suggested_titles

array([b'Alexandra Aguilar', b'Sheila Miranda', b'Tracy Green',
       b'Johnny Phelps', b'Brandi Guerrero', b'Mr. Michael Wilcox III',
       b'Michael Young', b'Jennifer Jones', b'Stephanie Green',
       b'Brenda Boyle', b'Devin Murillo', b'Isaiah Martinez',
       b'Stephen Turner', b'Edward Jones', b'Wayne George',
       b'Justin Walter', b'Kelsey Olson', b'Breanna Melton',
       b'Anthony Pearson', b'Cody Fisher'], dtype=object)

In [164]:
# Score every retrieved operator for the queried customer with the ranking model.
#
# The customer name is taken from `customer_test` (the query that produced
# `suggested_titles`) rather than from a second hard-coded literal, so editing
# the query cell can no longer rank operators for a different customer than
# the one they were retrieved for.
candidate_count = len(suggested_titles)

# One batched forward pass instead of one model call per operator.
predicted_ratings = ranking_model({
    "customer_name": tf.tile(customer_test["customer_name"], [candidate_count]),
    "operator_name": tf.constant(suggested_titles),
})

# Flatten so indexing does not depend on whether the model returns one value
# per row as (N, 1) or (N,).
flat_ratings = tf.reshape(predicted_ratings, [-1])

# Map operator name -> predicted rating. Each value keeps the (1, 1) shape that a
# single-example call returned, so the cell that sorts and prints `test_ratings`
# behaves exactly as before.
test_ratings = {
    operator_name: tf.reshape(flat_ratings[position], (1, 1))
    for position, operator_name in enumerate(suggested_titles)
}

In [165]:
# List the operators from the highest to the lowest predicted rating.
print("Ratings:")
for title, score in sorted(
    test_ratings.items(),
    key=lambda entry: entry[1],
    reverse=True,
):
    print(f"operator name: {title}: {score}")

Ratings:
operator name: b'Jennifer Jones': [[3.9012105]]
operator name: b'Devin Murillo': [[3.6597815]]
operator name: b'Brandi Guerrero': [[3.5479312]]
operator name: b'Stephanie Green': [[3.5460553]]
operator name: b'Alexandra Aguilar': [[3.521015]]
operator name: b'Johnny Phelps': [[3.4977694]]
operator name: b'Tracy Green': [[3.490428]]
operator name: b'Edward Jones': [[3.4166121]]
operator name: b'Isaiah Martinez': [[3.4134097]]
operator name: b'Mr. Michael Wilcox III': [[3.093484]]
operator name: b'Kelsey Olson': [[3.033016]]
operator name: b'Sheila Miranda': [[2.9572043]]
operator name: b'Brenda Boyle': [[2.9271116]]
operator name: b'Wayne George': [[2.8979099]]
operator name: b'Cody Fisher': [[2.8560028]]
operator name: b'Breanna Melton': [[2.855464]]
operator name: b'Stephen Turner': [[2.8500922]]
operator name: b'Justin Walter': [[2.8345716]]
operator name: b'Michael Young': [[2.7107525]]
operator name: b'Anthony Pearson': [[2.676967]]


In [None]:
# Keep only the tickets handled by the operators suggested by the retrieval model.
#
# TensorFlow hands string tensors back as UTF-8 encoded `bytes`, so decode each
# name explicitly before comparing it with the text values of the 'operator'
# column. This replaces `.astype(str)` on the bytes array, which relied on
# NumPy's implicit bytes-to-str cast and is not safe for non-ASCII names.
suggested_operator_names = [
    name.decode("utf-8") for name in operator_suggestions[0].numpy()
]
filtered_df = customer_ratings_df[
    customer_ratings_df['operator'].isin(suggested_operator_names)
]

In [None]:
# Observed mean satisfaction rating per suggested operator, best first.
(
    filtered_df
    .groupby('operator')['customer_satisfaction_rating']
    .mean()
    .sort_values(ascending=False)
)

In [None]:
# How often each ticket subject occurs among the suggested operators' tickets.
(
    filtered_df['ticket_subject']
    .value_counts()
)