In [1]:
from pprint import pprint
from typing import Dict, Text, Tuple

import numpy as np
import pandas as pd
import tensorflow as tf

import tensorflow_datasets as tfds
import tensorflow_ranking as tfr
import tensorflow_recommenders as tfrs

from importlib import reload

In [2]:
from models import RetrievalModel, RankingModel

In [31]:
# Load the cleaned ticket data; the path is relative to the notebook's working directory.
customer_ratings_df = pd.read_csv(
    '../datasets/clean_data.csv',
)

# Distinct operator names over the whole file (taken before any row truncation).
# NOTE(review): despite the `_df` suffix, `Series.unique()` returns an array, not a DataFrame.
operator_column = customer_ratings_df['operator']
operators_df = operator_column.unique()

In [32]:
# Keep only the first 8,000 rows (by position) as an independent copy of the frame.
customer_ratings_df = customer_ratings_df.iloc[:8_000].copy()

In [33]:
# One dataset element per DataFrame row, as a dict keyed by column name.
customer_ratings = tf.data.Dataset.from_tensor_slices(
    {column: customer_ratings_df[column] for column in customer_ratings_df.columns}
)

# One dataset element per distinct operator name.
operators = tf.data.Dataset.from_tensor_slices(operators_df)

In [34]:
def _select_retrieval_features(row):
    """Keep the five text features of a row, renaming `operator` to `operator_name`."""
    return {
        'operator_name': row['operator'],
        'customer_name': row['customer_name'],
        'ticket_subject': row['ticket_subject'],
        'ticket_type': row['ticket_type'],
        'ticket_description': row['ticket_description'],
    }


# Every other column of the source rows is dropped here.
customer_ratings = customer_ratings.map(_select_retrieval_features)

In [35]:
# Seed both the global TF generator and the shuffle itself so the split is
# reproducible. `reshuffle_each_iteration=False` keeps one fixed order, so the
# `take` and `skip` views below never overlap.
tf.random.set_seed(42)
shuffled = customer_ratings.shuffle(
    buffer_size=8_000,
    seed=42,
    reshuffle_each_iteration=False,
)

# First 6,400 shuffled examples for training, the following 1,600 for testing.
train = shuffled.take(6_400)
test = shuffled.skip(6_400).take(1_600)

In [36]:
def _column_batches(column, batch_size):
    """Batch `customer_ratings` and keep only `column` from every batch."""
    return customer_ratings.batch(batch_size).map(lambda rows: rows[column])


# Batched views of single features; they feed the unique-name extraction and the
# retrieval model constructor further down.
operator_names = operators.batch(2_000)
customer_names = _column_batches("customer_name", 4_000)
ticket_subjects = _column_batches('ticket_subject', 200)
ticket_types = _column_batches('ticket_type', 200)
ticket_descriptions = _column_batches('ticket_description', 200)

In [37]:
# Flatten the batched names into one array each and de-duplicate them
# (np.unique also returns the values sorted).
operator_name_batches = list(operator_names.as_numpy_iterator())
unique_operator_names = np.unique(np.concatenate(operator_name_batches))

customer_name_batches = list(customer_names.as_numpy_iterator())
unique_customer_names = np.unique(np.concatenate(customer_name_batches))

---

# Модель пошуку

In [17]:
# Build the retrieval model defined in the project-local `models` module.
# NOTE(review): how each argument is consumed (vocabularies, text vectorisation,
# tower sizes) is defined in `models.RetrievalModel` — confirm there.
retrieval_model = RetrievalModel(
    # Candidate side.
    operators=operators,
    unique_operator_names=unique_operator_names,
    # Query side.
    unique_customer_names=unique_customer_names,
    ticket_subjects=ticket_subjects,
    ticket_types=ticket_types,
    ticket_descriptions=ticket_descriptions,
    # Layer widths handed to the model.
    layer_sizes=[1024, 256, 128, 64, 32],
)

In [18]:
# Adagrad with a starting learning rate of 0.001.
retrieval_model.compile(
    optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.001),
)



In [19]:
# Shuffle, batch, then cache so later epochs reuse the already-built batches.
# NOTE(review): the batch size (10000) is larger than the 6,400 examples taken for
# training, so this pipeline yields a single batch per epoch.
cached_train = (
    train
    .shuffle(6_000)
    .batch(10000)
    .cache()
)

# Evaluation batches of 512 examples, cached as well.
cached_test = (
    test
    .batch(512)
    .cache()
)

In [20]:
callbacks = [
    # Stop once the validation loss has not improved for 3 epochs and roll the
    # model back to its best weights ('val_loss' is the Keras default monitor).
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=3,
        restore_best_weights=True,
    ),
    # After 2 stagnant epochs multiply the learning rate by 0.2, never going below 1e-4.
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.2,
        patience=2,
        min_lr=0.0001,
    ),
]

In [22]:
# Train for at most 50 epochs, validating on the cached test batches every epoch;
# the callbacks may end training earlier.
retrieval_model.fit(
    x=cached_train,
    validation_data=cached_test,
    epochs=50,
    callbacks=callbacks,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50


<keras.src.callbacks.History at 0x2f9820cd0>

In [60]:
# Write the retrieval model's weights in TensorFlow checkpoint format
# (path is relative to the working directory).
retrieval_model.save_weights(
    filepath='models/retrieval_model',
    save_format='tf',
)

In [23]:
# Report the retrieval metrics on the held-out batches as a {metric name: value} dict.
retrieval_model.evaluate(
    x=cached_test,
    return_dict=True,
)



{'factorized_top_k/top_1_categorical_accuracy': 0.42875000834465027,
 'factorized_top_k/top_5_categorical_accuracy': 0.4699999988079071,
 'factorized_top_k/top_10_categorical_accuracy': 0.484375,
 'factorized_top_k/top_50_categorical_accuracy': 0.528124988079071,
 'factorized_top_k/top_100_categorical_accuracy': 0.5512499809265137,
 'loss': 266.1716003417969,
 'regularization_loss': 0,
 'total_loss': 266.1716003417969}

In [24]:
# Exhaustive top-k lookup layer: embeds a raw query with the trained query model
# and returns the 20 best-matching candidates.
index = tfrs.layers.factorized_top_k.BruteForce(
    query_model=retrieval_model.query_model,
    k=20,
)

In [25]:
# Register every operator in the index as an (identifier, embedding) pair; the
# embeddings come from the trained candidate model, 100 operators at a time.
index.index_from_dataset(
    operators.batch(100).map(
        lambda names: (names, retrieval_model.candidate_model(names))
    )
)

<tensorflow_recommenders.layers.factorized_top_k.BruteForce at 0x31bf2ce50>

# Задаємо ім'я клієнта, тему, тип та опис звернення

In [26]:
# A single example query; every feature is a string tensor of shape (1,), i.e. a batch of one.
# NOTE(review): 'ticket_type' carries the same value as 'ticket_subject' — possibly a
# copy-paste slip; confirm against the ticket types present in the dataset.
customer_test = {
    'customer_name': tf.constant(['Heather Love']),
    'ticket_subject': tf.constant(['Product setup']),
    'ticket_type': tf.constant(['Product setup']),
    'ticket_description': tf.constant(['I could not set up my new camera']),
}

In [27]:
# Query the index. It returns a pair; only the second item (the operator names of
# the top candidates) is used, and row 0 belongs to the single query in the batch.
_, operator_suggestions = index(customer_test)
print(f"Recommendations for customer {customer_test['customer_name']}: {operator_suggestions[0, :]}")

Recommendations for customer [b'Heather Love']: [b'Emily Osborne' b'Kevin Rogers' b'Catherine Gonzalez' b'Donna Thomas'
 b'John Michael' b'Gabriella Reyes' b'Paige Brown' b'Jason Hoffman'
 b'George King' b'Alan Graves' b'Kevin Wong' b'Vincent Patton'
 b'Jill Flores' b'Rodney Green' b'Luis Torres' b'Lisa Cherry'
 b'Melissa Grant' b'Matthew Wallace' b'Robert Chavez' b'Joseph Sanders']


---

# Модель ренкінгу

In [38]:
# Ranking examples: (operator, customer) pairs together with the satisfaction rating,
# read from the DataFrame under the feature names used for the ranking model.
ratings = tf.data.Dataset.from_tensor_slices({
    feature: customer_ratings_df[column]
    for feature, column in (
        ('operator_name', 'operator'),
        ('customer_name', 'customer_name'),
        ('rating', 'customer_satisfaction_rating'),
    )
})

In [39]:
# Same seeded, fixed-order shuffle and 6,400 / 1,600 split as in the retrieval
# section, applied to the ratings dataset.
# NOTE(review): this rebinds `shuffled`, `train` and `test`; re-running the retrieval
# cells after this one would silently pick up the ranking data.
tf.random.set_seed(42)
shuffled = ratings.shuffle(
    buffer_size=8_000,
    seed=42,
    reshuffle_each_iteration=False,
)

train = shuffled.take(6_400)
test = shuffled.skip(6_400).take(1_600)

In [40]:
# Build the ranking model from the project-local `models` module using the same
# operator and customer name arrays that were passed to the retrieval model.
ranking_model = RankingModel(
    unique_operator_names=unique_operator_names,
    unique_customer_names=unique_customer_names,
)

In [41]:
# Adagrad with a learning rate of 0.002.
ranking_model.compile(
    optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.002),
)



In [42]:
# Shuffle, batch, then cache the ranking data.
# NOTE(review): both batch sizes exceed the number of examples taken for each split
# (6,400 and 1,600), so each pipeline yields a single batch. These assignments also
# rebind the `cached_train` / `cached_test` names used in the retrieval section.
cached_train = (
    train
    .shuffle(6_000)
    .batch(10000)
    .cache()
)
cached_test = (
    test
    .batch(4000)
    .cache()
)

In [52]:
# Train the ranking model for a fixed 1000 epochs.
# NOTE(review): unlike the retrieval fit, no validation data or callbacks are passed,
# so nothing monitors over-fitting and training always runs all epochs.
ranking_model.fit(
    x=cached_train,
    epochs=1000,
)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.src.callbacks.History at 0x2a0739690>

In [61]:
# Write the ranking model's weights in TensorFlow checkpoint format
# (path is relative to the working directory).
ranking_model.save_weights(
    filepath='models/ranking_model',
    save_format='tf',
)

In [53]:
# Report the ranking metrics on the held-out batch as a {metric name: value} dict.
ranking_model.evaluate(
    x=cached_test,
    return_dict=True,
)



{'root_mean_squared_error': 0.9848766922950745,
 'loss': 0.9699821472167969,
 'regularization_loss': 0,
 'total_loss': 0.9699821472167969}

# Беремо список рекомендованих операторів, отриманих з моделі пошуку, та передаємо їх до моделі ренкінгу

In [54]:
# Convert the retrieval result to NumPy and keep the row of the single query.
suggested_operators = operator_suggestions.numpy()[0]

In [55]:
# Display the retrieved operator names (raw bytes, as returned by the index).
suggested_operators

array([b'Emily Osborne', b'Kevin Rogers', b'Catherine Gonzalez',
       b'Donna Thomas', b'John Michael', b'Gabriella Reyes',
       b'Paige Brown', b'Jason Hoffman', b'George King', b'Alan Graves',
       b'Kevin Wong', b'Vincent Patton', b'Jill Flores', b'Rodney Green',
       b'Luis Torres', b'Lisa Cherry', b'Melissa Grant',
       b'Matthew Wallace', b'Robert Chavez', b'Joseph Sanders'],
      dtype=object)

In [56]:
# Score every retrieved operator for the customer used in the retrieval query.
# The customer name is read from `customer_test` (a string tensor of shape (1,))
# rather than hard-coded, so the ranking stage always scores the same customer
# the retrieval stage was queried with.
query_customer_name = customer_test['customer_name']

# Maps operator name (bytes, as returned by the index) to the ranking model's
# output for that (customer, operator) pair; one forward pass per operator.
test_ratings = {
    operator_name: ranking_model({
        "customer_name": query_customer_name,
        "operator_name": np.array([operator_name]),
    })
    for operator_name in suggested_operators
}

In [57]:
# List the scored operators from the highest predicted rating to the lowest.
# `title` holds the operator name (bytes) and `score` the model output for it.
print("Ratings:")
ranked_ratings = sorted(
    test_ratings.items(),
    key=lambda pair: pair[1],
    reverse=True,
)
for title, score in ranked_ratings:
    print(f"operator name: {title}: {score}")

Ratings:
operator name: b'Paige Brown': [[3.0261972]]
operator name: b'Matthew Wallace': [[2.9956973]]
operator name: b'Kevin Wong': [[2.977243]]
operator name: b'Rodney Green': [[2.94337]]
operator name: b'Luis Torres': [[2.9375243]]
operator name: b'Jill Flores': [[2.9280162]]
operator name: b'Gabriella Reyes': [[2.9237607]]
operator name: b'Robert Chavez': [[2.922796]]
operator name: b'Lisa Cherry': [[2.907502]]
operator name: b'Kevin Rogers': [[2.8955607]]
operator name: b'Donna Thomas': [[2.894316]]
operator name: b'Vincent Patton': [[2.8747199]]
operator name: b'Jason Hoffman': [[2.8743894]]
operator name: b'Alan Graves': [[2.8727229]]
operator name: b'Emily Osborne': [[2.8639083]]
operator name: b'Joseph Sanders': [[2.8479955]]
operator name: b'Melissa Grant': [[2.8398156]]
operator name: b'John Michael': [[2.8358023]]
operator name: b'George King': [[2.8187563]]
operator name: b'Catherine Gonzalez': [[2.8170025]]


In [58]:
# The index returns operator names as encoded bytes, while the DataFrame column
# holds text. Decode explicitly as UTF-8 instead of relying on NumPy's implicit
# bytes-to-str conversion in `astype(str)`, which is fragile for non-ASCII names.
suggested_operator_names = [
    name.decode('utf-8') for name in operator_suggestions[0].numpy()
]

# Historical tickets handled by any of the suggested operators.
filtered_df = customer_ratings_df[
    customer_ratings_df['operator'].isin(suggested_operator_names)
]

In [59]:
# Observed mean satisfaction rating per suggested operator, best first — a reference
# to compare with the ranking model's predicted order.
(
    filtered_df
    .groupby('operator')['customer_satisfaction_rating']
    .mean()
    .sort_values(ascending=False)
)

operator
Jill Flores           4.000000
Kevin Wong            3.666667
Alan Graves           3.500000
Rodney Green          3.333333
Paige Brown           3.250000
George King           3.166667
Melissa Grant         3.166667
Matthew Wallace       3.142857
Emily Osborne         3.100000
Jason Hoffman         3.045455
Robert Chavez         3.000000
Kevin Rogers          3.000000
John Michael          3.000000
Donna Thomas          2.875000
Vincent Patton        2.875000
Gabriella Reyes       2.857143
Joseph Sanders        2.733333
Luis Torres           2.722222
Lisa Cherry           2.700000
Catherine Gonzalez    2.642857
Name: customer_satisfaction_rating, dtype: float64

In [None]:
# Number of tickets per subject among the suggested operators' tickets.
(
    filtered_df['ticket_subject']
    .value_counts()
)