In [2]:
# Standard library
from datetime import datetime, timedelta
import math
# Data manipulation
import numpy as np
import pandas as pd
# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
# Machine Learning
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import mutual_info_classif
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import (
    roc_curve,
    precision_recall_curve,
    average_precision_score,
    roc_auc_score, 
    roc_curve, 
    precision_recall_curve, 
    auc
)
import lightgbm as lgb
from functools import reduce
import operator
import json
import os, pickle

# Loading Historical Dataset with Escalation Predictions

 - Escalation Predictions ( Threshold >= 0.5 ) are attached using :
 /Users/shekhar.tanwar/Documents/Projects/NegotiatonAgent/src/experiment/escalation_model_ml_pipeline.ipynb

In [3]:
# dataset_original = pd.read_csv('/Users/shekhar.tanwar/Documents/Projects/NegotiatonAgent/dataset/processed_dataset/dataset.csv').iloc[:,1:]
# dataset_original['DELIVERY_ID'] = dataset_original['DELIVERY_ID'].astype(np.float64)

# dataset_predictions = pd.read_csv('/Users/shekhar.tanwar/Documents/Projects/NegotiatonAgent/dataset/processed_dataset/dataset_predictions.csv').iloc[:,1:]
# dataset_predictions['DELIVERY_ID'] = dataset_predictions['DELIVERY_ID'].astype(np.float64)

# dataset = pd.merge(dataset_original, dataset_predictions, how = 'inner', left_on = 'DELIVERY_ID', right_on = 'DELIVERY_ID')

dataset  = pd.read_csv('/Users/shekhar.tanwar/Documents/Projects/NegotiatonAgent/dataset/processed_dataset/final_dataset_negotiation_agent.csv')

In [4]:
dataset.head()

Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,DELIVERY_ID,CONVERSATION,IS_CNR_ABUSER,Parsed_AC,ORDER_SUBTOTAL,IS_VIP_CUSTOMER,ISSUE_COUNT_LAST_10_ORDERS,ISSUE_COUNT_LAST_10_DAYS,PREDICTED_ESCALATION_PROB,SH_CNR,CONVERSATION_HUMAN_AGENT,CONVERSATION_CB,Extracted_AC,Final_AC
0,0,0,0,217985700000.0,"Chatbot: Hi Deepak, I'm your DoorDash virtual ...",0.566943,0.0,33.98,0.0,0.0,1.0,0.102361,16.88,1,"Chatbot: Hi Deepak, I'm your DoorDash virtual ...",0.0,0.0
1,1,1,1,356023100000.0,"Chatbot: Hi aj, I'm your DoorDash virtual assi...",0.915407,0.0,0.0,0.0,0.0,4.0,0.008399,-1.0,1,"Chatbot: Hi aj, I'm your DoorDash virtual assi...",0.0,0.0
2,2,2,2,262182600000.0,"Chatbot: Hi Luke, I'm your DoorDash virtual as...",0.099998,0.0,39.0,0.0,0.0,15.0,0.349462,-1.0,1,"Chatbot: Hi Luke, I'm your DoorDash virtual as...",0.0,0.0
3,3,3,3,276872300000.0,"Chatbot: Hi Colin, I'm your DoorDash virtual a...",0.014588,0.0,15.16,0.0,1.0,1.0,0.198509,-1.0,1,"Chatbot: Hi Colin, I'm your DoorDash virtual a...",0.0,0.0
4,4,4,4,318424200000.0,"Chatbot: Hi Cassandra, I'm your DoorDash virtu...",0.240459,0.0,28.0,0.0,0.0,0.0,0.18531,-1.0,1,"Chatbot: Hi Cassandra, I'm your DoorDash virtu...",0.0,0.0


# Historically : Percentage of Conversations with Apology Credits Issues 

In [5]:
print(f'Apology Credits Distribution Historically : \n {dataset['ACTUAL_AC_CONVERSATION'].value_counts(1)}')


KeyError: 'ACTUAL_AC_CONVERSATION'

# Analysis AC Credits

In [6]:
dataset.head()

Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,DELIVERY_ID,CONVERSATION,IS_CNR_ABUSER,Parsed_AC,ORDER_SUBTOTAL,IS_VIP_CUSTOMER,ISSUE_COUNT_LAST_10_ORDERS,ISSUE_COUNT_LAST_10_DAYS,PREDICTED_ESCALATION_PROB,SH_CNR,CONVERSATION_HUMAN_AGENT,CONVERSATION_CB,Extracted_AC,Final_AC
0,0,0,0,217985700000.0,"Chatbot: Hi Deepak, I'm your DoorDash virtual ...",0.566943,0.0,33.98,0.0,0.0,1.0,0.102361,16.88,1,"Chatbot: Hi Deepak, I'm your DoorDash virtual ...",0.0,0.0
1,1,1,1,356023100000.0,"Chatbot: Hi aj, I'm your DoorDash virtual assi...",0.915407,0.0,0.0,0.0,0.0,4.0,0.008399,-1.0,1,"Chatbot: Hi aj, I'm your DoorDash virtual assi...",0.0,0.0
2,2,2,2,262182600000.0,"Chatbot: Hi Luke, I'm your DoorDash virtual as...",0.099998,0.0,39.0,0.0,0.0,15.0,0.349462,-1.0,1,"Chatbot: Hi Luke, I'm your DoorDash virtual as...",0.0,0.0
3,3,3,3,276872300000.0,"Chatbot: Hi Colin, I'm your DoorDash virtual a...",0.014588,0.0,15.16,0.0,1.0,1.0,0.198509,-1.0,1,"Chatbot: Hi Colin, I'm your DoorDash virtual a...",0.0,0.0
4,4,4,4,318424200000.0,"Chatbot: Hi Cassandra, I'm your DoorDash virtu...",0.240459,0.0,28.0,0.0,0.0,0.0,0.18531,-1.0,1,"Chatbot: Hi Cassandra, I'm your DoorDash virtu...",0.0,0.0


# (Credit and Refund) CnR Calcualtion Historically

 - Find Dataset columns with CNR name in it
 - Replace SH_CNR ( Self Help CnR ) with -1
 - Find Total SH_CNR where the amount offered is > $0

In [7]:
print('find dataset columns with CNR in it')
for column in dataset.columns.tolist():
    if 'CNR' in column:
        print(column)


find dataset columns with CNR in it
IS_CNR_ABUSER
SH_CNR


In [8]:
print('Replace empty SH_CNR values with -1')
dataset['SH_CNR'] = dataset['SH_CNR'].fillna(-1)
print(f'find total SH_CNR where the amount offered is > $0 : {dataset[dataset['SH_CNR'] != -1]['SH_CNR'].sum()}')

Replace empty SH_CNR values with -1
find total SH_CNR where the amount offered is > $0 : 2199235.9600000004


# AC Credits by Human Agents (Distribution)


In [9]:
dataset.head()

Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,DELIVERY_ID,CONVERSATION,IS_CNR_ABUSER,Parsed_AC,ORDER_SUBTOTAL,IS_VIP_CUSTOMER,ISSUE_COUNT_LAST_10_ORDERS,ISSUE_COUNT_LAST_10_DAYS,PREDICTED_ESCALATION_PROB,SH_CNR,CONVERSATION_HUMAN_AGENT,CONVERSATION_CB,Extracted_AC,Final_AC
0,0,0,0,217985700000.0,"Chatbot: Hi Deepak, I'm your DoorDash virtual ...",0.566943,0.0,33.98,0.0,0.0,1.0,0.102361,16.88,1,"Chatbot: Hi Deepak, I'm your DoorDash virtual ...",0.0,0.0
1,1,1,1,356023100000.0,"Chatbot: Hi aj, I'm your DoorDash virtual assi...",0.915407,0.0,0.0,0.0,0.0,4.0,0.008399,-1.0,1,"Chatbot: Hi aj, I'm your DoorDash virtual assi...",0.0,0.0
2,2,2,2,262182600000.0,"Chatbot: Hi Luke, I'm your DoorDash virtual as...",0.099998,0.0,39.0,0.0,0.0,15.0,0.349462,-1.0,1,"Chatbot: Hi Luke, I'm your DoorDash virtual as...",0.0,0.0
3,3,3,3,276872300000.0,"Chatbot: Hi Colin, I'm your DoorDash virtual a...",0.014588,0.0,15.16,0.0,1.0,1.0,0.198509,-1.0,1,"Chatbot: Hi Colin, I'm your DoorDash virtual a...",0.0,0.0
4,4,4,4,318424200000.0,"Chatbot: Hi Cassandra, I'm your DoorDash virtu...",0.240459,0.0,28.0,0.0,0.0,0.0,0.18531,-1.0,1,"Chatbot: Hi Cassandra, I'm your DoorDash virtu...",0.0,0.0


In [14]:


#result = dataset[(dataset['PREDICTED_ESCALATION_PROB'] >= 0.5) & (dataset['IS_CNR_ABUSER'] < 0.5) & (dataset['Final_AC'] > 0)]['DELIVERY_ID'].nunique()

result = dataset[(dataset['PREDICTED_ESCALATION_PROB'] >= 0.5) & (dataset['Final_AC'] > 0)]['DELIVERY_ID'].nunique()

print(f'Number of Unique Delivery IDs escalated to Human Agentsd and Received Apology Credits from Human Agents : {result}')

print(f'Total Apology Credits Given Out by Human Agents : ${dataset[(dataset['PREDICTED_ESCALATION_PROB'] >= 0.5) & (dataset['Final_AC'] > 0)]['Final_AC'].sum()}')

Number of Unique Delivery IDs escalated to Human Agentsd and Received Apology Credits from Human Agents : 2987
Total Apology Credits Given Out by Human Agents : $37837.71000000001


# MTO Calculation : Historically
 - MTO : Manual TakeOver, Using CONVERSATION as indicator, find those cases where the case was escalated to Human Agent

In [11]:
dataset_escalated = dataset[dataset['PREDICTED_ESCALATION_PROB'] >= 0.5]
print(f'Number of Unique Delivery IDs escalated to Human Agents : {dataset_escalated.shape[0]}')
print(f'Percentage of Unique Delivery IDs with escalated to Human Agents : {dataset_escalated.shape[0] / dataset.shape[0]}')


Number of Unique Delivery IDs escalated to Human Agents : 13131
Percentage of Unique Delivery IDs with escalated to Human Agents : 0.009286152240281577


In [60]:
# print(dataset_escalated['CONVERSATION'].iloc[0])

sampled_conversation = dataset_escalated['CONVERSATION'].iloc[0]
all_lines = sampled_conversation.split('\n')

for index, line in enumerate(all_lines):
    if '$' in line.lower():
        print(f'Index : {index} Line : {line}')



Index : 31 Line : Human Agent: For the Inconvenience caused , I have processed $30 additional credits, you can consume this immediately upon your next order.
