# 1. Imports

In [2]:
import os, time, re, ast
import numpy as np
from datetime import timedelta, datetime
import pandas as pd, json
from scipy.special import softmax

import logging
# logging.basicConfig(level=logging.INFO)
# Configure basic logging with INFO level and write to a file
logging.basicConfig(filename='geminie_info.log', level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s')
# logging.basicConfig(level=logging.WARNING)

from google.cloud import bigquery, storage

import vertexai
from vertexai.generative_models import GenerativeModel, Part, FinishReason
import vertexai.preview.generative_models as generative_models


# Sentiments
from transformers import pipeline
from transformers import AutoTokenizer, AutoConfig
from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification


# Shap Significance
import shap

# 2. Variables

In [3]:
# Environment
project="eid-dw-dev-a5cf"

location="us-central1"

bucket_name="tte-eid-d-gcs-service-desk-call-recordings"

vertexai.init(project="eid-dw-dev-a5cf", location="us-central1")

# Gemini Model
model = GenerativeModel("gemini-1.5-pro-preview-0409")

# generation_config = {
#     "temperature": 0,
#     "top_p": 0.95,
# }

# safety_settings = {
#     generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
#     generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
#     generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
#     generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
# }

# Audio Files
# uris = [
#     "gs://tte-eid-d-gcs-mlops-users-files/VoiceAI/Ujet/ujet-voice-recordings_call-279902.mp3",
#     "gs://tte-eid-d-gcs-mlops-users-files/VoiceAI/Ujet/ujet-voice-recordings_call-2807561.mp3",
#     "gs://tte-eid-d-gcs-mlops-users-files/VoiceAI/Ujet/ujet-voice-recordings_call-281491.mp3",
#     "gs://tte-eid-d-gcs-mlops-users-files/VoiceAI/Calls/GMT20240327-150827_Recording.mp3",
#     "gs://tte-eid-d-gcs-mlops-users-files/VoiceAI/Calls/GMT20240328-121307_Recording.mp3",
#     "gs://tte-eid-d-gcs-mlops-users-files/VoiceAI/Calls/GMT20240328-131703_Recording.mp3",
#     "gs://tte-eid-d-gcs-mlops-users-files/VoiceAI/Calls/GMT20240328-142841_Recording.mp3",    
#     "gs://tte-eid-d-gcs-mlops-users-files/VoiceAI/Calls/GMT20240328-151417_Recording.mp3",
#     "gs://tte-eid-d-gcs-mlops-users-files/VoiceAI/Calls/GMT20240328-162135_Recording.mp3",
#     "gs://tte-eid-d-gcs-mlops-users-files/VoiceAI/Calls/GMT20240328-165108_Recording.mp3",
#     "gs://tte-eid-d-gcs-mlops-users-files/VoiceAI/Calls/GMT20240328-184817_Recording.mp3",    
# ]

# 3. Util Functions

#### Audio Converter

In [57]:
# to conver m4a files to mp3

# import pydub

# # Load the m4a file
# fileLoc = '/home/jupyter/VoiceAI/calls/GMT20240328-184817_Recording.m4a'
# m4a_file = pydub.AudioSegment.from_file(fileLoc)

# # Export the audio file as mp3
# m4a_file.export('/home/jupyter/VoiceAI/calls/GMT20240328-184817_Recording.mp3', format="mp3")

#### Get Calls audio from GCS bucket

In [20]:
def get_calls_from_gcs_bucket(bucket_name):
    files = {}
    """Lists all the blobs in the bucket."""
    # bucket_name = "your-bucket-name"

    storage_client = storage.Client()

    # Note: Client.list_blobs requires at least package version 1.17.0.
    blobs = storage_client.list_blobs(bucket_name)

    # get files between 2 and 10 MB
    for blob in blobs:
        if round(blob.size/(1024 * 1024), 2)> 2.00 and round(blob.size/(1024 * 1024), 2) < 60.00:
            files["gs://"+bucket_name+"/"+blob.name] = round(blob.size/(1024 * 1024), 2)
    return files
logging.info("Function: To read calls from GCS Bucket: get_calls_from_gcs_bucket()")

uris = get_calls_from_gcs_bucket(bucket_name)
list(uris.keys())[:5]

['gs://tte-eid-d-gcs-service-desk-call-recordings/ujet-voice-recordings/call-1.wav',
 'gs://tte-eid-d-gcs-service-desk-call-recordings/ujet-voice-recordings/call-1000.wav',
 'gs://tte-eid-d-gcs-service-desk-call-recordings/ujet-voice-recordings/call-10000.wav',
 'gs://tte-eid-d-gcs-service-desk-call-recordings/ujet-voice-recordings/call-100000.wav',
 'gs://tte-eid-d-gcs-service-desk-call-recordings/ujet-voice-recordings/call-100002.wav']

#### initiate_master_dataframes

In [21]:
def initiate_master_dataframes():
    if os.path.isfile("df_intra_calls_data.csv"):
        df_intra_calls_data = pd.read_csv("df_intra_calls_data.csv")
        df_intra_calls_data.call_id = df_intra_calls_data.call_id.astype('string')
        logging.info("Read data from existing - df_intra_calls_data.")
    else:
        df_intra_calls_data = pd.DataFrame()
        logging.info("Initiated null DataFrame: df_intra_calls_data")


    if os.path.isfile("df_inter_calls_data.csv"):
        df_inter_calls_data = pd.read_csv("df_inter_calls_data.csv")
        df_inter_calls_data.call_id = df_inter_calls_data.call_id.astype('string')
        logging.info("Read data from existing - df_inter_calls_data.")
    else:
        df_inter_calls_data = pd.DataFrame()
        logging.info("Initiated null DataFrame: df_inter_calls_data")
        
    return df_intra_calls_data, df_inter_calls_data
logging.info("Function: To initiate Master DataFrame: initiate_master_dataframes()")

#### generate_resp_from_transcript

In [22]:
def generate_resp_from_transcript(model, generation_config, safety_settings, task, prompt, transcript):
    logging.info("Started: "+task)
    responses = model.generate_content(
        [prompt, transcript],
        generation_config=generation_config,
        # safety_settings=safety_settings,
        stream=True
    )
    
    text_list = []
    for response in responses:
        text_list.append(response.text)
    logging.info(text_list[:100])
    logging.info("Completed: "+task)
    return text_list
logging.info("Function: To get Gemini response from Call Transcript: generate_resp_from_transcript()")

#### generate_resp_from_audio

In [23]:
def generate_resp_from_audio(model, task, prompt, uri):
    audio = Part.from_uri(
        mime_type="audio/wav",
        uri=uri
    )

    logging.info("Started: "+task)
    response = model.generate_content(
        [prompt, audio]
    )
    
    logging.info(response.text.splitlines()[:10])
    return response.text.splitlines()
logging.info("Function: To get Gemini response from Call Audio: generate_resp_from_audio()")

#### convert_transcript_to_df

In [24]:
def convert_transcript_to_df(text_list):
    # Split each line into a list of elements
    data_list = []
    for line in text_list:
        elements = line.split(",")
        if len(elements) == 5:
            data_list.append({
                "start": elements[0],
                "end": elements[1],
                "speaker": elements[2],
                "speaker_label": elements[3],
                "text": elements[4]
            })

    # Create DataFrame from list of dictionaries
    df_transcription = pd.DataFrame(data_list)
    logging.info("Completed DataFrame creation of transcription.")
    return df_transcription
logging.info("Function: Convert Transcript to Dataframe: convert_transcript_to_df()")

#### process_call

In [37]:
def process_call(model, transcript_prompt, uri, call_id, df_intra_calls_data, df_inter_calls_data):
    logging.info("Processing call.")
    start = time.time()
    task = "generate transcript."
    text_list = generate_resp_from_audio(model, task, transcript_prompt, uri)
    end = time.time()
    logging.info("Time to transcribe calls "+call_id+": "+str(timedelta(seconds=end-start)))
    df_intra_call = convert_transcript_to_df(text_list)
    df_intra_call.insert(loc=0, column='call_id', value=call_id)

    logging.info("Creating inter-call DataFrame.")
    df_inter_call = pd.DataFrame()
    df_inter_call['call_id'] = [call_id]
    df_inter_call['call_uri'] = uri
    df_inter_call['call_text'] = " ".join(df_intra_call.text)
    logging.info("Completed adding Call Conversationg full text.")


    # Appending to Intra-calls Master DataFrame
    df_intra_calls_data = pd.concat([df_intra_calls_data, df_intra_call], ignore_index=True)
    logging.info("Completed appending to Intra-calls Master DataFrame.")

    df_intra_calls_data.to_csv("df_intra_calls_data.csv", index=False)
    logging.info("Completed Persisting df_intra_calls_data with "+str(call_id)+" to CSV.")

    # Appending to Inter-calls Master DataFrame
    df_inter_calls_data = pd.concat([df_inter_calls_data, df_inter_call], ignore_index=True)
    logging.info("Completed appended to Inter-calls Master DataFrame.")

    df_inter_calls_data.to_csv("df_inter_calls_data.csv", index=False)
    logging.info("Completed Persisting df_inter_calls_data Dataframe to CSV.")
    logging.info("----              ")
    logging.info("----              ")
logging.info("Function: to process calls: process_call()")

#### get_sentiment_label

In [38]:
def get_sentiment_label(row):
  # Check conditions in order of priority (Positive > Negative > Neutral)
  if row['positive'] > row['negative'] and row['positive'] > row['neutral']:
    return 'POSITIVE'
  elif row['negative'] > row['positive'] and row['negative'] > row['neutral']:
    return 'NEGATIVE'
  else:
    return 'NEUTRAL'
logging.info("Function: To get Sentiment Label for the text: get_sentiment_label()")

In [27]:
def get_shap_word_importance(explainer,text):
    try:
        shap_values = explainer([text],fixed_context=1)[0]
        word_imp=get_word_imp(shap_values)
        result={'WordImportance':word_imp}
        return result
    except Exception as e:
        print('run time error',str(e))
logging.info("Function: To get Shap Word Importance: get_shap_word_importance()")

In [28]:
def preprocess_text(text):
    prcsd_text = text.lower()
    prcsd_text = prcsd_text.translate(str.maketrans('', '', string.punctuation))
    prcsd_text = " ".join([word for word in str(prcsd_text).split() if word not in STOPWORDS])
    return prcsd_text
logging.info("Function: To preprocess text: preprocess_text()")

In [67]:
def get_word_imp(shapV):
    result = []
    for word, nums in zip(shapV.data, shapV.values):
        result.append({'SentimentWord': word,'SentimentWordImportanceNegative': nums[0],'SentimentWordImportancePositive': nums[1]})
    return result
logging.info("Function: To get word level significance: get_word_imp()")

In [68]:
def create_sorted_words_df(data_str):
    # Use ast.literal_eval to safely convert the string to a list
    data_list = ast.literal_eval(data_str)

    # Create DataFrames for positive and negative words
    df_positive = pd.DataFrame(data_list)[['SentimentWord', 'SentimentWordImportancePositive']]
    df_negative = pd.DataFrame(data_list)[['SentimentWord', 'SentimentWordImportanceNegative']]

    # Sort by importance (descending order) and select top 3
    top_positive_words = df_positive.sort_values(by='SentimentWordImportancePositive', ascending=False).head(3)['SentimentWord'].tolist()
    top_negative_words = df_negative.sort_values(by='SentimentWordImportanceNegative', ascending=False).head(3)['SentimentWord'].tolist()

    # Create the DataFrame with desired columns
    data = {
        'MaxPositiveSentimentWord': top_positive_words[0],
        'MaxPositiveSentimentWord2': top_positive_words[1],
        'MaxPositiveSentimentWord3': top_positive_words[2],
        'MaxNegativeSentimentWord': top_negative_words[0],
        'MaxNegativeSentimentWord2': top_negative_words[1],
        'MaxNegativeSentimentWord3': top_negative_words[2]
    }
    return data
logging.info("Function: To create dictionary of shap value sorted words: process_call()")

# 5. Transcribing Calls

In [29]:
prompt_transcript="""
Transcribe this audio file, in the comma separated format: start,end,speaker,speaker_label,caption.
00:05,00:07,speaker name,System,caption
00:05,00:07,speaker name,Employee,caption
00:05,00:07,speaker name,Agent,caption
00:05,00:07,speaker name,Employee,caption
"""

### Without Retry

In [40]:
masterstart = time.time()
process = "transcribing"
for uri in uris:
    # if uri.split("/")[-1].startswith('GMT'):
    #     call_id = uri.split("/")[-1].split("-")[1].split("_")[0]
    # elif uri.split("/")[-1].startswith('ujet'):
    #     call_id = uri.split("/")[-1].split("_")[1].split("-")[1].split(".")[0]
    logging.info(uri)
    call_id = uri.split("-")[-1].split(".")[0]
    logging.info("Started processing call: "+str(call_id))
    print(("Started processing call: "+str(call_id)))
    
    # Initiating Master DataFrames
    df_intra_calls_data, df_inter_calls_data = initiate_master_dataframes()
    
    # Check if DataFrame exists in Memory
    if len(df_intra_calls_data) > 0:
        # Check if Call is already Processed
        if call_id in df_intra_calls_data.call_id.unique():
            logging.info("Call already Processed.")
            logging.info("                                         ")
            logging.info("                                         ")
        else:
            logging.info("Processing call.")
            process_call(model, prompt_transcript, uri, call_id, df_intra_calls_data, df_inter_calls_data)
            logging.info("Completed processing call.")
            # logging.info("Sleeping for 5 mins.")
            # time.sleep(5 * 60)
            
    else:
        logging.info("Processing call.")
        process_call(model, prompt_transcript, uri, call_id, df_intra_calls_data, df_inter_calls_data)
        logging.info("Completed processing call.")
        # logging.info("Sleeping for 5 mins.")
        # time.sleep(5 * 60)
    
masterend = time.time()
logging.info("Time to Process all calls "+str(len(uris))+": "+str(timedelta(seconds=masterend-masterstart)))

Started processing call: 1
Started processing call: 1000
Started processing call: 10000
Started processing call: 100000
Started processing call: 100002


KeyboardInterrupt: 

### With Retry

In [13]:
processed_calls = []
exceptioned_calls = {}

In [14]:
exceptioned_calls

{}

In [18]:
# def try_call_processing():
masterstart = time.time()
process = "transcribing"
for uri in uris:
    # if uri.split("/")[-1].startswith('GMT'):
    #     call_id = uri.split("/")[-1].split("-")[1].split("_")[0]
    # elif uri.split("/")[-1].startswith('ujet'):
    #     call_id = uri.split("/")[-1].split("_")[1].split("-")[1].split(".")[0]
    logging.info(uri)
    call_id = uri.split("-")[-1].split(".")[0]
    if call_id not in exceptioned_calls.keys():

        try:
            logging.info("Started processing call: "+str(call_id))
            print("Started processing call: "+str(call_id))

            # Initiating Master DataFrames
            df_intra_calls_data, df_inter_calls_data = initiate_master_dataframes()

            # Check if DataFrame exists in Memory
            if len(df_intra_calls_data) > 0:
                # Check if Call is already Processed
                if call_id in df_intra_calls_data.call_id.unique():
                    print("Call already Processed.")
                    logging.info("Call already Processed.")
                    logging.info("                                         ")
                    logging.info("                                         ")
                else:
                    logging.info("Processing call.")
                    process_call(model, generation_config, safety_settings, transcript_prompt, uri, call_id, df_intra_calls_data, df_inter_calls_data)
                    processed_calls.append(call_id)
                    logging.info("Completed processing call.")
                    print("Completed processing call.")
                    # logging.info("Sleeping for 5 mins.")
                    # time.sleep(5 * 60)

            else:
                logging.info("Processing call.")
                process_call(model, generation_config, safety_settings, transcript_prompt, uri, call_id, df_intra_calls_data, df_inter_calls_data)
                logging.info("Completed processing call.")
                print("Completed processing call.")
                # logging.info("Sleeping for 5 mins.")
                # time.sleep(5 * 60)

        except Exception as e:
            # exceptioned_calls.append({call_id: e})
            exceptioned_calls[call_id] = e
            logging.info("Cautght Exception: "+str(call_id))
            print("Cautght Exception: "+str(call_id))

masterend = time.time()
logging.info("Time to Process all calls "+str(len(uris))+": "+str(timedelta(seconds=masterend-masterstart)))

Started processing call: 1
Cautght Exception: 1
Started processing call: 1000
Cautght Exception: 1000
Started processing call: 10000
Cautght Exception: 10000
Started processing call: 100000
Cautght Exception: 100000
Started processing call: 100002
Cautght Exception: 100002
Started processing call: 100003
Cautght Exception: 100003
Started processing call: 100004
Cautght Exception: 100004
Started processing call: 100007
Cautght Exception: 100007
Started processing call: 10001
Cautght Exception: 10001
Started processing call: 100011
Cautght Exception: 100011
Started processing call: 100012
Cautght Exception: 100012
Started processing call: 100013
Cautght Exception: 100013
Started processing call: 100015
Cautght Exception: 100015
Started processing call: 100017
Cautght Exception: 100017
Started processing call: 100019
Cautght Exception: 100019
Started processing call: 100021
Cautght Exception: 100021
Started processing call: 100024
Cautght Exception: 100024
Started processing call: 100025


# 6. Inter Calls KPIs

In [69]:
df_inter_calls_data = pd.read_csv("df_inter_calls_data.csv")
df_inter_calls_data.call_id = df_inter_calls_data.call_id.astype('str')
# df_inter_calls_data.to_csv("df_inter_calls_data.csv", index=False)

In [70]:
df_inter_calls_data.shape

(72, 9)

In [71]:
df_inter_calls_data.call_id.unique()

array(['279902', '2807561', '281491', '150827', '131703', '165108',
       '184817', '1000', '1', '10000', '100002', '100003', '100004',
       '100007', '100009', '10001', '100011', '100012', '100013',
       '100014', '100017', '100018', '100019', '100024', '100025',
       '100026', '100028', '100029', '10003', '100030', '100031',
       '100032', '100033', '100034', '100035', '100036', '100037',
       '100039', '10004', '100040', '100041', '100042', '100043',
       '100044', '100045', '100046', '100047', '100048', '100049',
       '10005', '100050', '100051', '100052', '100053', '100054',
       '100055', '100056', '100057', '100058', '100059', '10006',
       '100060', '100062', '100063', '100064', '100065', '100068',
       '100069', '10007', '100070', '100072', '100073'], dtype=object)

## 6.1 Inter Call Summary

In [72]:
prompt_summary = "Given a call transcription, suggest an informative and concise summary of not more than 100 words."

In [73]:
def get_call_summary(call_text):
    task = "generate summary."
    summary_list = generate_resp_from_transcript(model, generation_config, safety_settings, task, prompt_summary, call_text)
    summary = " ".join(summary_list[1:]).replace("Call Summary:","").replace("\n","").strip()
    # print(summary)
    return summary

In [86]:
current_index = 0
call_summary_list = []

In [91]:
len(call_summary_list)

72

In [88]:
current_index

0

In [90]:
while current_index < df_inter_calls_data.shape[0]:
    print(current_index)
    try:
        call_summary_list.append(get_call_summary(df_inter_calls_data.call_text[current_index]))
        current_index += 1
        
    except Exception as e:
        print("Excepted at: "+str(current_index))
        print("Sleeping for seconds: "+str(30))
        time.sleep(30)

6
7
8
9
10
11
12
Excepted at: 12
Sleeping for seconds: 30
12
Excepted at: 12
Sleeping for seconds: 30
12
13
14
15
16
17
18
Excepted at: 18
Sleeping for seconds: 30
18
19
20
21
22
23
24
Excepted at: 24
Sleeping for seconds: 30
24
Excepted at: 24
Sleeping for seconds: 30
24
25
26
27
28
29
30
Excepted at: 30
Sleeping for seconds: 30
30
31
32
33
34
35
36
Excepted at: 36
Sleeping for seconds: 30
36
Excepted at: 36
Sleeping for seconds: 30
36
37
38
39
40
41
42
Excepted at: 42
Sleeping for seconds: 30
42
43
44
45
46
47
48
Excepted at: 48
Sleeping for seconds: 30
48
Excepted at: 48
Sleeping for seconds: 30
48
49
50
51
52
53
54
Excepted at: 54
Sleeping for seconds: 30
54
55
56
57
58
59
60
Excepted at: 60
Sleeping for seconds: 30
60
61
62
63
64
65
66
Excepted at: 66
Sleeping for seconds: 30
66
Excepted at: 66
Sleeping for seconds: 30
66
67
68
69
70
71


In [92]:
df_inter_calls_data.call_summary=call_summary_list

### Append

In [93]:
df_inter_calls_data.to_csv("df_inter_calls_data.csv", index=False)
df_inter_calls_data

Unnamed: 0,call_id,call_text,call_summary,topic,category,sub_category,overall_user_sentiments,overall_agent_sentiments,agent_coaching
0,279902,Hello. Yes Yeah. All right Yes Okay. Were you ...,The user is experiencing persistent errors whi...,Troubleshooting Windows login error,Technical SupportSub-category,Login Issues,"Frustrated, confused, and eventually resigned.","Patient, repetitive, and methodical.",1. Active Listening: Minimize interruptions li...
1,2807561,Thanks for calling TTech support. My name is L...,"Louie, working from home in production for Blu...",New computer login issues,Technical SupportSub-Category,Login Issues,"Initially frustrated, then relieved and grateful.","Patient, helpful, and professional.",1. Reduce Verification Questions: Streamline t...
2,281491,Thanks for calling TTech support. My name is L...,Louis from Covered California contacted TTech ...,Global Protect login issue,IT SupportSub-Category,Remote Access/VPN Issue,"Initially neutral, becomes slightly frustrated...",Neutral and professional.,1. Active Listening: Avoid interrupting Maria...
3,150827,Thank you for calling Atom services. My name i...,Two-Way Audio Issue**Mr. Maha** contacted Atom...,Troubleshooting two-way audio issues on Zoom.,Technical SupportSub-Category,Audio/Video Issues,"Confused, then cooperative and polite.","Helpful, patient, and professional.",1. Active Listening: Avoid interrupting and us...
4,131703,silence call sound escalated virtual agent cal...,"**Stanley Riles, an employee of Independence B...",Troubleshooting SRW login issues for Stanley ...,Category,Technical SupportSub-category,"Frustrated, impatient, stressed","Calm, helpful, professional",1. Active Listening: Avoid interrupting the c...
...,...,...,...,...,...,...,...,...,...
67,100069,caption Thank you for calling TTech and at hom...,"Tiffany Ellis, a TTech employee working for Da...",,,,,,
68,10007,caption** Thank you for calling TTEC at home s...,TTEC Agent Onboarding AssistanceThis call invo...,,,,,,
69,100070,Hello? Thank you for calling Amazon support Hi...,"Thea Villean Ash Davis, Oracle ID 22 71270, fr...",,,,,,
70,100072,Thank you for calling Adams office first. My n...,Senia Arroyo called on her first day of traini...,,,,,,


## 6.2 Inter Call Topic 

In [113]:
prompt_topic = "Given a call transcription, provide one informative topic in 10 words."

In [114]:
current_index = 0
max_retries = 3
call_topic_list = []

In [115]:
def get_call_topic(call_text):
    task = "generate topic."
    topic_list = generate_resp_from_transcript(model, generation_config, safety_settings, task, prompt_topic, call_text)
    topic = " ".join(topic_list).replace("\n","").strip()
    return topic

In [117]:
while current_index < df_inter_calls_data.shape[0]:
    retries = 0
    while retries < max_retries:
        print(current_index)
        try:
            call_topic_list.append(get_call_topic(df_inter_calls_data.call_text[current_index]))
            current_index += 1

        except Exception as e:
            retries += 1
            print("Excepted at: "+str(current_index))
            print("Sleeping for seconds: "+str(30))
            time.sleep(30)
        
        finally:
            if retries == max_retries:
                current_index += 1
                print("Max Tries done. Append empty value.")
                call_topic_list.append("")

61
62
63
64
65
Excepted at: 65
Sleeping for seconds: 30
65
Excepted at: 65
Sleeping for seconds: 30
65
Excepted at: 65
Sleeping for seconds: 30
Max Tries done. Append empty value.
66
67
68
69
70
71
72
Excepted at: 72
Sleeping for seconds: 30
72
Excepted at: 72
Sleeping for seconds: 30
72
Excepted at: 72
Sleeping for seconds: 30
Max Tries done. Append empty value.


In [122]:
df_inter_calls_data.topic = call_topic_list

### Append

In [123]:
df_inter_calls_data.to_csv("df_inter_calls_data.csv", index=False)
df_inter_calls_data.head(10)

Unnamed: 0,call_id,call_text,call_summary,topic,category,sub_category,overall_user_sentiments,overall_agent_sentiments,agent_coaching
0,279902,Hello. Yes Yeah. All right Yes Okay. Were you ...,The user is experiencing persistent errors whi...,Troubleshooting Windows login error,Technical SupportSub-category,Login Issues,"Frustrated, confused, and eventually resigned.","Patient, repetitive, and methodical.",1. Active Listening: Minimize interruptions li...
1,2807561,Thanks for calling TTech support. My name is L...,"Louie, working from home in production for Blu...",New computer login issue,Technical SupportSub-Category,Login Issues,"Initially frustrated, then relieved and grateful.","Patient, helpful, and professional.",1. Reduce Verification Questions: Streamline t...
2,281491,Thanks for calling TTech support. My name is L...,Louis from Covered California contacted TTech ...,Global Protect login issue,IT SupportSub-Category,Remote Access/VPN Issue,"Initially neutral, becomes slightly frustrated...",Neutral and professional.,1. Active Listening: Avoid interrupting Maria...
3,150827,Thank you for calling Atom services. My name i...,Two-Way Audio Issue**Mr. Maha** contacted Atom...,Troubleshooting two-way audio issues on Zoom.,Technical SupportSub-Category,Audio/Video Issues,"Confused, then cooperative and polite.","Helpful, patient, and professional.",1. Active Listening: Avoid interrupting and us...
4,131703,silence call sound escalated virtual agent cal...,"**Stanley Riles, an employee of Independence B...",Troubleshooting SRW login issues for Stanley ...,Category,Technical SupportSub-category,"Frustrated, impatient, stressed","Calm, helpful, professional",1. Active Listening: Avoid interrupting the c...
5,165108,caption** (Upbeat music playing) Thank you f...,The caller is experiencing issues with Citrix ...,** Citrix freezing issue impacting user access**,Citrix Issue Sub-Category,Freezing/Performance,"Neutral, professional","Frustrated, confused",1. Active Listening: Suma should avoid interr...
6,184817,caption Thank you for calling Atom service des...,"**Diane**, a Peloton employee, contacted the A...",Troubleshooting SRW application launch failure,Technical SupportSub-Category,Software Issue (SRW Application),"Professional, helpful, patient.","Frustrated, confused, but cooperative.",1. Active Listening: Suma should focus on act...
7,1000,Sound of car driving Sound of car stopping Sil...,The call begins with background noise suggesti...,Arrival at destination,,,,,
8,1,Sounds of traffic and car horns Sound of a car...,The call begins with background noise suggesti...,Traffic noise during a phone call.,,,,,
9,10000,silence Thank you for calling At Home Service ...,"Haley Creekmore, a new Ttec employee on her fi...",Agent assists employee with computer setup.,,,,,


## 6.3 Call Category

In [124]:
prmopt_category = "Given a call transcription, provide one category and one sub-category."

In [125]:
current_index = 0
max_retries = 2
categorys, subCategorys = [], []

In [126]:
def get_category_subCategory(call_text):
    task = "generate Category and Sub-Category."
    category_list = generate_resp_from_transcript(model, generation_config, safety_settings, task, prmopt_category, call_text)
    category_subCategory = "".join(category_list).replace("*","").replace("\n","").split(":")[1:]
    category = category_subCategory[0].strip()
    subCategory = category_subCategory[1].strip()
    return category, subCategory

In [128]:
while current_index < df_inter_calls_data.shape[0]:
    retries = 0
    while retries < max_retries:
        print(current_index)
        try:
            category, subCategory = get_category_subCategory(df_inter_calls_data.call_text[current_index])
            categorys.append(category)
            subCategorys.append(subCategory)
            current_index += 1

        except Exception as e:
            retries += 1
            print("Excepted at: "+str(current_index))
            print("Sleeping for seconds: "+str(30))
            time.sleep(30)
        
        finally:
            if retries == max_retries:
                current_index += 1
                print("Max Tries done. Append empty value.")
                categorys.append("")
                subCategorys.append("")

0
1
2
3
4
5
6
Excepted at: 6
Sleeping for seconds: 30
6
7
8
9
10
11
12
Excepted at: 12
Sleeping for seconds: 30
12
13
14
15
16
17
18
Excepted at: 18
Sleeping for seconds: 30
18
Excepted at: 18
Sleeping for seconds: 30
18
19
20
21
22
23
24
Excepted at: 24
Sleeping for seconds: 30
24
Excepted at: 24
Sleeping for seconds: 30
24
25
26
27
28
29
30
Excepted at: 30
Sleeping for seconds: 30
30
Excepted at: 30
Sleeping for seconds: 30
30
31
32
33
34
35
36
Excepted at: 36
Sleeping for seconds: 30
36
37
38
39
40
41
42
Excepted at: 42
Sleeping for seconds: 30
42
Excepted at: 42
Sleeping for seconds: 30
42
43
44
45
46
47
48
Excepted at: 48
Sleeping for seconds: 30
48
Excepted at: 48
Sleeping for seconds: 30
48
49
50
51
52
53
54
Excepted at: 54
Sleeping for seconds: 30
54
Excepted at: 54
Sleeping for seconds: 30
54
55
56
57
58
59
60
Excepted at: 60
Sleeping for seconds: 30
60
61
62
63
64
65
66
Excepted at: 66
Sleeping for seconds: 30
66
Excepted at: 66
Sleeping for seconds: 30
66
67
68
69
70
71
72
E

KeyboardInterrupt: 

In [129]:
df_inter_calls_data['category'] = categorys
df_inter_calls_data['sub_category'] = subCategorys

### Append

In [130]:
df_inter_calls_data.to_csv("df_inter_calls_data.csv", index=False)
df_inter_calls_data.head(10)

Unnamed: 0,call_id,call_text,call_summary,topic,category,sub_category,overall_user_sentiments,overall_agent_sentiments,agent_coaching
0,279902,Hello. Yes Yeah. All right Yes Okay. Were you ...,The user is experiencing persistent errors whi...,Troubleshooting Windows login error,Technical SupportSub-category,Login Issues,"Frustrated, confused, and eventually resigned.","Patient, repetitive, and methodical.",1. Active Listening: Minimize interruptions li...
1,2807561,Thanks for calling TTech support. My name is L...,"Louie, working from home in production for Blu...",New computer login issue,Technical SupportSub-Category,Login Issues,"Initially frustrated, then relieved and grateful.","Patient, helpful, and professional.",1. Reduce Verification Questions: Streamline t...
2,281491,Thanks for calling TTech support. My name is L...,Louis from Covered California contacted TTech ...,Global Protect login issue,IT SupportSub-Category,Remote Access/VPN Issue,"Initially neutral, becomes slightly frustrated...",Neutral and professional.,1. Active Listening: Avoid interrupting Maria...
3,150827,Thank you for calling Atom services. My name i...,Two-Way Audio Issue**Mr. Maha** contacted Atom...,Troubleshooting two-way audio issues on Zoom.,Technical SupportSub-Category,Audio/Video Issues,"Confused, then cooperative and polite.","Helpful, patient, and professional.",1. Active Listening: Avoid interrupting and us...
4,131703,silence call sound escalated virtual agent cal...,"**Stanley Riles, an employee of Independence B...",Troubleshooting SRW login issues for Stanley ...,Category,Technical SupportSub-Category,"Frustrated, impatient, stressed","Calm, helpful, professional",1. Active Listening: Avoid interrupting the c...
5,165108,caption** (Upbeat music playing) Thank you f...,The caller is experiencing issues with Citrix ...,** Citrix freezing issue impacting user access**,Citrix IssueSub-Category,Freezing/Performance,"Neutral, professional","Frustrated, confused",1. Active Listening: Suma should avoid interr...
6,184817,caption Thank you for calling Atom service des...,"**Diane**, a Peloton employee, contacted the A...",Troubleshooting SRW application launch failure,Category,Technical Support Sub-Category,"Professional, helpful, patient.","Frustrated, confused, but cooperative.",1. Active Listening: Suma should focus on act...
7,1000,Sound of car driving Sound of car stopping Sil...,The call begins with background noise suggesti...,Arrival at destination,Category,Environmental Sounds Sub-Category,,,
8,1,Sounds of traffic and car horns Sound of a car...,The call begins with background noise suggesti...,Traffic noise during a phone call.,Category,Environmental Sounds Sub-Category,,,
9,10000,silence Thank you for calling At Home Service ...,"Haley Creekmore, a new Ttec employee on her fi...",Agent assists employee with computer setup.,IT SupportSub-Category,Account Setup/Login Issue,,,


## 6.3 Inter Call Overall Speaker Sentiment

In [131]:
prompt_overall_sentiment_speaker = "Given a call transcription provide overall sentiment for each speaker in few words."

In [132]:
current_index = 0
max_retries = 2
overall_user_sentiments, overall_customer_sentiments = [], []

In [133]:
def get_overall_speakers_sentiment(call_text):
    task = "generate overall speakers sentiments."
    snetiment_list = generate_resp_from_transcript(model, generation_config, safety_settings, task, prompt_overall_sentiment_speaker, call_text)
    overall_sentiments = "".join(snetiment_list).replace("*","").split("\n")[2:-1]
    overall_user_sentiment = overall_sentiments[0].split(":")[1].strip()
    overall_customer_sentiment = overall_sentiments[1].split(":")[1].strip()
    return overall_user_sentiment, overall_customer_sentiment

In [143]:
len(overall_user_sentiments)

20

In [None]:
while current_index < df_inter_calls_data.shape[0]:
    retries = 0
    while retries < max_retries:
        print(current_index)
        try:
            overall_user_sentiment, overall_customer_sentiment = get_overall_speakers_sentiment(df_inter_calls_data.call_text[current_index])
            overall_user_sentiments.append(overall_user_sentiment)
            overall_customer_sentiments.append(overall_customer_sentiment)
            current_index += 1

        except Exception as e:
            retries += 1
            print("Excepted at: "+str(current_index))
            print("Sleeping for seconds: "+str(30))
            time.sleep(30)
        
        finally:
            if retries == max_retries:
                current_index += 1
                print("Max Tries done. Append empty value.")
                overall_user_sentiments.append("")
                overall_customer_sentiments.append("")

21
Excepted at: 21
Sleeping for seconds: 30
21
Excepted at: 21
Sleeping for seconds: 30
Max Tries done. Append empty value.
22
Excepted at: 22
Sleeping for seconds: 30
22
Excepted at: 22
Sleeping for seconds: 30
Max Tries done. Append empty value.
23
24
25
26
27
28
29
Excepted at: 29
Sleeping for seconds: 30
29
30
31
32
33
34
35
Excepted at: 35
Sleeping for seconds: 30
Max Tries done. Append empty value.
36
Excepted at: 36
Sleeping for seconds: 30
36
Excepted at: 36
Sleeping for seconds: 30
Max Tries done. Append empty value.
37
38
Excepted at: 38
Sleeping for seconds: 30
38
Excepted at: 38
Sleeping for seconds: 30
Max Tries done. Append empty value.
39
40
41
42
Excepted at: 42
Sleeping for seconds: 30


In [159]:
df_inter_calls_data['overall_user_sentiments'] = overall_user_sentiments
df_inter_calls_data['overall_agent_sentiments'] = overall_customer_sentiments

### Append

In [160]:
df_inter_calls_data = df_inter_calls_data.rename(columns = {'overall_customer_sentiments': 'overall_agent_sentiments'})

In [161]:
df_inter_calls_data.to_csv("df_inter_calls_data.csv", index=False)
df_inter_calls_data.head(10)

Unnamed: 0,call_id,call_text,call_summary,topic,category,sub_category,overall_user_sentiments,overall_agent_sentiments,agent_coaching
0,279902,Hello. Yes Yeah. All right Yes Okay. Were you ...,The user is experiencing persistent internet c...,Troubleshooting Windows login error,Category,Technical SupportSub-category,"Frustrated, confused, and eventually resigned.","Patient, repetitive, and methodical.",Active Listening & Empathy:
1,2807561,Thanks for calling TTech support. My name is L...,"Louie, working from home in production for Blu...",New computer login issues,Technical SupportSub-Category,Login Issues,"Initially frustrated, then relieved and grateful.","Patient, helpful, and professional.",Areas of Improvement:
2,281491,Thanks for calling TTech support. My name is L...,Louis from Covered California contacted TTech ...,Global Protect login issue,IT SupportSub-Category,Remote Access/VPN Issue,"Initially neutral, becomes slightly frustrated...",Neutral and professional.,Greeting and Introduction:
3,150827,Thank you for calling Atom services. My name i...,Two-Way Audio Issue**Mr. Maha** contacted Atom...,Troubleshooting two-way audio issues on Zoom.,Technical SupportSub-Category,Audio/Video Issues,"Confused, then cooperative and polite.","Helpful, patient, and professional.",Areas for Improvement:
4,131703,silence call sound escalated virtual agent cal...,"**Stanley Riles, an employee of Independence B...",Troubleshooting SRW login issues for Stanley ...,Category,Technical SupportSub-Category,"Frustrated, impatient, stressed","Calm, helpful, professional",
5,165108,caption** (Upbeat music playing) Thank you f...,The caller is experiencing issues with Citrix ...,Citrix freezing issue since 11:50 EST,Citrix IssueSub-Category,Freezing/Performance,"Neutral, professional","Frustrated, confused",
6,184817,caption Thank you for calling Atom service des...,**Diane from the Peloton project is experienci...,,,,"Professional, helpful, patient.","Frustrated, confused, but cooperative.",


## 6.4 Agent Coaching

In [342]:
prompt_coaching = "Given a call transcription provide top 5 coaching tips for Agent to better Customer Experience in less than 100 words."

### Loop

In [344]:
coaching_list = []
for call_text in df_inter_calls_data.call_text:
    task = "generate Agent Coaching."
    coaching_resp = generate_resp_from_transcript(model, generation_config, safety_settings, task, prompt_coaching, call_text)
    coaching = "\n".join("".join(coaching_resp).replace("*","").split("\n")[2:-1])
    coaching_list.append(coaching)

In [None]:
coaching_list

### Manual

In [350]:
len(coaching_list)

7

In [348]:
# call_text = df_inter_calls_data.call_text[6]
# task = "generate Agent Coaching."
# coaching_resp = generate_resp_from_transcript(model, generation_config, safety_settings, task, prompt_coaching, call_text)
# coaching = "\n".join("".join(coaching_resp).replace("*","").split("\n")[2:-1])
# coaching_list.append(coaching)

INFO:root:Started: generate Agent Coaching.
INFO:root:['##', ' Coaching Tips for Suma:\n\n1. **Active Listening:**  Suma should focus', " on actively listening to the customer's issue instead of interrupting with questions. ", '\n2. **Empathy:**  Show empathy and understanding towards the customer\'s frustration. Phrases like "I understand this is frustrating" can go a long', ' way.\n3. **Clear Communication:**  Suma should avoid technical jargon and use simple, clear language to explain steps and solutions.\n4. **Confirmation', ':**  Regularly confirm understanding by summarizing the issue and proposed solutions.\n5. **Patience:**  Be patient with the customer, especially when guiding them through technical steps. \n']
INFO:root:Completed: generate Agent Coaching.


### Append

In [365]:
df_inter_calls_data['agent_coaching'] = coaching_list
df_inter_calls_data.to_csv("df_inter_calls_data.csv", index=False)
df_inter_calls_data.head(10)

Unnamed: 0,call_id,call_text,call_summary,topic,category,sub_category,overall_user_sentiments,overall_agent_sentiments,agent_coaching
0,279902,Hello. Yes Yeah. All right Yes Okay. Were you ...,The user is experiencing persistent internet c...,Troubleshooting Windows login error,Technical SupportSub-category,Login Issues,"Frustrated, confused, and eventually resigned.","Patient, repetitive, and methodical.",1. Active Listening: Minimize interruptions li...
1,2807561,Thanks for calling TTech support. My name is L...,"Louie, working from home in production for Blu...",New computer login issues,Technical SupportSub-Category,Login Issues,"Initially frustrated, then relieved and grateful.","Patient, helpful, and professional.",1. Reduce Verification Questions: Streamline t...
2,281491,Thanks for calling TTech support. My name is L...,Louis from Covered California contacted TTech ...,Global Protect login issue,IT SupportSub-Category,Remote Access/VPN Issue,"Initially neutral, becomes slightly frustrated...",Neutral and professional.,1. Active Listening: Avoid interrupting Maria...
3,150827,Thank you for calling Atom services. My name i...,Two-Way Audio Issue**Mr. Maha** contacted Atom...,Troubleshooting two-way audio issues on Zoom.,Technical SupportSub-Category,Audio/Video Issues,"Confused, then cooperative and polite.","Helpful, patient, and professional.",1. Active Listening: Avoid interrupting and us...
4,131703,silence call sound escalated virtual agent cal...,"**Stanley Riles, an employee of Independence B...",Troubleshooting SRW login issues for Stanley ...,Category,Technical SupportSub-category,"Frustrated, impatient, stressed","Calm, helpful, professional",1. Active Listening: Avoid interrupting the c...
5,165108,caption** (Upbeat music playing) Thank you f...,The caller is experiencing issues with Citrix ...,Citrix freezing issue since 11:50 EST,Citrix Issue Sub-Category,Freezing/Performance,"Neutral, professional","Frustrated, confused",1. Active Listening: Suma should avoid interr...
6,184817,caption Thank you for calling Atom service des...,**Diane from the Peloton project is experienci...,Troubleshooting SRW application launch failure,Technical SupportSub-Category,Software Issue (SRW Application),"Professional, helpful, patient.","Frustrated, confused, but cooperative.",1. Active Listening: Suma should focus on act...


# 7 Intra Calls KPIs

In [235]:
df_intra_calls_data = pd.read_csv("df_intra_calls_data.csv")
df_intra_calls_data.call_id = df_intra_calls_data.call_id.astype('str')

In [239]:
df_intra_calls_data.call_id.unique().tolist()

['279902', '2807561', '281491', '150827', '131703', '165108', '184817']

In [240]:
df_intra_calls_data.head()

Unnamed: 0,call_id,start,end,speaker,text,negative,neutral,positive,SentimentLable,SentimentScore,WordImportance,start_second,end_second,time_spoken,time_silence,speaker_label
0,279902,00:01,00:05,Ramelo,Yes,8.97,45.49,45.53,POSITIVE,0.999781,"[{'SentimentWord': '', 'SentimentWordImportanc...",1.0,5.0,4.0,0.0,Agent
1,279902,00:09,00:11,Joy,Yeah.,9.89,57.98,32.13,POSITIVE,0.999524,"[{'SentimentWord': '', 'SentimentWordImportanc...",9.0,11.0,2.0,4.0,Employee
2,279902,00:11,00:16,Ramelo,All right,7.15,52.12,40.73,POSITIVE,0.999842,"[{'SentimentWord': '', 'SentimentWordImportanc...",11.0,16.0,5.0,0.0,Agent
3,279902,00:22,00:29,Joy,Yes,8.97,45.49,45.53,POSITIVE,0.999781,"[{'SentimentWord': '', 'SentimentWordImportanc...",22.0,29.0,7.0,6.0,Employee
4,279902,00:29,00:32,Ramelo,Okay. Were you,3.45,86.24,10.31,POSITIVE,0.999095,"[{'SentimentWord': '', 'SentimentWordImportanc...",29.0,32.0,3.0,0.0,Agent


## 7.1 Intra Calls Sentiments

In [241]:
MODEL = f"cardiffnlp/twitter-roberta-base-sentiment-latest"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
config = AutoConfig.from_pretrained(MODEL)
model_sentiment = AutoModelForSequenceClassification.from_pretrained(MODEL)
logging.info("Sentiment Analysis Model: cardiffnlp/twitter-roberta-base-sentiment-latest")

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
INFO:root:Sentiment Analysis Model: cardiffnlp/twitter-roberta-base-sentiment-latest


In [254]:
logging.info("Started Sentiment and Shap Analysis.")
start = time.time()
dict_sentiments = []
for text in df_intra_calls_data.text:
    encoded_input = tokenizer(text, return_tensors='pt')
    output = model_sentiment(**encoded_input)
    scores = output[0][0].detach().numpy()
    scores = np.round(np.multiply(softmax(scores), 100), 2)
    # get feature Importance
    shap_score=get_shap_word_importance(sent_pipeline,explainer,text)
    # logging.info("Completed Shap Score calculations.")
    # Creating the list of Sentiment Dictionary
    merged_dict = {**dict(zip(list(config.id2label.values()), list(scores))), **shap_score}
    dict_sentiments.append(merged_dict)
    
df_intra_calls_data['SentimentLable'] = df_intra_calls_data[['positive','negative','neutral']].apply(get_sentiment_label, axis=1)
end = time.time()
df_intra_calls_data = df_intra_calls_data.join(pd.DataFrame(dict_sentiments))
logging.info("Completed Sentiment Analysis and Shap Scores in: "+str(timedelta(seconds=end-start)))

INFO:root:Started Sentiment and Shap Analysis.
INFO:root:Completed Sentiment Analysis and Shap Scores in: 0:00:32.303235


In [267]:
# logging.info("Started adding shap sorted word list columns.")
# shap_sorted_words_list = [create_sorted_words_df(wordImp_list) for wordImp_list in df_intra_calls_data.WordImportance]
# df_intra_calls_data = df_intra_calls_data.join(pd.DataFrame(shap_sorted_words_list))
# logging.info("Completed adding shap sorted word list columns.")

In [380]:
df_intra_calls_data.to_csv("df_intra_calls_data.csv", index=False)
df_intra_calls_data.head()

Unnamed: 0,call_id,start,end,speaker,text,start_second,end_second,time_spoken,time_silence,speaker_label,negative,neutral,positive,SentimentLable,SentimentScore,WordImportance
0,279902,00:01,00:05,Ramelo,Yes,1.0,5.0,4.0,0.0,Agent,8.97,45.49,45.53,POSITIVE,0.989574,"[{'SentimentWord': '', 'SentimentWordImportanc..."
1,279902,00:09,00:11,Joy,Yeah.,9.0,11.0,2.0,4.0,Employee,9.89,57.98,32.13,NEUTRAL,0.989574,"[{'SentimentWord': '', 'SentimentWordImportanc..."
2,279902,00:11,00:16,Ramelo,All right,11.0,16.0,5.0,0.0,Agent,7.15,52.12,40.73,NEUTRAL,0.989574,"[{'SentimentWord': '', 'SentimentWordImportanc..."
3,279902,00:22,00:29,Joy,Yes,22.0,29.0,7.0,6.0,Employee,8.97,45.49,45.53,POSITIVE,0.989574,"[{'SentimentWord': '', 'SentimentWordImportanc..."
4,279902,00:29,00:32,Ramelo,Okay. Were you,29.0,32.0,3.0,0.0,Agent,3.45,86.24,10.31,NEUTRAL,0.989574,"[{'SentimentWord': '', 'SentimentWordImportanc..."


## 7.2 Adding Silence Time and Spoken Time

In [269]:
def convert_to_seconds(time_str):
    try:
        # Parse time string using datetime
        time_obj = datetime.strptime(time_str, '%M:%S')
        # Convert to timedelta and extract total seconds
        total_seconds = time_obj.minute * 60 + time_obj.second
        return total_seconds
    except ValueError:
        return None  # Handle invalid time format

# Apply the function to each column using vectorized operations
df_intra_calls_data['start_second'] = df_intra_calls_data['start'].apply(convert_to_seconds)
df_intra_calls_data['end_second'] = df_intra_calls_data['end'].apply(convert_to_seconds)

In [270]:
df_intra_calls_data['time_spoken'] = df_intra_calls_data['end_second'] - df_intra_calls_data['start_second']
df_intra_calls_data['time_spoken'] = df_intra_calls_data['time_spoken'].where(df_intra_calls_data['time_spoken'] >= 0, 0)
df_intra_calls_data['time_spoken'] = df_intra_calls_data['time_spoken'].fillna(0)
df_intra_calls_data.head()

Unnamed: 0,call_id,start,end,speaker,text,start_second,end_second,time_spoken,time_silence,speaker_label,negative,neutral,positive,SentimentLable,SentimentScore,WordImportance
0,279902,00:01,00:05,Ramelo,Yes,1.0,5.0,4.0,0.0,Agent,8.97,45.490002,45.529999,POSITIVE,0.989574,"[{'SentimentWord': '', 'SentimentWordImportanc..."
1,279902,00:09,00:11,Joy,Yeah.,9.0,11.0,2.0,4.0,Employee,9.89,57.98,32.130001,POSITIVE,0.989574,"[{'SentimentWord': '', 'SentimentWordImportanc..."
2,279902,00:11,00:16,Ramelo,All right,11.0,16.0,5.0,0.0,Agent,7.15,52.119999,40.73,POSITIVE,0.989574,"[{'SentimentWord': '', 'SentimentWordImportanc..."
3,279902,00:22,00:29,Joy,Yes,22.0,29.0,7.0,6.0,Employee,8.97,45.490002,45.529999,POSITIVE,0.989574,"[{'SentimentWord': '', 'SentimentWordImportanc..."
4,279902,00:29,00:32,Ramelo,Okay. Were you,29.0,32.0,3.0,0.0,Agent,3.45,86.239998,10.31,POSITIVE,0.989574,"[{'SentimentWord': '', 'SentimentWordImportanc..."


In [148]:
df_intra_calls_data['time_silence'] = df_intra_calls_data['start_second'].shift(-1) - df_intra_calls_data['end_second']
df_intra_calls_data['time_silence'] = df_intra_calls_data['time_silence'].where(df_intra_calls_data['time_silence'] >= 0, 0)
df_intra_calls_data['time_silence'] = df_intra_calls_data['time_silence'].fillna(0)
df_intra_calls_data['time_silence'] = pd.DataFrame([0] + df_intra_calls_data['time_silence'].iloc[:-1].tolist())
df_intra_calls_data.head()

Unnamed: 0,call_id,start,end,speaker,text,negative,neutral,positive,SentimentLable,SentimentScore,WordImportance,start_second,end_second,time_spoken,time_silence
0,279902,00:00,00:01,Unknown,Hello.,2.23,42.529999,55.25,POSITIVE,0.999374,"[{'SentimentWord': '', 'SentimentWordImportanc...",0.0,1.0,1.0,0.0
1,279902,00:01,00:05,Ramelo,Yes,8.97,45.490002,45.529999,POSITIVE,0.999781,"[{'SentimentWord': '', 'SentimentWordImportanc...",1.0,5.0,4.0,0.0
2,279902,00:09,00:11,Joy,Yeah.,9.89,57.98,32.130001,POSITIVE,0.999524,"[{'SentimentWord': '', 'SentimentWordImportanc...",9.0,11.0,2.0,4.0
3,279902,00:11,00:16,Ramelo,All right,7.15,52.119999,40.73,POSITIVE,0.999842,"[{'SentimentWord': '', 'SentimentWordImportanc...",11.0,16.0,5.0,0.0
4,279902,00:22,00:29,Joy,Yes,8.97,45.490002,45.529999,POSITIVE,0.999781,"[{'SentimentWord': '', 'SentimentWordImportanc...",22.0,29.0,7.0,6.0


In [149]:
df_intra_calls_data.to_csv("df_intra_calls_data.csv", index=False)

# 9. Persisting to BQ

In [381]:
# import pandas as pd
df_inter_calls_data = pd.read_csv('df_inter_calls_data.csv')
df_inter_calls_data.call_id = df_inter_calls_data.call_id.astype('str')
df_inter_calls_data.to_csv("df_inter_calls_data.csv")
# df_inter_calls_data.head()

df_intra_calls_data = pd.read_csv('df_intra_calls_data.csv')
df_intra_calls_data.call_id = df_intra_calls_data.call_id.astype('str')
df_intra_calls_data.to_csv("df_intra_calls_data.csv")
# df_intra_calls_data.head()

In [382]:
# # bucket_name = "tte-eid-d-gcs-mlops-users-files"
# # bucket_folder = "VoiceAI/tempFolder"
# # storage_path = f"gs://{bucket_name}/{bucket_folder}"

project_id = "eid-dw-dev-a5cf"

target_table_id = "EdwBI.Calls_Insights_Intra_Gemini-sun"
df = df_intra_calls_data

# target_table_id = "EdwBI.Calls_Insights_Inter_Gemini-sun"
# df = df_inter_calls_data

client = bigquery.Client()
job_config = bigquery.job.load.LoadJobConfig()
table_id = f"{project_id}.{target_table_id}"
job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE
job = client.load_table_from_dataframe(dataframe=df, destination=table_id, job_config=job_config, project=project_id)
job.result()

LoadJob<project=eid-dw-dev-a5cf, location=US, id=a25de6c7-06c9-4344-b785-09875e49bef5>

# Misc

In [371]:
df_inter_calls_data.columns

Index(['call_id', 'call_text', 'call_summary', 'topic', 'category',
       'sub_category', 'overall_user_sentiments', 'overall_agent_sentiments',
       'agent_coaching'],
      dtype='object')