### Step 11.  Summaries of Summaries - for Cohere

### Import initial libraries and import the submissions CSV that includes sentiment scores

In [1]:
import pandas as pd


In [2]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

comments_tokenizer = AutoTokenizer.from_pretrained("stevied67/pegasus-subreddit-comments-summarizer")

#### Retrieve data from step 10

In [3]:
comments_df = pd.read_csv('tfcc_cohere_comments_with_accuracy.csv')



#### Consolidate the comments by topic

In previous steps multiple comments were concatenated and summarized. This step takes the previously generated summaries, concatenates them together, and then does a final round of summarization.

In [4]:
df = comments_df.copy()

In [5]:
def concatenate_summaries(group, max_words=2000):
    concatenated_summary = ""
    word_count = 0
    for summary in group['summary']:
        current_summary_words = summary.split()
        if word_count + len(current_summary_words) <= max_words:
            concatenated_summary += summary + " ~ "
            word_count += len(current_summary_words)
        else:
            break
    return concatenated_summary.strip(" ~ ")

# Group the DataFrame by 'topic' and apply the concatenate_summaries function
grouped = df.groupby('topic').apply(concatenate_summaries).reset_index()

# Rename columns of the new DataFrame
grouped.columns = ['topic', 'concatenated_summary']


#### The grouped dataframe is a concatenation of previous summaries

In [6]:
grouped

Unnamed: 0,topic,concatenated_summary
0,0.0,I work in a call center. I got a call from a v...
1,1.0,I used to work for the mental health branch of...
2,2.0,I had to go back and reread that. You're givin...
3,3.0,I work for a different variety of insurance an...
4,4.0,A woman called a dental office to ask for her ...
5,5.0,A woman in the US has been filmed berating a c...
6,6.0,I had a similar situation when doing customer ...
7,7.0,I quit my job after 20 years and made over 27 ...
8,8.0,I don't blame you for being upset. I would be ...
9,9.0,I was in retail for a decade. I was in a call ...


#### Generate summaries of summaries for comments

In [7]:
import cohere
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
import time

co = cohere.Client("XXXXXXXXXX")

def cohere_summary(text):
    attempts = 0
    while attempts < 3:
        try:
            response = co.summarize(
                model='summarize-xlarge',
                length='long',
                additional_command='focus on advice given, summary should be a minimum of 120 words',

                format='paragraph',
                temperature=0.3,
                text=text
            )
            return response.summary
        except Exception as e:
            attempts += 1
            time.sleep(1)

    return None

def process_row(index, row, new_grouped):
    rtext = row["concatenated_summary"]
    summary = cohere_summary(rtext)
    
    if summary is not None:
        new_grouped.at[index, "new_summary"] = summary
    return summary is not None







In [8]:

# Create new_grouped DataFrame
new_grouped = grouped.copy()
new_grouped["new_summary"] = None

max_workers = 1

with ThreadPoolExecutor(max_workers=max_workers) as executor:
    tasks = []
    
    for index, row in grouped.iterrows():
        tasks.append(executor.submit(process_row, index, row, new_grouped))
    
    # Display progress using tqdm
    for task in tqdm(as_completed(tasks), total=len(tasks), desc="Processing rows"):
        pass

Processing rows: 100%|██████████| 20/20 [01:50<00:00,  5.53s/it]


In [9]:
new_grouped

Unnamed: 0,topic,concatenated_summary,new_summary
0,0.0,I work in a call center. I got a call from a v...,I work in a call center. I got a call from a v...
1,1.0,I used to work for the mental health branch of...,I had a mother call me one time to request we ...
2,2.0,I had to go back and reread that. You're givin...,OP is a salesperson who sold a TV to a custome...
3,3.0,I work for a different variety of insurance an...,I'm sorry but I'm not going to give you a clai...
4,4.0,A woman called a dental office to ask for her ...,I work in a call center. I deal with a lot of ...
5,5.0,A woman in the US has been filmed berating a c...,A woman in the US has been filmed berating a c...
6,6.0,I had a similar situation when doing customer ...,I'm an ex-call center worker. I worked for a m...
7,7.0,I quit my job after 20 years and made over 27 ...,I worked at a call center. We would drink on t...
8,8.0,I don't blame you for being upset. I would be ...,I'm so sorry you had to go through that. Ive h...
9,9.0,I was in retail for a decade. I was in a call ...,I've been in a call centre for 5 years. I cry ...


In [10]:
new_grouped.to_csv('tfcc_comments_summaries_of_summaries_cohere.csv', index=False)

#### Create summaries of summaries for submissions.

#### Read in the submissions data that has passed accuracy checks

In [2]:
#new_filtered_df.to_csv('tfcc_submissions_top20_with_sentiment_including_comment_sentiment_and_summaries.csv', index=False)
df = pd.read_csv('tfcc_submissions_cohere_with_accuracy.csv')


In [3]:
df

Unnamed: 0,id,title,selftext,author,score,num_comments,created_date,selftext_length,topic,pos_sentiment,neg_sentiment,comments_pos_sentiment,comments_neg_sentiment,summary,bert_f1
0,9odgd4,"If you cuss before you tell me why, I'm hangin...","so. this just happened. i had a caller, we wil...",sleepernick,4183,167,2018-10-15 15:03:20,317,0,0.002438,0.997562,0.167575,0.832425,So I work in a call center. We have recorded l...,0.812459
1,acncqg,I swear I’m not usually this dumb: when the ca...,my absolute favorite type of calls are when th...,QuoteTheKitty,2002,63,2019-01-04 22:13:45,173,0,0.360828,0.639172,0.239294,0.760706,My absolute favorite type of calls are when th...,0.902164
2,js29up,The mute button is not the customers friend,at a previous call center i worked they wanted...,supersizedlady,1756,115,2020-11-11 05:30:44,240,0,0.003723,0.996277,0.305934,0.694066,I worked at a call center. We were not allowed...,0.886994
3,h0gvxc,Customer loves his analogy until I use it agai...,this call was from a while ago so i don't real...,BostonB96,1411,42,2020-06-10 18:28:42,330,0,0.184346,0.815654,0.001718,0.998282,This call was from a while ago so I don't real...,0.934155
4,j101m6,Finally cracked on a caller.,i was barely awake enough to speak to humans b...,AgentGerk,1407,113,2020-09-27 22:15:44,527,0,0.323572,0.676428,0.135200,0.864800,I work a customer service job where I answer c...,0.844550
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1844,4oloj6,Shooter threat,"wednesday night, someone called in and threate...",Believeinthis,18,20,2016-06-17 22:08:20,149,19,0.001418,0.998582,0.001219,0.998781,"Last week, someone called in a bomb threat to ...",0.930623
1845,2dbpxp,How come people assume call center employees a...,i've had a few infuriating calls. people just ...,lacquerqueen,15,21,2014-08-12 11:05:09,254,19,0.001012,0.998988,0.150063,0.849937,I work in a call center. I answer the phone. I...,0.909976
1846,4x4pjt,Probably getting a CA because I didn't apologi...,customer complained that i blew her off despi...,evosthunder,14,4,2016-08-10 22:06:34,104,19,0.013253,0.986747,,,I work in a call center. Today I got chewed ou...,0.841947
1847,2ldeab,Don't Feed Stray Cats,"about two years ago, an old lady got upset and...",EveryoneHatesCJ,7,3,2014-11-05 15:11:42,125,19,0.005375,0.994625,,,I work in a call center. It's a large company ...,0.838430


#### Combine summaries by topic for consolidated summarization

In [None]:
# Group the tfcc_comments_with_topics DataFrame by the 'topic' column
grouped = df.groupby('topic')

# Initialize the new DataFrame with empty lists
new_df = pd.DataFrame({'topic': [], 'summary': [], 'title': []})

# Iterate through each group in the grouped DataFrame
for group_name, group_df in grouped:
    # Concatenate the 'summary' and 'emotion' values in the group with '~~' separator
    summary_list = group_df['summary'].tolist()
    emotion_list = group_df['title'].tolist()
    summary_concatenated = []
    emotion_concatenated = []
    for summary, emotion in zip(summary_list, emotion_list):
        # Check if the concatenated summary would exceed 2000 words
        words_summary = ' '.join(summary_concatenated).split()
        if len(words_summary) + len(summary.split()) > 2000:
            # Add the current concatenated summary and emotion to the new DataFrame
            summary_str = '~~'.join(summary_concatenated).strip()
            emotion_str = '~~'.join(emotion_concatenated).strip()
            new_df = new_df.append({'topic': group_name, 'summary': summary_str, 'title': emotion_str}, ignore_index=True)
            summary_concatenated = []
            emotion_concatenated = []
        # Concatenate the current summary and emotion to the concatenated summary and emotion with '~~' separator
        summary_concatenated.append(summary)
        emotion_str = '~~'.join(emotion) if isinstance(emotion, list) else emotion
        emotion_concatenated.append(emotion_str)
    # Add any remaining concatenated summary and emotion to the new DataFrame
    summary_str = '~~'.join(summary_concatenated).strip()
    emotion_str = '~~'.join(emotion_concatenated).strip()
    new_df = new_df.append({'topic': group_name, 'summary': summary_str, 'title': emotion_str}, ignore_index=True)

In [5]:
new_df

Unnamed: 0,topic,summary,title
0,0.0,So I work in a call center. We have recorded l...,"If you cuss before you tell me why, I'm hangin..."
1,0.0,I worked in a call center for a new online ban...,So you won't let me access my wife's account? ...
2,0.0,My friend's partner works in tech support. He ...,Mistaken gender~~I intentionally defrauded the...
3,0.0,Customer service email support is a gold mine ...,Hulu ad~~How to use your cellphone for a callc...
4,0.0,I work in a call center for a large national b...,Asking for an email address is apparently quit...
...,...,...,...
94,18.0,I got a call this morning from an interpreter ...,"Oh, it's an emergency? Okay, I'll just stop re..."
95,18.0,A customer has complained about a regulated co...,"Update: Complaining about complaining, what fu..."
96,19.0,I work in a call center. My job is to assist c...,The lawyer card~~Can I speak to someone from t...
97,19.0,I worked for a TV/phone/internet companies cal...,Call center malicious compliance.~~I’ll wait a...


In [6]:
grouped = new_df.copy()

In [7]:
grouped

Unnamed: 0,topic,summary,title
0,0.0,So I work in a call center. We have recorded l...,"If you cuss before you tell me why, I'm hangin..."
1,0.0,I worked in a call center for a new online ban...,So you won't let me access my wife's account? ...
2,0.0,My friend's partner works in tech support. He ...,Mistaken gender~~I intentionally defrauded the...
3,0.0,Customer service email support is a gold mine ...,Hulu ad~~How to use your cellphone for a callc...
4,0.0,I work in a call center for a large national b...,Asking for an email address is apparently quit...
...,...,...,...
94,18.0,I got a call this morning from an interpreter ...,"Oh, it's an emergency? Okay, I'll just stop re..."
95,18.0,A customer has complained about a regulated co...,"Update: Complaining about complaining, what fu..."
96,19.0,I work in a call center. My job is to assist c...,The lawyer card~~Can I speak to someone from t...
97,19.0,I worked for a TV/phone/internet companies cal...,Call center malicious compliance.~~I’ll wait a...


#### Function to call Cohere and create summaries of summaries for submissions

In [8]:
import cohere
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
import time

co = cohere.Client("XXXXXXXXXX")

def cohere_summary(text):
    attempts = 0
    while attempts < 3:
        try:
            response = co.summarize(
                model='summarize-xlarge',
                length='long',
                format='paragraph',
                temperature=0.3,
                text=text
            )
            return response.summary
        except Exception as e:
            attempts += 1
            time.sleep(1)

    return None

def process_row(index, row, new_grouped):
    rtext = row["summary"]
    summary = cohere_summary(rtext)
    
    if summary is not None:
        new_grouped.at[index, "new_summary"] = summary
    return summary is not None

In [9]:
# Create new_grouped DataFrame
new_grouped = grouped.copy()
new_grouped["new_summary"] = None

max_workers = 5

with ThreadPoolExecutor(max_workers=max_workers) as executor:
    tasks = []
    
    for index, row in grouped.iterrows():
        tasks.append(executor.submit(process_row, index, row, new_grouped))
    
    # Display progress using tqdm
    for task in tqdm(as_completed(tasks), total=len(tasks), desc="Processing rows"):
        pass

Processing rows: 100%|██████████| 99/99 [03:31<00:00,  2.14s/it]


In [10]:
new_grouped.head(10)

Unnamed: 0,topic,summary,title,new_summary
0,0.0,So I work in a call center. We have recorded l...,"If you cuss before you tell me why, I'm hangin...","I work in a call center. Yesterday, I had a ca..."
1,0.0,I worked in a call center for a new online ban...,So you won't let me access my wife's account? ...,I worked in a call center for a new online ban...
2,0.0,My friend's partner works in tech support. He ...,Mistaken gender~~I intentionally defrauded the...,Call centre workers have been sharing their mo...
3,0.0,Customer service email support is a gold mine ...,Hulu ad~~How to use your cellphone for a callc...,I'm a people pleaser. I don't like being confr...
4,0.0,I work in a call center for a large national b...,Asking for an email address is apparently quit...,Call centers are the pits of the service indus...
5,0.0,Today I got a call from a lady that had a very...,You open when I say you open!~~This poor old l...,I'm not a fan of customer service. I'm not a f...
6,1.0,A customer wanted a replacement card as the on...,Is. Your. Card. Damaged?!~~A scam that shook m...,A customer wanted a replacement card as the on...
7,1.0,"Mrs Customer lost her credit card. Me, an empl...","Yes Mrs Customer, I do need your full SSN~~Stu...","A collection of funny, sad, and just plain WTF..."
8,1.0,I work for a bank. A woman called today who wa...,No you don’t get rewards point for failing bas...,A woman called today who was a new customer cr...
9,1.0,Customer service. 40-something year old custom...,“OK Let me put my mom on.”~~You HAVE to waive ...,A customer calls in because her debit card has...


In [14]:
import cohere
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
import time

co = cohere.Client("XXXXXXXXXX")

def cohere_summary(text):
    attempts = 0
    while attempts < 3:
        try:
            response = co.summarize(
                model='summarize-xlarge',
                additional_command='focusing on theme',
                length='short',
                format='bullets',
                temperature=0.3,
                text=text
            )
            return response.summary
        except Exception as e:
            attempts += 1
            time.sleep(1)

    return None

def process_row(index, row, final_grouped):
    rtext = row["new_summary"]
    summary = cohere_summary(rtext)
    
    if summary is not None:
        final_grouped.at[index, "theme"] = summary
    return summary is not None

In [13]:
final_grouped.head(10)

Unnamed: 0,topic,summary,title,new_summary,theme
0,0.0,So I work in a call center. We have recorded l...,"If you cuss before you tell me why, I'm hangin...",,- Call center workers share their most memorab...
1,0.0,I worked in a call center for a new online ban...,So you won't let me access my wife's account? ...,,- A selection of stories from users of this site.
2,0.0,My friend's partner works in tech support. He ...,Mistaken gender~~I intentionally defrauded the...,,- A former call centre worker has revealed the...
3,0.0,Customer service email support is a gold mine ...,Hulu ad~~How to use your cellphone for a callc...,,"- I just got called stupid, incompetent and a ..."
4,0.0,I work in a call center for a large national b...,Asking for an email address is apparently quit...,,"- A collection of funny, sad, and true stories..."
5,0.0,Today I got a call from a lady that had a very...,You open when I say you open!~~This poor old l...,,"- I'm not sure if it's a Southern thing, a gen..."
6,1.0,A customer wanted a replacement card as the on...,Is. Your. Card. Damaged?!~~A scam that shook m...,,- A former bank worker has revealed the hilari...
7,1.0,"Mrs Customer lost her credit card. Me, an empl...","Yes Mrs Customer, I do need your full SSN~~Stu...",,- People tell their most memorable customer se...
8,1.0,I work for a bank. A woman called today who wa...,No you don’t get rewards point for failing bas...,,- A former Australian bank worker has revealed...
9,1.0,Customer service. 40-something year old custom...,“OK Let me put my mom on.”~~You HAVE to waive ...,,- A collection of the most ridiculous customer...


In [15]:
final_grouped['new_summary'] = new_grouped['new_summary'].values


In [16]:
final_grouped

Unnamed: 0,topic,summary,title,new_summary,theme
0,0.0,So I work in a call center. We have recorded l...,"If you cuss before you tell me why, I'm hangin...","I work in a call center. Yesterday, I had a ca...",- Call center workers share their most memorab...
1,0.0,I worked in a call center for a new online ban...,So you won't let me access my wife's account? ...,I worked in a call center for a new online ban...,- A selection of stories from users of this site.
2,0.0,My friend's partner works in tech support. He ...,Mistaken gender~~I intentionally defrauded the...,Call centre workers have been sharing their mo...,- A former call centre worker has revealed the...
3,0.0,Customer service email support is a gold mine ...,Hulu ad~~How to use your cellphone for a callc...,I'm a people pleaser. I don't like being confr...,"- I just got called stupid, incompetent and a ..."
4,0.0,I work in a call center for a large national b...,Asking for an email address is apparently quit...,Call centers are the pits of the service indus...,"- A collection of funny, sad, and true stories..."
...,...,...,...,...,...
94,18.0,I got a call this morning from an interpreter ...,"Oh, it's an emergency? Okay, I'll just stop re...",I work for a local power company. lady called ...,- Call centre workers have shared some of the ...
95,18.0,A customer has complained about a regulated co...,"Update: Complaining about complaining, what fu...",A customer has complained about a regulated co...,- A customer has complained about a regulated ...
96,19.0,I work in a call center. My job is to assist c...,The lawyer card~~Can I speak to someone from t...,I work in a call center. My job is to assist c...,- I work in a call center.\n- My job is to ass...
97,19.0,I worked for a TV/phone/internet companies cal...,Call center malicious compliance.~~I’ll wait a...,"""I work in a call center. Yesterday a customer...",- A call centre worker has shared the moment a...


In [17]:
df = pd.read_csv('tfcc_submissions_topic_counts_top_20')


In [18]:
df

Unnamed: 0,Topic,Count,Name
0,-1,5316,-1_get_like_work_day
1,0,796,0_like_help_name_say
2,1,591,1_card_bank_credit_fraud
3,2,457,2_delivery_shipping_store_email
4,3,283,3_insurance_car_claims_coverage
...,...,...,...
57,56,13,56_guam_california_job_resume
58,57,12,57_hate_sticker_legion_tolls
59,58,12,58_inactivate_calling_dialled_cincinnati
60,59,11,59_kevin_hurricane_name_deadline


In [19]:
import pandas as pd

# Merge the DataFrames on the columns containing topics
merged_df = final_grouped.merge(df, left_on='topic', right_on='Topic', how='left')

# Create a new column 'topic_name' with the value from the 'Name' column in df
merged_df['topic_name'] = merged_df['Name']

# Drop unnecessary columns
merged_df.drop(['Topic', 'Name'], axis=1, inplace=True)

# Update final_grouped with the new DataFrame
final_grouped = merged_df


In [20]:
final_grouped

Unnamed: 0,topic,summary,title,new_summary,theme,Count,topic_name
0,0.0,So I work in a call center. We have recorded l...,"If you cuss before you tell me why, I'm hangin...","I work in a call center. Yesterday, I had a ca...",- Call center workers share their most memorab...,796,0_like_help_name_say
1,0.0,I worked in a call center for a new online ban...,So you won't let me access my wife's account? ...,I worked in a call center for a new online ban...,- A selection of stories from users of this site.,796,0_like_help_name_say
2,0.0,My friend's partner works in tech support. He ...,Mistaken gender~~I intentionally defrauded the...,Call centre workers have been sharing their mo...,- A former call centre worker has revealed the...,796,0_like_help_name_say
3,0.0,Customer service email support is a gold mine ...,Hulu ad~~How to use your cellphone for a callc...,I'm a people pleaser. I don't like being confr...,"- I just got called stupid, incompetent and a ...",796,0_like_help_name_say
4,0.0,I work in a call center for a large national b...,Asking for an email address is apparently quit...,Call centers are the pits of the service indus...,"- A collection of funny, sad, and true stories...",796,0_like_help_name_say
...,...,...,...,...,...,...,...
94,18.0,I got a call this morning from an interpreter ...,"Oh, it's an emergency? Okay, I'll just stop re...",I work for a local power company. lady called ...,- Call centre workers have shared some of the ...,59,18_meter_heater_complaint_manager
95,18.0,A customer has complained about a regulated co...,"Update: Complaining about complaining, what fu...",A customer has complained about a regulated co...,- A customer has complained about a regulated ...,59,18_meter_heater_complaint_manager
96,19.0,I work in a call center. My job is to assist c...,The lawyer card~~Can I speak to someone from t...,I work in a call center. My job is to assist c...,- I work in a call center.\n- My job is to ass...,55,19_supervisor_manager_escalated_get
97,19.0,I worked for a TV/phone/internet companies cal...,Call center malicious compliance.~~I’ll wait a...,"""I work in a call center. Yesterday a customer...",- A call centre worker has shared the moment a...,55,19_supervisor_manager_escalated_get


In [21]:
final_grouped.to_csv('tfcc_top_20_summaries_of_summaries_cohere.csv', index=False)

#### Correct theme column

In [11]:

df = pd.read_csv('tfcc_top_20_summaries_of_summaries_cohere.csv')


In [12]:
df

Unnamed: 0,topic,summary,title,new_summary,Count,topic_name,theme
0,0.0,So I work in a call center. We have recorded l...,"If you cuss before you tell me why, I'm hangin...","I work in a call center. Yesterday, I had a ca...",796,0_like_help_name_say,"professionalism, verbal conflict"
1,0.0,I worked in a call center for a new online ban...,So you won't let me access my wife's account? ...,I worked in a call center for a new online ban...,796,0_like_help_name_say,Call center
2,0.0,My friend's partner works in tech support. He ...,Mistaken gender~~I intentionally defrauded the...,Call centre workers have been sharing their mo...,796,0_like_help_name_say,Call centre work
3,0.0,Customer service email support is a gold mine ...,Hulu ad~~How to use your cellphone for a callc...,I'm a people pleaser. I don't like being confr...,796,0_like_help_name_say,people-pleasing confrontation assertiv...
4,0.0,I work in a call center for a large national b...,Asking for an email address is apparently quit...,Call centers are the pits of the service indus...,796,0_like_help_name_say,Call centers Work Customer service
...,...,...,...,...,...,...,...
94,18.0,I got a call this morning from an interpreter ...,"Oh, it's an emergency? Okay, I'll just stop re...",I work for a local power company. lady called ...,59,18_meter_heater_complaint_manager,Customer service
95,18.0,A customer has complained about a regulated co...,"Update: Complaining about complaining, what fu...",A customer has complained about a regulated co...,59,18_meter_heater_complaint_manager,Complaint handling
96,19.0,I work in a call center. My job is to assist c...,The lawyer card~~Can I speak to someone from t...,I work in a call center. My job is to assist c...,55,19_supervisor_manager_escalated_get,profanity
97,19.0,I worked for a TV/phone/internet companies cal...,Call center malicious compliance.~~I’ll wait a...,"""I work in a call center. Yesterday a customer...",55,19_supervisor_manager_escalated_get,"Customer service, workplace"


In [8]:
df = df.drop('theme', axis=1)


In [2]:
import cohere
co = cohere.Client('XXXXXXXXXX') 
response = co.generate(
  model='command-xlarge-beta',
  prompt='classify the theme of this text in 2 or 3 descriptive words only:  \n\nI worked in medical billing from 2015-2019. I have many tales I can share. Part of my job duties involved calling people to go over what their insurance policies would cover in regards to surgical procedures, treatments, visits, and sometimes medications. The annoying issues I ran into were cases of parents of adult children living at home demanding that I tell them the purpose of the call for their adult children and I had to explain that I could only speak to their adult children and then there were the spouses of patients who demanded the same thing. I found often there are a good amount of people who didn\'t seem to understand or even know about the existence of HIPAA laws. Here\'s one of those tales.\n\nMe-Billing Agent (BA)\n\nAnnoying Wife of Patient-AWP\n\nAdditional info-Patient was scheduled for an upcoming surgery and no HIPAA authorizations were on his file.\n\nI call the home- \"Hello, this is BA from Such and Such Health Insurance company, may I speak with John Doe?\"\n\nAWP- He\'s out in the garage, I\'m his wife.\"\n\nBA- \"Is he available to speak with me? Or can I arrange a call back?\"\n\nAWP-\"I filled out his paperwork and you can speak to me\".\n\nBA-\"M\'aam\" I can only speak with him\".\n\nAWP- \"Like I said I filled out his paperwork and you speak to me\".\n\nI then again tell her that I can only speak to him. I set up the callback. Called several hours later and spoke to the patient. I could hear the wife in the background butting in with the answers and such.\n\nAfter that encounter and similar encounters on that job I made the vow that if I ever got married I wouldn\'t butt into things or situations that my wife can handle on her own given that she has the ability to speak on her own behalf. Yes, there are situations that warrant medical POAs and HIPAA authorization involvement because of things like dementia, brain injuries, issues communicating due to stroke, illness, etc. If a person is able to speak on their own behalf and they have all their faculties in tact, then they should do so and spouses or other family members need to respect that and not butt in or make demands of people working for insurance companies, hospital billing etc.\n\nAdditional note-Another memorable incident on that job involved a 19 year old male. His mother answered the phone number on file and kept ranting \"I\'m his mother you can talk to me\". I laughed after that call because I lived at home due to finances and to help my parents with my grandmother until I was 21. But, my parents never butted into my business regarding insurance, my job, and other things. Years later, when I worked in billing it made me glad that my parents aren\'t nosy assholes.',
  max_tokens=300,
  temperature=0.9,
  k=0,
  stop_sequences=[],
  return_likelihoods='NONE')
print('Prediction: {}'.format(response.generations[0].text))

Prediction: 

health insurance, legal


In [5]:
import cohere
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
import time

co = cohere.Client("XXXXXXXXXX")

def cohere_summary(text):
    attempts = 0
    while attempts < 3:
        try:
            response = co.generate(
                model='command-xlarge-beta',
                prompt= text + '\n :classify the theme of this text in 2 or 3 descriptive words only',
                max_tokens=300,
                temperature=0.9,
                k=0,
                stop_sequences=[],
                return_likelihoods='NONE'
            )
            return response.generations[0].text
        except Exception as e:
            attempts += 1
            time.sleep(1)

    return None

def process_row(index, row, final_grouped):
    rtext = row["new_summary"]
    summary = cohere_summary(rtext)
    
    if summary is not None:
        final_grouped.at[index, "theme"] = summary
    return summary is not None

In [6]:
# Create new_grouped DataFrame
new_df = df.copy()
new_df["theme"] = None

max_workers = 1

with ThreadPoolExecutor(max_workers=max_workers) as executor:
    tasks = []
    
    for index, row in df.iterrows():
        tasks.append(executor.submit(process_row, index, row, new_df))
    
    # Display progress using tqdm
    for task in tqdm(as_completed(tasks), total=len(tasks), desc="Processing rows"):
        pass

Processing rows: 100%|██████████| 99/99 [19:35<00:00, 11.88s/it]


In [7]:
new_df

Unnamed: 0,topic,summary,title,new_summary,Count,topic_name,theme
0,0.0,So I work in a call center. We have recorded l...,"If you cuss before you tell me why, I'm hangin...","I work in a call center. Yesterday, I had a ca...",796,0_like_help_name_say,\n\nCustomer service
1,0.0,I worked in a call center for a new online ban...,So you won't let me access my wife's account? ...,I worked in a call center for a new online ban...,796,0_like_help_name_say,\n\ncustomer service
2,0.0,My friend's partner works in tech support. He ...,Mistaken gender~~I intentionally defrauded the...,Call centre workers have been sharing their mo...,796,0_like_help_name_say,\n\nCall centre - work - experience
3,0.0,Customer service email support is a gold mine ...,Hulu ad~~How to use your cellphone for a callc...,I'm a people pleaser. I don't like being confr...,796,0_like_help_name_say,\n\nPeople pleasing
4,0.0,I work in a call center for a large national b...,Asking for an email address is apparently quit...,Call centers are the pits of the service indus...,796,0_like_help_name_say,"\n\ncall centers, customer service, work"
...,...,...,...,...,...,...,...
94,18.0,I got a call this morning from an interpreter ...,"Oh, it's an emergency? Okay, I'll just stop re...",I work for a local power company. lady called ...,59,18_meter_heater_complaint_manager,\nHealth care
95,18.0,A customer has complained about a regulated co...,"Update: Complaining about complaining, what fu...",A customer has complained about a regulated co...,59,18_meter_heater_complaint_manager,\nDispute resolution
96,19.0,I work in a call center. My job is to assist c...,The lawyer card~~Can I speak to someone from t...,I work in a call center. My job is to assist c...,55,19_supervisor_manager_escalated_get,\n\ncustomer service
97,19.0,I worked for a TV/phone/internet companies cal...,Call center malicious compliance.~~I’ll wait a...,"""I work in a call center. Yesterday a customer...",55,19_supervisor_manager_escalated_get,\nCustomer service\n\nCall center


In [13]:
temp_df = new_df.copy()

In [24]:
temp_df = temp_df.dropna()

In [15]:
# create a boolean mask for rows with missing values in the 'city' column
mask = new_df['theme'].isna()

# select only the rows with missing values in the 'city' column
df = df[mask]

In [27]:
temp_df

Unnamed: 0,topic,summary,title,new_summary,Count,topic_name,theme
0,0.0,So I work in a call center. We have recorded l...,"If you cuss before you tell me why, I'm hangin...","I work in a call center. Yesterday, I had a ca...",796,0_like_help_name_say,"I'm sorry. Me: I don't think you're an idiot,..."
1,0.0,I worked in a call center for a new online ban...,So you won't let me access my wife's account? ...,I worked in a call center for a new online ban...,796,0_like_help_name_say,\n\nCustomer service; frustration; entertainment
2,0.0,My friend's partner works in tech support. He ...,Mistaken gender~~I intentionally defrauded the...,Call centre workers have been sharing their mo...,796,0_like_help_name_say,"\n\nWork, job, or profession-related\n"
3,0.0,Customer service email support is a gold mine ...,Hulu ad~~How to use your cellphone for a callc...,I'm a people pleaser. I don't like being confr...,796,0_like_help_name_say,"\n\naccommodating, considerate, polite"
4,0.0,I work in a call center for a large national b...,Asking for an email address is apparently quit...,Call centers are the pits of the service indus...,796,0_like_help_name_say,"\n\nCall centers, low pay, long hours, ridicul..."
5,0.0,Today I got a call from a lady that had a very...,You open when I say you open!~~This poor old l...,I'm not a fan of customer service. I'm not a f...,796,0_like_help_name_say,"\n\nThe theme of this text is ""I'm not a fan o..."
6,1.0,A customer wanted a replacement card as the on...,Is. Your. Card. Damaged?!~~A scam that shook m...,A customer wanted a replacement card as the on...,591,1_card_bank_credit_fraud,"\n\n""Customer service"" and ""bank"""
7,1.0,"Mrs Customer lost her credit card. Me, an empl...","Yes Mrs Customer, I do need your full SSN~~Stu...","A collection of funny, sad, and just plain WTF...",591,1_card_bank_credit_fraud,\n\nThe theme of this text is customer service...
8,1.0,I work for a bank. A woman called today who wa...,No you don’t get rewards point for failing bas...,A woman called today who was a new customer cr...,591,1_card_bank_credit_fraud,"\n\nSuspicious, scam, fraud"
9,1.0,Customer service. 40-something year old custom...,“OK Let me put my mom on.”~~You HAVE to waive ...,A customer calls in because her debit card has...,591,1_card_bank_credit_fraud,"\n\nCustomer service, fraud, child support"


In [28]:
new_df

Unnamed: 0,topic,summary,title,new_summary,Count,topic_name,theme
10,1.0,I work in a call centre. We have recently upda...,"I'm an accountant, hence I'm right.~~You don't...",I work in a call centre. This means that we ca...,591,1_card_bank_credit_fraud,"\nFrustration, management"
11,1.0,I work in a call center. Me and a coworker of ...,Cancelling your card does not mean you don't h...,I work in a call center. Me and a coworker of ...,591,1_card_bank_credit_fraud,\n\nthe theme of this text is work\nmotifs: wo...
12,2.0,A food delivery customer service rep has share...,We can hear you when you’re on hold. Say goodb...,A customer service rep has shared the racist a...,457,2_delivery_shipping_store_email,"\n\nCustomer service, racism"
13,2.0,When I was in my early 20s I used to middle-ma...,"Not a call center , but customer called my sto...",I work in customer service. I deal with angry ...,457,2_delivery_shipping_store_email,\nthe theme of this text is customer service w...
14,2.0,We have a repeat customer who is known for bei...,"Sure, I'll just inconvenience a few hundred pe...",We have a repeat customer who is known for bei...,457,2_delivery_shipping_store_email,\nCustomer service\norder fulfillment
...,...,...,...,...,...,...,...
94,18.0,I got a call this morning from an interpreter ...,"Oh, it's an emergency? Okay, I'll just stop re...",I work for a local power company. lady called ...,59,18_meter_heater_complaint_manager,"\n""power"" and ""electricity"""
95,18.0,A customer has complained about a regulated co...,"Update: Complaining about complaining, what fu...",A customer has complained about a regulated co...,59,18_meter_heater_complaint_manager,\ntheme: customer relations
96,19.0,I work in a call center. My job is to assist c...,The lawyer card~~Can I speak to someone from t...,I work in a call center. My job is to assist c...,55,19_supervisor_manager_escalated_get,"\n\nCustomer service, frustration, anger"
97,19.0,I worked for a TV/phone/internet companies cal...,Call center malicious compliance.~~I’ll wait a...,"""I work in a call center. Yesterday a customer...",55,19_supervisor_manager_escalated_get,After that she just told us to send her a rep...


In [31]:
combined_df = pd.concat([temp_df, new_df])


In [49]:
combined_df

Unnamed: 0,topic,summary,title,new_summary,Count,topic_name,theme
0,0.0,So I work in a call center. We have recorded l...,"If you cuss before you tell me why, I'm hangin...","I work in a call center. Yesterday, I had a ca...",796,0_like_help_name_say,"I'm sorry. Me: I don't think you're an idiot,..."
1,0.0,I worked in a call center for a new online ban...,So you won't let me access my wife's account? ...,I worked in a call center for a new online ban...,796,0_like_help_name_say,\n\nCustomer service; frustration; entertainment
2,0.0,My friend's partner works in tech support. He ...,Mistaken gender~~I intentionally defrauded the...,Call centre workers have been sharing their mo...,796,0_like_help_name_say,"\n\nWork, job, or profession-related\n"
3,0.0,Customer service email support is a gold mine ...,Hulu ad~~How to use your cellphone for a callc...,I'm a people pleaser. I don't like being confr...,796,0_like_help_name_say,"\n\naccommodating, considerate, polite"
4,0.0,I work in a call center for a large national b...,Asking for an email address is apparently quit...,Call centers are the pits of the service indus...,796,0_like_help_name_say,"\n\nCall centers, low pay, long hours, ridicul..."
...,...,...,...,...,...,...,...
94,18.0,I got a call this morning from an interpreter ...,"Oh, it's an emergency? Okay, I'll just stop re...",I work for a local power company. lady called ...,59,18_meter_heater_complaint_manager,"\n""power"" and ""electricity"""
95,18.0,A customer has complained about a regulated co...,"Update: Complaining about complaining, what fu...",A customer has complained about a regulated co...,59,18_meter_heater_complaint_manager,\ntheme: customer relations
96,19.0,I work in a call center. My job is to assist c...,The lawyer card~~Can I speak to someone from t...,I work in a call center. My job is to assist c...,55,19_supervisor_manager_escalated_get,"\n\nCustomer service, frustration, anger"
97,19.0,I worked for a TV/phone/internet companies cal...,Call center malicious compliance.~~I’ll wait a...,"""I work in a call center. Yesterday a customer...",55,19_supervisor_manager_escalated_get,After that she just told us to send her a rep...


In [None]:
new_df['theme'] = new_df['theme'].str.replace('\n', '').str.replace('*', '').str.replace('"', '')


In [9]:
new_df

Unnamed: 0,topic,summary,title,new_summary,Count,topic_name,theme
0,0.0,So I work in a call center. We have recorded l...,"If you cuss before you tell me why, I'm hangin...","I work in a call center. Yesterday, I had a ca...",796,0_like_help_name_say,Customer service
1,0.0,I worked in a call center for a new online ban...,So you won't let me access my wife's account? ...,I worked in a call center for a new online ban...,796,0_like_help_name_say,customer service
2,0.0,My friend's partner works in tech support. He ...,Mistaken gender~~I intentionally defrauded the...,Call centre workers have been sharing their mo...,796,0_like_help_name_say,Call centre - work - experience
3,0.0,Customer service email support is a gold mine ...,Hulu ad~~How to use your cellphone for a callc...,I'm a people pleaser. I don't like being confr...,796,0_like_help_name_say,People pleasing
4,0.0,I work in a call center for a large national b...,Asking for an email address is apparently quit...,Call centers are the pits of the service indus...,796,0_like_help_name_say,"call centers, customer service, work"
...,...,...,...,...,...,...,...
94,18.0,I got a call this morning from an interpreter ...,"Oh, it's an emergency? Okay, I'll just stop re...",I work for a local power company. lady called ...,59,18_meter_heater_complaint_manager,Health care
95,18.0,A customer has complained about a regulated co...,"Update: Complaining about complaining, what fu...",A customer has complained about a regulated co...,59,18_meter_heater_complaint_manager,Dispute resolution
96,19.0,I work in a call center. My job is to assist c...,The lawyer card~~Can I speak to someone from t...,I work in a call center. My job is to assist c...,55,19_supervisor_manager_escalated_get,customer service
97,19.0,I worked for a TV/phone/internet companies cal...,Call center malicious compliance.~~I’ll wait a...,"""I work in a call center. Yesterday a customer...",55,19_supervisor_manager_escalated_get,Customer serviceCall center


In [10]:
new_df.to_csv('tfcc_top_20_summaries_of_summaries_cohere.csv', index=False)

In [13]:
df = pd.read_csv('tfcc_top_20_summaries_of_summaries_cohere.csv')

In [14]:
topics = pd.read_csv('tfcc_top20_topics_with_sentiment_and_comments_sentiment.csv')


In [None]:
# Group the tfcc_comments_with_topics DataFrame by the 'topic' column
grouped = df.groupby('topic')

# Initialize the new DataFrame with empty lists
new_df = pd.DataFrame({'topic': [], 'new_summary': [], 'title': []})

# Iterate through each group in the grouped DataFrame
for group_name, group_df in grouped:
    # Concatenate the 'summary' and 'emotion' values in the group with '~~' separator
    summary_list = group_df['new_summary'].tolist()
    emotion_list = group_df['title'].tolist()
    summary_concatenated = []
    emotion_concatenated = []
    for summary, emotion in zip(summary_list, emotion_list):
        # Check if the concatenated summary would exceed 2000 words
        words_summary = ' '.join(summary_concatenated).split()
        if len(words_summary) + len(summary.split()) > 2000:
            # Add the current concatenated summary and emotion to the new DataFrame
            summary_str = '~~'.join(summary_concatenated).strip()
            emotion_str = '~~'.join(emotion_concatenated).strip()
            new_df = new_df.append({'topic': group_name, 'new_summary': summary_str, 'title': emotion_str}, ignore_index=True)
            summary_concatenated = []
            emotion_concatenated = []
        # Concatenate the current summary and emotion to the concatenated summary and emotion with '~~' separator
        summary_concatenated.append(summary)
        emotion_str = '~~'.join(emotion) if isinstance(emotion, list) else emotion
        emotion_concatenated.append(emotion_str)
    # Add any remaining concatenated summary and emotion to the new DataFrame
    summary_str = '~~'.join(summary_concatenated).strip()
    emotion_str = '~~'.join(emotion_concatenated).strip()
    new_df = new_df.append({'topic': group_name, 'new_summary': summary_str, 'title': emotion_str}, ignore_index=True)

In [16]:
new_df

Unnamed: 0,topic,new_summary,title
0,0.0,"I work in a call center. Yesterday, I had a ca...","If you cuss before you tell me why, I'm hangin..."
1,1.0,A customer wanted a replacement card as the on...,Is. Your. Card. Damaged?!~~A scam that shook m...
2,2.0,A customer service rep has shared the racist a...,We can hear you when you’re on hold. Say goodb...
3,3.0,A woman called to remove her husband from her ...,Don't cheat on your wife~~Insurance fraud ma’a...
4,4.0,I used to work at a call center. I don't anymo...,You're telling me I have to wear a mask to go ...
5,5.0,I work at a membership based roadside assistan...,"Today on ""I can't believe I had to explain thi..."
6,6.0,I work in telecom. A customer called demanding...,I will never forget the sound of this customer...
7,7.0,"I'm 32, getting bullied in work and I honestly...","At 32, I’m getting bullied at work~~Coworker s..."
8,8.0,I work in a call center. Today a client was re...,“I want to speak to somebody who speaks real e...
9,9.0,"I recently got promoted, which involved me mov...",In which new management fires everyone~~I FINA...


In [17]:
topics

Unnamed: 0,Topic,Count,Name,avg_pos_sentiment,avg_neg_sentiment,avg_comments_pos_sentiment,avg_comments_neg_sentiment
0,0,796,0_like_help_name_say,0.161216,0.838784,0.196144,0.803856
1,1,591,1_card_bank_credit_fraud,0.113983,0.886017,0.148113,0.851887
2,2,457,2_delivery_shipping_store_email,0.122277,0.877723,0.163302,0.836698
3,3,283,3_insurance_car_claims_coverage,0.103466,0.896534,0.11797,0.88203
4,4,243,4_patient_doctor_clinic_medical,0.13952,0.86048,0.159091,0.840909
5,5,201,5_tow_roadside_truck_assistance,0.159511,0.840489,0.139324,0.860676
6,6,188,6_bill_phones_service_data,0.1335,0.8665,0.103897,0.896103
7,7,108,7_job_feel_anxiety_work,0.154886,0.845114,0.290429,0.709571
8,8,106,8_english_spanish_speak_language,0.154176,0.845824,0.2081,0.7919
9,9,101,9_job_interview_role_experience,0.374143,0.625857,0.343623,0.656377


In [18]:
# Merge the two dataframes using pd.concat()
merged_df = pd.concat([new_df, topics], axis=1)


In [19]:
merged_df

Unnamed: 0,topic,new_summary,title,Topic,Count,Name,avg_pos_sentiment,avg_neg_sentiment,avg_comments_pos_sentiment,avg_comments_neg_sentiment
0,0.0,"I work in a call center. Yesterday, I had a ca...","If you cuss before you tell me why, I'm hangin...",0,796,0_like_help_name_say,0.161216,0.838784,0.196144,0.803856
1,1.0,A customer wanted a replacement card as the on...,Is. Your. Card. Damaged?!~~A scam that shook m...,1,591,1_card_bank_credit_fraud,0.113983,0.886017,0.148113,0.851887
2,2.0,A customer service rep has shared the racist a...,We can hear you when you’re on hold. Say goodb...,2,457,2_delivery_shipping_store_email,0.122277,0.877723,0.163302,0.836698
3,3.0,A woman called to remove her husband from her ...,Don't cheat on your wife~~Insurance fraud ma’a...,3,283,3_insurance_car_claims_coverage,0.103466,0.896534,0.11797,0.88203
4,4.0,I used to work at a call center. I don't anymo...,You're telling me I have to wear a mask to go ...,4,243,4_patient_doctor_clinic_medical,0.13952,0.86048,0.159091,0.840909
5,5.0,I work at a membership based roadside assistan...,"Today on ""I can't believe I had to explain thi...",5,201,5_tow_roadside_truck_assistance,0.159511,0.840489,0.139324,0.860676
6,6.0,I work in telecom. A customer called demanding...,I will never forget the sound of this customer...,6,188,6_bill_phones_service_data,0.1335,0.8665,0.103897,0.896103
7,7.0,"I'm 32, getting bullied in work and I honestly...","At 32, I’m getting bullied at work~~Coworker s...",7,108,7_job_feel_anxiety_work,0.154886,0.845114,0.290429,0.709571
8,8.0,I work in a call center. Today a client was re...,“I want to speak to somebody who speaks real e...,8,106,8_english_spanish_speak_language,0.154176,0.845824,0.2081,0.7919
9,9.0,"I recently got promoted, which involved me mov...",In which new management fires everyone~~I FINA...,9,101,9_job_interview_role_experience,0.374143,0.625857,0.343623,0.656377


In [20]:
import cohere
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
import time

co = cohere.Client("XXXXXXXXXX")

def cohere_summary(text):
    attempts = 0
    while attempts < 3:
        try:
            response = co.summarize(
                model='summarize-xlarge',
                additional_command='lengthy paragraph written in an academic style',
                length='long',
                format='paragraph',
                temperature=0.3,
                text=text
            )
            return response.summary
        except Exception as e:
            attempts += 1
            time.sleep(1)

    return None

def process_row(index, row, final_grouped):
    rtext = row["new_summary"]
    summary = cohere_summary(rtext)
    
    if summary is not None:
        final_grouped.at[index, "theme"] = summary
    return summary is not None

In [43]:
# Create new_grouped DataFrame
new_merged = merged_df.copy()
new_merged["introductory_summary"] = None

max_workers = 5

with ThreadPoolExecutor(max_workers=max_workers) as executor:
    tasks = []
    
    for index, row in merged_df.iterrows():
        tasks.append(executor.submit(process_row, index, row, new_merged))
    
    # Display progress using tqdm
    for task in tqdm(as_completed(tasks), total=len(tasks), desc="Processing rows"):
        pass

Processing rows: 100%|██████████| 20/20 [01:01<00:00,  3.05s/it]


In [47]:
new_merged

Unnamed: 0,topic,new_summary,title,Topic,Count,Name,avg_pos_sentiment,avg_neg_sentiment,avg_comments_pos_sentiment,avg_comments_neg_sentiment,intro_summary
0,0.0,"I work in a call center. Yesterday, I had a ca...","If you cuss before you tell me why, I'm hangin...",0,796,0_like_help_name_say,0.161216,0.838784,0.196144,0.803856,Call centers are the pits of the service indus...
1,1.0,A customer wanted a replacement card as the on...,Is. Your. Card. Damaged?!~~A scam that shook m...,1,591,1_card_bank_credit_fraud,0.113983,0.886017,0.148113,0.851887,A customer wanted a replacement card as the on...
2,2.0,A customer service rep has shared the racist a...,We can hear you when you’re on hold. Say goodb...,2,457,2_delivery_shipping_store_email,0.122277,0.877723,0.163302,0.836698,A customer service rep has shared the racist a...
3,3.0,A woman called to remove her husband from her ...,Don't cheat on your wife~~Insurance fraud ma’a...,3,283,3_insurance_car_claims_coverage,0.103466,0.896534,0.11797,0.88203,A woman called to remove her husband from her ...
4,4.0,I used to work at a call center. I don't anymo...,You're telling me I have to wear a mask to go ...,4,243,4_patient_doctor_clinic_medical,0.13952,0.86048,0.159091,0.840909,I used to work at a call center. I don't anymo...
5,5.0,I work at a membership based roadside assistan...,"Today on ""I can't believe I had to explain thi...",5,201,5_tow_roadside_truck_assistance,0.159511,0.840489,0.139324,0.860676,I work at a membership based roadside assistan...
6,6.0,I work in telecom. A customer called demanding...,I will never forget the sound of this customer...,6,188,6_bill_phones_service_data,0.1335,0.8665,0.103897,0.896103,A customer called demanding a replacement phon...
7,7.0,"I'm 32, getting bullied in work and I honestly...","At 32, I’m getting bullied at work~~Coworker s...",7,108,7_job_feel_anxiety_work,0.154886,0.845114,0.290429,0.709571,Call center workers have been sharing their ex...
8,8.0,I work in a call center. Today a client was re...,“I want to speak to somebody who speaks real e...,8,106,8_english_spanish_speak_language,0.154176,0.845824,0.2081,0.7919,I work in a call center. Today a client was re...
9,9.0,"I recently got promoted, which involved me mov...",In which new management fires everyone~~I FINA...,9,101,9_job_interview_role_experience,0.374143,0.625857,0.343623,0.656377,I've spent eight years in call centers in one ...


In [46]:
# Drop the introductory_summary column
new_merged.drop(columns=['introductory_summary'], inplace=True)

# Rename the theme column to intro_summary
new_merged.rename(columns={'theme': 'intro_summary'}, inplace=True)

In [48]:
new_merged.to_csv('tfcc_summary_details_cohere.csv', index=False)