### Step 11.  Summaries of Summaries - for Pegasus

### Import initial libraries and import the submissions CSV that includes sentiment scores

In [1]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from tqdm.auto import tqdm
import torch

tqdm.pandas()


In [2]:
# Check if GPU is available and use it
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [3]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

submission_tokenizer = AutoTokenizer.from_pretrained("stevied67/pegasus-reddit-summarizer2")

submission_model = AutoModelForSeq2SeqLM.from_pretrained("stevied67/pegasus-reddit-summarizer2")

comments_tokenizer = AutoTokenizer.from_pretrained("stevied67/pegasus-subreddit-comments-summarizer")

comments_model = AutoModelForSeq2SeqLM.from_pretrained("stevied67/pegasus-subreddit-comments-summarizer")

#### Functions

In [4]:


def concat_comments(df):
    # Group the DataFrame by the 'topic' column
    grouped = df.groupby('topic')

    # Initialize the new DataFrame with empty lists
    new_df = pd.DataFrame({'topic': [], 'summary': []})

    # Iterate through each group in the grouped DataFrame
    for group_name, group_df in grouped:
        # Concatenate the 'summary' values in the group with '~~' separator
        summary_list = group_df['summary'].tolist()
        summary_concatenated = []
        for summary in summary_list:
            # Check if the concatenated summary would exceed 800 tokens
            tokens_summary = comments_tokenizer.encode('~~'.join(summary_concatenated), add_special_tokens=False)
            tokens_new_summary = comments_tokenizer.encode(summary, add_special_tokens=False)
            if len(tokens_summary) + len(tokens_new_summary) > 900:
                # Add the current concatenated summary to the new DataFrame
                summary_str = '~~'.join(summary_concatenated).strip()
                new_df = pd.concat([new_df, pd.DataFrame({'topic': [group_name], 'summary': [summary_str]})])
                summary_concatenated = []
            # Concatenate the current summary to the concatenated summary with '~~' separator
            summary_concatenated.append(summary)
        
        # Add the last concatenated summary to the new DataFrame
        if summary_concatenated:
            summary_str = '~~'.join(summary_concatenated).strip()
            new_df = pd.concat([new_df, pd.DataFrame({'topic': [group_name], 'summary': [summary_str]})])

    return new_df


In [12]:
def summarize_text(text, model_name, tok_name, mtokens):
    try:
        text = text.strip().replace("\n", " ")
        if not text:
            return ""

        model_name.to(device)
        inputs = tok_name(text, max_length=1024, truncation=True, return_tensors="pt").to(device)
        summary_ids = model_name.generate(inputs["input_ids"], max_new_tokens=mtokens).cpu()
        summary = tok_name.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        return summary
    except TypeError as e:
        print(f"Error occurred for input: {text}")
        raise e


#### Retrieve data from step 10

In [6]:
comments_df = pd.read_csv('tfcc_pegasus_comments_with_accuracy.csv')



In [7]:
comments_df

Unnamed: 0,topic,selftext,summary,bert_f1
0,0.0,"Where I work, we're supposed to somewhat lay d...",The commenter shares a story about a customer ...,0.836978
1,0.0,"Not sure why I was reminded of this, but my gr...",The commenter shared a story about their grand...,0.851027
2,0.0,"""Oh, mensa. Isn't that the private organizatio...",The commenter had a bad experience with a cust...,0.831893
3,0.0,Oh thats awful. Im assuming there were rules a...,The commenter empathizes with the poster's sit...,0.822859
4,0.0,My older brother had this moment when we were ...,The commenter shares a personal experience of ...,0.820889
...,...,...,...,...
111,19.0,"I don't remember the context, but somebody was...",The commenter had a customer who repeatedly in...,0.834878
112,19.0,"So true, I had to call a customer back yesterd...",The commenter had to deal with a customer who ...,0.826705
113,19.0,"From the other side of the fence, I just had a...",The commenter had a bad experience with a cont...,0.834877
114,19.0,"ME: ""Thank you for calling &lt;large insurance...",The commenter expresses frustration with custo...,0.823343


#### Consolidate the comments by topic

In previous steps multiple comments were concatenated and summarized. This step takes the previously generated summaries, concatenates them together, and then does a final round of summarization.

In [8]:
df = comments_df.copy()

In [9]:
new_df = concat_comments(df)

In [10]:
new_df

Unnamed: 0,topic,summary
0,0.0,The commenter shares a story about a customer ...
0,1.0,The commenter shares a personal experience of ...
0,2.0,The commenter had a bad experience with a cust...
0,3.0,The commenter received a call from someone ask...
0,4.0,The commenter believes that the situation desc...
0,5.0,The commenter had a similar experience with a ...
0,6.0,The commenter worked for a mobile company and ...
0,7.0,The commenter advises bypassing the incompeten...
0,8.0,The commenter sympathizes with the original po...
0,9.0,The commenter worked in retail for 10 years an...


#### Generate summaries of summaries for comments

In [None]:
# Apply the summarize_text function to the 'summary' column and create a new column 'new_summary' with tqdm progress bar

new_df['new_summary'] = new_df['summary'].progress_apply(lambda x: summarize_text(x, comments_model, comments_tokenizer, 200))


In [12]:
new_df

Unnamed: 0,topic,summary,new_summary
0,0.0,The commenter shares a story about a customer ...,The commenter shares a story about a customer ...
0,1.0,The commenter shares a personal experience of ...,The commenter shares a personal experience of ...
0,2.0,The commenter had a bad experience with a cust...,The commenter had a bad experience with a cust...
0,3.0,The commenter received a call from someone ask...,The commenter received a call from someone ask...
0,4.0,The commenter believes that the situation desc...,The commenter believes that the situation desc...
0,5.0,The commenter had a similar experience with a ...,The commenter had a similar experience with a ...
0,6.0,The commenter worked for a mobile company and ...,The commenter worked for a mobile company and ...
0,7.0,The commenter advises bypassing the incompeten...,The commenter advises bypassing the incompeten...
0,8.0,The commenter sympathizes with the original po...,The commenter sympathizes with the original po...
0,9.0,The commenter worked in retail for 10 years an...,The commenter worked in retail for 10 years an...


In [13]:


# Function to count the number of words in a given text
def count_words(text):
    return len(text.split())


summary_word_counts = new_df['summary'].apply(count_words)
new_summary_word_counts = new_df['new_summary'].apply(count_words)

# Calculate the average number of words in each column
summary_avg_word_count = summary_word_counts.mean()
new_summary_avg_word_count = new_summary_word_counts.mean()

print(f"Average word count in 'summary' column: {summary_avg_word_count}")
print(f"Average word count in 'new_summary' column: {new_summary_avg_word_count}")


Average word count in 'summary' column: 297.9
Average word count in 'new_summary' column: 120.4


In [14]:
new_df.to_csv('tfcc_comments_summaries_of_summaries_pegasus.csv', index=False)

#### Create summaries of summaries for submissions.

#### Read in the submissions data that has passed accuracy checks

In [5]:
#new_filtered_df.to_csv('tfcc_submissions_top20_with_sentiment_including_comment_sentiment_and_summaries.csv', index=False)
df = pd.read_csv('tfcc_submissions_pegasus_with_accuracy.csv')


In [6]:
df

Unnamed: 0,id,title,selftext,author,score,num_comments,created_date,selftext_length,topic,pos_sentiment,neg_sentiment,comments_pos_sentiment,comments_neg_sentiment,summary,bert_f1
0,9odgd4,"If you cuss before you tell me why, I'm hangin...","so. this just happened. i had a caller, we wil...",sleepernick,4183,167,2018-10-15 15:03:20,317,0,0.002438,0.997562,0.167575,0.832425,The author received a call from an irate custo...,0.815338
1,acncqg,I swear I’m not usually this dumb: when the ca...,my absolute favorite type of calls are when th...,QuoteTheKitty,2002,63,2019-01-04 22:13:45,173,0,0.360828,0.639172,0.239294,0.760706,The author's favorite type of call is when a c...,0.853943
2,js29up,The mute button is not the customers friend,at a previous call center i worked they wanted...,supersizedlady,1756,115,2020-11-11 05:30:44,240,0,0.003723,0.996277,0.305934,0.694066,"At a previous call center, the author was aske...",0.881488
3,e01rb8,One of my agents actually said what everyone t...,this happened a couple of weeks ago and is bot...,wirwarennamenlos,1553,121,2019-11-22 14:33:25,138,0,0.498418,0.501582,0.250270,0.749730,An agent had a customer ask a dumb question on...,0.916453
4,h0gvxc,Customer loves his analogy until I use it agai...,this call was from a while ago so i don't real...,BostonB96,1411,42,2020-06-10 18:28:42,330,0,0.184346,0.815654,0.001718,0.998282,A customer called a software company to compla...,0.856089
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1945,4oloj6,Shooter threat,"wednesday night, someone called in and threate...",Believeinthis,18,20,2016-06-17 22:08:20,149,19,0.001418,0.998582,0.001219,0.998781,Someone called in and threatened to come shoot...,0.936185
1946,2dbpxp,How come people assume call center employees a...,i've had a few infuriating calls. people just ...,lacquerqueen,15,21,2014-08-12 11:05:09,254,19,0.001012,0.998988,0.150063,0.849937,The author has had frustrating calls from peop...,0.859934
1947,4x4pjt,Probably getting a CA because I didn't apologi...,customer complained that i blew her off despi...,evosthunder,14,4,2016-08-10 22:06:34,104,19,0.013253,0.986747,,,A customer complained that the author blew her...,0.919902
1948,2ldeab,Don't Feed Stray Cats,"about two years ago, an old lady got upset and...",EveryoneHatesCJ,7,3,2014-11-05 15:11:42,125,19,0.005375,0.994625,,,"Two years ago, an old lady got upset and reque...",0.913105


#### Combine summaries by topic for consolidated summarization

In [None]:
import pandas as pd
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("stevied67/pegasus-reddit-summarizer2")

# Group the tfcc_comments_with_topics DataFrame by the 'topic' column
grouped = df.groupby('topic')

# Initialize the new DataFrame with empty lists
new_df = pd.DataFrame({'topic': [], 'summary': []})

# Iterate through each group in the grouped DataFrame
for group_name, group_df in grouped:
    # Concatenate the 'summary' values in the group with '~~' separator
    summary_list = group_df['summary'].tolist()
    summary_concatenated = []
    for summary in summary_list:
        # Check if the concatenated summary would exceed 800 tokens
        tokens_summary = tokenizer.encode('~~'.join(summary_concatenated), add_special_tokens=False)
        tokens_new_summary = tokenizer.encode(summary, add_special_tokens=False)
        if len(tokens_summary) + len(tokens_new_summary) > 900:
            # Add the current concatenated summary to the new DataFrame
            summary_str = '~~'.join(summary_concatenated).strip()
            new_df = new_df.append({'topic': group_name, 'summary': summary_str}, ignore_index=True)
            summary_concatenated = []
        # Concatenate the current summary to the concatenated summary with '~~' separator
        summary_concatenated.append(summary)

    # Add any remaining


In [8]:
new_df

Unnamed: 0,topic,summary
0,0.0,The author received a call from an irate custo...
1,0.0,The author worked for a cell phone company and...
2,0.0,After returning from a year of paternity leave...
3,0.0,The author is trying to do a medical claim for...
4,0.0,A man called a government-funded insurance web...
...,...,...
200,19.0,"The author provides their ""employee number"" an..."
201,19.0,The author works for a bank in the escalations...
202,19.0,The author received a follow-up message from t...
203,19.0,"The author, a manager at an online retailer, w..."


In [9]:
sub_group = new_df.copy()

In [10]:
sub_group

Unnamed: 0,topic,summary
0,0.0,The author received a call from an irate custo...
1,0.0,The author worked for a cell phone company and...
2,0.0,After returning from a year of paternity leave...
3,0.0,The author is trying to do a medical claim for...
4,0.0,A man called a government-funded insurance web...
...,...,...
200,19.0,"The author provides their ""employee number"" an..."
201,19.0,The author works for a bank in the escalations...
202,19.0,The author received a follow-up message from t...
203,19.0,"The author, a manager at an online retailer, w..."


#### Call Pegasus and create summaries of summaries for submissions

In [None]:
# Apply the summarize_text function to the 'summary' column and create a new column 'new_summary' with tqdm progress bar
sub_group['new_summary'] = sub_group['summary'].progress_apply(lambda x: summarize_text(x, comments_model, comments_tokenizer, 124))


In [14]:


# Function to count the number of words in a given text
def count_words(text):
    return len(text.split())

summary_word_counts = sub_group['summary'].apply(count_words)
new_summary_word_counts = sub_group['new_summary'].apply(count_words)

# Calculate the average number of words in each column
summary_avg_word_count = summary_word_counts.mean()
new_summary_avg_word_count = new_summary_word_counts.mean()

print(f"Average word count in 'summary' column: {summary_avg_word_count}")
print(f"Average word count in 'new_summary' column: {new_summary_avg_word_count}")

Average word count in 'summary' column: 733.4048780487805
Average word count in 'new_summary' column: 90.15121951219513


In [21]:
sub_group.head(10)

Unnamed: 0,topic,summary,new_summary
0,0.0,The author received a call from an irate custo...,The author received a call from an irate custo...
1,0.0,The author worked for a cell phone company and...,The author worked for a cell phone company and...
2,0.0,After returning from a year of paternity leave...,The author received a call from a middle-aged ...
3,0.0,The author is trying to do a medical claim for...,The author is trying to do a medical claim for...
4,0.0,A man called a government-funded insurance web...,A man called a government-funded insurance web...
5,0.0,The author works in a cruise ship company call...,The author works in a cruise ship company call...
6,0.0,The author hates covid-19 and has dealt with m...,The author dislikes covid-19 and has dealt wit...
7,0.0,An employee at a telecom call center received ...,An employee at a telecom call center received ...
8,0.0,The author works for an insurance call center ...,The author works at an insurance call center a...
9,0.0,The author works for a telco and they get a fa...,The author works for a telco and they get a fa...


In [22]:
new_sub_group = sub_group.copy()

In [26]:
import openai
import json #JSON manipulation
openai.api_key = "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"

In [27]:
import openai
import re
import json
from openai.error import OpenAIError
import pandas as pd
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

def getOpenAI_Summary(prompt, selftext, timeout=30):
    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "user", "content": "" + prompt + ": " + selftext}
            ],
            timeout=timeout
        )

        response_str = str(response)
        try:
            json_object = json.loads(response_str)
        except ValueError as e:
            print(f"Error parsing response JSON: {e}")
            print(f"Response string: {response_str}")
            return "none"

        initial_response = json_object['choices'][0]['message']['content']

        return initial_response.strip()
    except OpenAIError as e:
        if "timed out" in str(e):
            raise TimeoutError("Request to OpenAI API timed out.")
        else:
            raise e

def get_openai_summary_with_retry(prompt, rtext, retries=5, wait_time=15):
    for i in range(retries):
        try:
            return getOpenAI_Summary(prompt, rtext)
        except TimeoutError:
            if i < retries - 1:
                print(f"Timeout occurred. Retrying in {wait_time} seconds...")
                time.sleep(wait_time)
            else:
                raise
    return None

def save_results(df, file_name='results.csv'):
    df.to_csv(file_name, index=False)

def process_row(row):
    rtext = str(row["new_summary"])
    summary = get_openai_summary_with_retry(summary_prompt, rtext, wait_time=retry_delay)
    return row.name, summary

In [None]:
##openai

summary_prompt = "Summarize the theme of this text in 2 or 3 words maximum.  Avoid customer service issues since all of the examples are related to customer service.  Do not provide any other commentary, just a 2 or 3 word response:"
save_interval = 10  # Save results every 10 iterations
retry_delay = 30    # Retry after 30 seconds in case of timeouts

rows_to_process = [row for _, row in sub_group.iterrows()]

with ThreadPoolExecutor(max_workers=10) as executor:
    futures = [executor.submit(process_row, row) for row in rows_to_process]

    for count, future in enumerate(as_completed(futures), start=1):
        index, summary = future.result()
        new_sub_group.at[index, "theme"] = summary

        if count % save_interval == 0:
            print(f"Saving results at iteration {count}...")
            save_results(new_sub_group)

# Save the remaining results after the loop is finished
save_results(new_sub_group)

In [26]:
new_sub_group

Unnamed: 0,topic,summary,new_summary,theme
0,0.0,The author received a call from an irate custo...,The author received a call from an irate custo...,Customer interactions.
1,0.0,The author worked for a cell phone company and...,The author worked for a cell phone company and...,Customer service issues.
2,0.0,After returning from a year of paternity leave...,The author received a call from a middle-aged ...,Sexism in workplace.
3,0.0,The author is trying to do a medical claim for...,The author is trying to do a medical claim for...,Distracting Callers.
4,0.0,A man called a government-funded insurance web...,A man called a government-funded insurance web...,Angry customers.
...,...,...,...,...
200,19.0,"The author provides their ""employee number"" an...","The author provides their ""employee number"" an...",Threats and job security.
201,19.0,The author works for a bank in the escalations...,The author works for a bank in the escalations...,Loan Escalation.
202,19.0,The author received a follow-up message from t...,The author received a follow-up message from t...,Customer Misunderstanding.
203,19.0,"The author, a manager at an online retailer, w...","The author, a manager at an online retailer, r...",Dealing with Difficult Customers.


In [27]:
final_group = new_sub_group.copy()

In [16]:
df = pd.read_csv('tfcc_submissions_topic_counts_top_20.csv')


In [29]:
df

Unnamed: 0,Topic,Count,Name
0,-1,5316,-1_get_like_work_day
1,0,796,0_like_help_name_say
2,1,591,1_card_bank_credit_fraud
3,2,457,2_delivery_shipping_store_email
4,3,283,3_insurance_car_claims_coverage
...,...,...,...
57,56,13,56_guam_california_job_resume
58,57,12,57_hate_sticker_legion_tolls
59,58,12,58_inactivate_calling_dialled_cincinnati
60,59,11,59_kevin_hurricane_name_deadline


In [30]:
import pandas as pd

# Merge the DataFrames on the columns containing topics
merged_df = final_group.merge(df, left_on='topic', right_on='Topic', how='left')

# Create a new column 'topic_name' with the value from the 'Name' column in df
merged_df['topic_name'] = merged_df['Name']

# Drop unnecessary columns
merged_df.drop(['Topic', 'Name'], axis=1, inplace=True)

# Update final_grouped with the new DataFrame
final_group = merged_df


In [31]:
final_group

Unnamed: 0,topic,summary,new_summary,theme,Count,topic_name
0,0.0,The author received a call from an irate custo...,The author received a call from an irate custo...,Customer interactions.,796,0_like_help_name_say
1,0.0,The author worked for a cell phone company and...,The author worked for a cell phone company and...,Customer service issues.,796,0_like_help_name_say
2,0.0,After returning from a year of paternity leave...,The author received a call from a middle-aged ...,Sexism in workplace.,796,0_like_help_name_say
3,0.0,The author is trying to do a medical claim for...,The author is trying to do a medical claim for...,Distracting Callers.,796,0_like_help_name_say
4,0.0,A man called a government-funded insurance web...,A man called a government-funded insurance web...,Angry customers.,796,0_like_help_name_say
...,...,...,...,...,...,...
200,19.0,"The author provides their ""employee number"" an...","The author provides their ""employee number"" an...",Threats and job security.,55,19_supervisor_manager_escalated_get
201,19.0,The author works for a bank in the escalations...,The author works for a bank in the escalations...,Loan Escalation.,55,19_supervisor_manager_escalated_get
202,19.0,The author received a follow-up message from t...,The author received a follow-up message from t...,Customer Misunderstanding.,55,19_supervisor_manager_escalated_get
203,19.0,"The author, a manager at an online retailer, w...","The author, a manager at an online retailer, r...",Dealing with Difficult Customers.,55,19_supervisor_manager_escalated_get


In [32]:
final_group.to_csv('tfcc_top_20_summaries_of_summaries_pegasus.csv', index=False)

In [17]:
df = pd.read_csv('tfcc_top_20_summaries_of_summaries_pegasus.csv')


In [18]:
topics = pd.read_csv('tfcc_top20_topics_with_sentiment_and_comments_sentiment.csv')


In [None]:
# Group the tfcc_comments_with_topics DataFrame by the 'topic' column
grouped = df.groupby('topic')

# Initialize the new DataFrame with empty lists
new_df = pd.DataFrame({'topic': [], 'new_summary': []})

# Iterate through each group in the grouped DataFrame
for group_name, group_df in grouped:
    # Concatenate the 'summary' values in the group with '~~' separator
    summary_list = group_df['new_summary'].tolist()
    summary_concatenated = []
    for summary in summary_list:
        # Check if the concatenated summary would exceed 2000 words
        words_summary = ' '.join(summary_concatenated).split()
        if len(words_summary) + len(summary.split()) > 2000:
            # Add the current concatenated summary to the new DataFrame
            summary_str = '~~'.join(summary_concatenated).strip()
            new_df = new_df.append({'topic': group_name, 'new_summary': summary_str}, ignore_index=True)
            summary_concatenated = []
        # Concatenate the current summary to the concatenated summary with '~~' separator
        summary_concatenated.append(summary)
    # Add any remaining concatenated summary to the new DataFrame
    summary_str = '~~'.join(summary_concatenated).strip()
    new_df = new_df.append({'topic': group_name, 'new_summary': summary_str}, ignore_index=True)


In [21]:
new_df

Unnamed: 0,topic,new_summary
0,0.0,The author received a call from an irate custo...
1,1.0,A customer called to request a replacement for...
2,2.0,A customer service representative checks the o...
3,3.0,A woman called a UK car insurance provider to ...
4,4.0,A customer calls a customer care center to ask...
5,5.0,A woman called a road assistance company to re...
6,6.0,A telecom worker received a call from a woman ...
7,7.0,"The author, a 32-year-old man, has been workin..."
8,8.0,A person with an accent couldn't understand an...
9,9.0,The author was recently promoted to a new job ...


In [22]:
topics

Unnamed: 0,Topic,Count,Name,avg_pos_sentiment,avg_neg_sentiment,avg_comments_pos_sentiment,avg_comments_neg_sentiment
0,0,796,0_like_help_name_say,0.161216,0.838784,0.196144,0.803856
1,1,591,1_card_bank_credit_fraud,0.113983,0.886017,0.148113,0.851887
2,2,457,2_delivery_shipping_store_email,0.122277,0.877723,0.163302,0.836698
3,3,283,3_insurance_car_claims_coverage,0.103466,0.896534,0.11797,0.88203
4,4,243,4_patient_doctor_clinic_medical,0.13952,0.86048,0.159091,0.840909
5,5,201,5_tow_roadside_truck_assistance,0.159511,0.840489,0.139324,0.860676
6,6,188,6_bill_phones_service_data,0.1335,0.8665,0.103897,0.896103
7,7,108,7_job_feel_anxiety_work,0.154886,0.845114,0.290429,0.709571
8,8,106,8_english_spanish_speak_language,0.154176,0.845824,0.2081,0.7919
9,9,101,9_job_interview_role_experience,0.374143,0.625857,0.343623,0.656377


In [23]:
# Merge the two dataframes using pd.concat()
merged_df = pd.concat([new_df, topics], axis=1)

In [24]:
merged_df

Unnamed: 0,topic,new_summary,Topic,Count,Name,avg_pos_sentiment,avg_neg_sentiment,avg_comments_pos_sentiment,avg_comments_neg_sentiment
0,0.0,The author received a call from an irate custo...,0,796,0_like_help_name_say,0.161216,0.838784,0.196144,0.803856
1,1.0,A customer called to request a replacement for...,1,591,1_card_bank_credit_fraud,0.113983,0.886017,0.148113,0.851887
2,2.0,A customer service representative checks the o...,2,457,2_delivery_shipping_store_email,0.122277,0.877723,0.163302,0.836698
3,3.0,A woman called a UK car insurance provider to ...,3,283,3_insurance_car_claims_coverage,0.103466,0.896534,0.11797,0.88203
4,4.0,A customer calls a customer care center to ask...,4,243,4_patient_doctor_clinic_medical,0.13952,0.86048,0.159091,0.840909
5,5.0,A woman called a road assistance company to re...,5,201,5_tow_roadside_truck_assistance,0.159511,0.840489,0.139324,0.860676
6,6.0,A telecom worker received a call from a woman ...,6,188,6_bill_phones_service_data,0.1335,0.8665,0.103897,0.896103
7,7.0,"The author, a 32-year-old man, has been workin...",7,108,7_job_feel_anxiety_work,0.154886,0.845114,0.290429,0.709571
8,8.0,A person with an accent couldn't understand an...,8,106,8_english_spanish_speak_language,0.154176,0.845824,0.2081,0.7919
9,9.0,The author was recently promoted to a new job ...,9,101,9_job_interview_role_experience,0.374143,0.625857,0.343623,0.656377


In [None]:
prompt = 'summarize the following text to create a brief introductory paragraph that describes the content and meaning at a high level using academic language:'
count = 1

new_merged_df = merged_df.copy()

# Create a new list to store the summaries
new_summaries = []

for index, row in new_merged_df.iterrows():
    rtext = row["new_summary"]
    rtitle = ""

    rresponse = getOpenAI_Summary(prompt, rtext)

    print(count)
    count = count + 1

    # Append the summary to the list
    new_summaries.append(rresponse)

# Assign the list of summaries to the DataFrame
new_merged_df["intro_summary"] = new_summaries

In [29]:
new_merged_df.to_csv('tfcc_summary_details_pegasus.csv', index=False)