The code bellow assumes that there is only one sales rep per call (Agent Name and Agent Employee Email)

In [16]:
import pandas as pd

# 1. Load CSV
df = pd.read_csv("Pre-Clean_Data/B2BGTMFullCallTranscript.csv", encoding="latin1")

# 2. Clean up Verbatim_Type
df["Verbatim_Type"] = (
    df["Verbatim_Type"]
    .str.lower()
    .str.replace("verbatimcall", "", regex=False)
    .str.strip()
    .str.capitalize()
)

# 3. Sort by call and time
df = df.sort_values(["Customer Account Number", "CB Sentence Start Time"]).reset_index(drop=True)

# 4. Function to merge consecutive sentences by same speaker
def merge_consecutive_speakers(group):
    merged = []
    current_speaker = None
    buffer = []

    for speaker, sentence in zip(group["Verbatim_Type"], group["Sentences"]):
        if speaker == current_speaker:
            buffer.append(sentence.strip())
        else:
            if current_speaker is not None:
                merged.append(f"[{current_speaker}] {' '.join(buffer)}")
            current_speaker = speaker
            buffer = [sentence.strip()]
    # Add the last buffered chunk
    if current_speaker is not None:
        merged.append(f"[{current_speaker}] {' '.join(buffer)}")

    return " ".join(merged)

# 5. Group by Customer Account Number and combine sentences
conversation_summary = (
    df.groupby("Customer Account Number")
    .apply(lambda g: pd.Series({
        "call_text": merge_consecutive_speakers(g),
        "Agent Name": g["Agent Name"].iloc[0],
        "agent employee email": g["agent employee email"].iloc[0],
        "customer street address": g["customer street address"].iloc[0],
        "customer city": g["customer city"].iloc[0],
        "customer state": g["customer state"].iloc[0],
        "customer zip code": g["customer zip code"].iloc[0]
    }))
    .reset_index()
)


# 7. Display truncated preview
pd.set_option("display.max_colwidth", 150)
conversation_summary.head(3)


  df = pd.read_csv("Pre-Clean_Data/B2BGTMFullCallTranscript.csv", encoding="latin1")
  .apply(lambda g: pd.Series({


Unnamed: 0,Customer Account Number,call_text,Agent Name,agent employee email,customer street address,customer city,customer state,customer zip code
0,770110278303,"[Client] I mean my cell phones do not cost that much. OK. [Agent] Perfect thank you. So right now with the [Client] But uh Right. Yeah, I will con...","chambliss, desmond",desmond.chambliss@optimum.com,102 crystal palace dr,caddo valley,ar,71923.0
1,770110361701,"[Agent] The phone lines are unlimited throughout the US, Canada, as well as Puerto Rico [Client] Yeah. Well, I got, I have got AT&T and Verizon. [...","lawrence, dana",dana.lawrence@optimum.com,718 grove st,el dorado,ar,71730.0
2,770110528201,"[Agent] And no more Tom. [Client] It is AP At optimal wireless.com. [Agent] OK And I will just keep this one, the AP It looks like you just have t...","morano, saraphina",saraphina.morano@optimum.com,909 main st,magnolia,ar,71753.0


In [17]:
# Export conversation_summary to CSV
conversation_summary.to_csv("AlticeAllCallDataSummary.csv", index=False, quotechar='"')