In [1]:
import pandas as pd 
import numpy as np
import os 
import random
from dotenv import load_dotenv
# Load environment variables from a .env file, if one is present.
load_dotenv()

# NOTE(review): the working directory is moved up one level before the `app`
# imports and before any "./data/..." path is used; presumably the notebook
# lives in a sub-folder of the project root -- confirm. Re-running this cell
# moves up one more level each time, so run it only once per kernel session.
os.chdir("..")
from app.entities.chat import Chat, Survey, ProductReview
# NOTE(review): names used in later cells such as ContactType, Product and
# SurveyScore are not imported explicitly; presumably they come from this star
# import -- confirm.
from app.entities.enumerators import *

In [2]:
# --- configuration -----------------------------------------------------------
# True  -> read the previously generated full data set from `full_data_path`.
# False -> rebuild it from `customers` and write it back to `full_data_path`.
there_is_full_data = True

# All paths are relative to the current working directory.
full_data_path = "./data/full_data.csv"
chat_file = "./data/rnr_chat_w_transcript.csv"
agent_trans_file = "./data/agent_transcript_topics.csv"
customer_trans_file = "./data/customer_transcript_topics.csv"
survey_file = "./data/RnR PCS Survey 2021-10-08.csv"
product_file = "./data/RnR Product Reviews 2021-10-08.csv"
topic_file = "./data/Rocks_N_Ropes_Chat_2022-06-21v2.csv"
date_dim_file = "./data/date_dim.csv"
scenarios_file = "./data/scenarios.csv"
customer_file = "./data/customers.csv"
product_customer_file = "./data/product_review_customers.csv"
agent_file = "./data/agents.csv"
products_file = "./data/products.csv"
text_builders_file = "./data/text_drivers_2.xlsx"

# read in data
chats = pd.read_csv(chat_file)
surveys = pd.read_csv(survey_file)
product_reviews = pd.read_csv(product_file) 
topics = pd.read_csv(topic_file)
agent_trans = pd.read_csv(agent_trans_file)
customer_trans = pd.read_csv(customer_trans_file)
scenarios = pd.read_csv(scenarios_file)
customers = pd.read_csv(customer_file)
agents = pd.read_csv(agent_file)
products = pd.read_csv(products_file)
product_customers = pd.read_csv(product_customer_file)
# sheet_name=None loads every sheet of the workbook into a dict keyed by sheet name.
text_builders = pd.read_excel(text_builders_file,sheet_name=None)

if there_is_full_data: 
    full_data = pd.read_csv(full_data_path,sep="|")
else: 
    # Rebuild the full data set: the customer table repeated ten times.
    full_data = pd.concat([customers]*10)

    # add Contact Type and product, one random draw per row
    # NOTE(review): random_by_dist presumably samples an enum member using the
    # weights in `proba` -- confirm against app.entities.enumerators.
    full_data["contact_type"] = full_data.apply(lambda x : ContactType.random_by_dist(proba=[.0858,.1012,.1284,.6347,.0499]).name, axis =1)
    full_data["product_name"] = full_data.apply(lambda x : Product.random_by_dist(proba=[0.053,0.111,0.102,0.085,0.054,0.027,0.074,0.058,0.017,0.049,0.057,0.042,0.057,0.051,0.035,0.010,0.047,0.063,0.008]).name, axis =1)

    # add new id (1..N) as the first column
    full_data.insert(0,"new_chat_id", range(1,1 + len(full_data)))
    # Write to the same path (and with the same "|" separator) that the
    # `if` branch reads from, so the two branches cannot drift apart.
    full_data.to_csv(full_data_path,index=False,sep="|")

# Drop the first column of these frames: their CSV files were written with the
# pandas index included (index=False was forgotten when saving them).
agent_trans = agent_trans.iloc[:,1:]
customer_trans = customer_trans.iloc[:,1:]
chats = chats.iloc[:,1:]


In [3]:
def generate_text(task, tokenizer, model):
    """Generate text for the prompt ``task`` and return it as a string.

    The prompt is encoded with ``tokenizer.encode(..., return_tensors='pt')``,
    passed to ``model.generate`` (7 beams, no repeated 2-grams, length between
    50 and 100), and the first returned sequence is decoded with special
    tokens skipped. The tokenizer and model are used exactly as supplied.
    """
    encoded_prompt = tokenizer.encode(task, return_tensors='pt')
    generation_settings = dict(num_beams=7, no_repeat_ngram_size=2, min_length=50, max_length=100)
    sequences = model.generate(encoded_prompt, **generation_settings)
    return tokenizer.decode(sequences[0], skip_special_tokens=True)

def build_comment(topic_item, product, contact_type, tokenizer, model): 
    """Build a generated comment for one product / topic / contact-type combination.

    ``topic_item`` must expose ``Topic`` and ``Keyword`` attributes whose
    ``.values[0]`` are read (the caller in this notebook passes a one-row
    sample of a topics frame). The prompt is
    ``"summarize:<product> <topic> <keyword> <contact_type>"`` with every
    underscore replaced by a space, and is handed to ``generate_text``.
    """
    topic_name = topic_item.Topic.values[0]
    topic_keyword = topic_item.Keyword.values[0]
    prompt = f"summarize:{product} {topic_name} {topic_keyword} {contact_type}".replace("_"," ")
    return generate_text(task=prompt, tokenizer=tokenizer, model=model)

def generate_random_date(start_date,end_date):
    """Return a random timestamp in ``[start_date, end_date)`` as a string.

    The offset is drawn uniformly, in whole seconds, across the entire window.
    Drawing whole days first (as before) failed for windows shorter than one
    day and never reached the trailing partial day of a window that is not a
    whole number of days.

    Parameters
    ----------
    start_date, end_date : datetime.datetime
        Bounds of the window; ``end_date`` must be later than ``start_date``.

    Returns
    -------
    str
        The sampled moment formatted as ``"%Y-%m-%d %H:%M:%S"``.

    Raises
    ------
    ValueError
        If the window is empty (``end_date`` is not at least one second after
        ``start_date``).
    """
    # Local imports kept so the function works regardless of cell run order.
    import datetime
    import random

    window_seconds = int((end_date - start_date).total_seconds())
    if window_seconds <= 0:
        raise ValueError("end_date must be later than start_date")
    random_date = start_date + datetime.timedelta(seconds=random.randrange(window_seconds))
    return random_date.strftime("%Y-%m-%d %H:%M:%S")

def comment_task(df,tokenizer,model): 
    """Add a generated ``new_comment`` column to ``df`` and return ``df``.

    For every row, one random row is sampled from the module-level ``tops``
    frame and passed to ``build_comment`` together with the row's
    ``product_name`` and ``contact_type``. ``df`` is modified in place.

    NOTE(review): ``tops`` is a global that is assigned in a later cell, so
    that cell must have run before this function is called.
    """
    def _comment_for(row):
        sampled_topic = tops.sample(1)
        return build_comment(sampled_topic, row.product_name, row.contact_type, tokenizer, model)

    df["new_comment"] = df.apply(_comment_for, axis=1)
    return df

def comment_build(x,s_df): 
    """Pick one random response whose score band contains ``x.survey_score``.

    ``s_df`` needs ``min``, ``max`` and ``response`` columns; a row qualifies
    when ``min <= x.survey_score <= max``. One qualifying ``response`` value is
    sampled and returned. ``s_df`` itself is not modified.
    """
    score = x.survey_score
    lower_ok = s_df["min"] <= score
    upper_ok = s_df["max"] >= score
    matching_responses = s_df.loc[lower_ok & upper_ok, "response"]
    return matching_responses.sample(n=1).values[0]

def comment_task_dumb(df,selection_df):
    """Attach a randomly chosen canned comment to every row of ``df``.

    Each row is handed to ``comment_build`` together with ``selection_df``,
    and the returned text is stored in a ``new_comment`` column. ``df`` is
    modified in place and also returned.

    NOTE(review): the earlier notes for this function mentioned selecting by
    product and contact type; the selection performed by ``comment_build`` is
    driven by the row's ``survey_score`` only.
    """
    def _pick_comment(row):
        return comment_build(row, selection_df)

    df["new_comment"] = df.apply(_pick_comment, axis=1)
    return df

# User Story 1: adjusting site-down survey sentiment

In [4]:
# set up text surveys: keep only the topics relevant to a site outage
topic_filter = ["Errors","Crashing","Website Feedback-Negative","Website-Broken Links/Pages"]
tops = topics[topics.Topic.isin(topic_filter)].copy()

# filter for site down
text_builders_survey = text_builders["site_down_cust_survey"]
text_builders_survey = text_builders_survey[text_builders_survey.type=="site_down"].copy()
# Read the score band from "survey range" by extracting its numbers instead of
# indexing single characters, so multi-digit bounds such as "9-10" parse
# correctly. For single-digit bands like "1-3" the result is unchanged.
survey_range_bounds = text_builders_survey["survey range"].astype(str).str.findall(r"\d+")
text_builders_survey["min"] = survey_range_bounds.apply(lambda bounds: int(bounds[0]))
text_builders_survey["max"] = survey_range_bounds.apply(lambda bounds: int(bounds[-1]))

# just grab some preseeded surveys
# .copy() makes this an independent frame, so the column assignments below do
# not act on a chained slice of `chats` (which can raise SettingWithCopyWarning).
# Both names stay bound to the same frame, as before.
outtage_surveys = site_down_chats = chats[chats.site_down==1][["chat_number","site_down_sentiment","CustomerID","m_agent_ID"]].copy()
# NOTE(review): random_by_dist presumably samples a SurveyScore member using the
# weights in `proba`; only the first five entries are non-zero -- confirm.
outtage_surveys["survey_score"] = outtage_surveys.apply(lambda x : SurveyScore.random_by_dist(proba=[.2,.3,.1,.2,.2,0,0,0,0,0]).value, axis =1)

# random comment whose score band contains the row's survey_score
outtage_surveys["new_comment"] = outtage_surveys.apply(lambda x: comment_build(x,text_builders_survey), axis=1)

In [5]:
# Distribution of the pre-seeded sentiment labels on the site-down rows.
outtage_surveys["site_down_sentiment"].value_counts()

Negative    1843
9           1346
Neutral      496
Name: site_down_sentiment, dtype: int64

In [6]:
def tag(score, sentiment):
    """Derive a sentiment label from a survey score.

    Parameters
    ----------
    score : number
        The survey score of the row.
    sentiment : str
        The row's existing sentiment value.

    Returns
    -------
    str or None
        * ``sentiment`` unchanged when it is the string ``"9"``;
        * ``"Negative"`` for ``score < 5``;
        * ``"Neutral"`` for ``5 <= score <= 6``;
        * ``"Positive"`` for ``score > 6`` (this case used to fall through
          and silently return ``None``);
        * ``None`` only when ``score`` cannot be ordered (e.g. NaN).
    """
    if sentiment == "9":
        return sentiment
    if score < 5:
        return "Negative"
    if score <= 6:
        return "Neutral"
    if score > 6:
        return "Positive"
    # Reached only for values that compare False to everything, such as NaN.
    return None
    
# Label every survey row from its score; rows whose sentiment is "9" keep it.
outtage_surveys["new_sentiment"] = outtage_surveys.apply(
    lambda row: tag(score=row["survey_score"], sentiment=row["site_down_sentiment"]),
    axis=1,
)

In [9]:
# adjust customers: attach customer details to every survey row. The left join
# keeps survey rows even when no customer matches CustomerID.
outtage_surveys = outtage_surveys.merge(customers[["id","first_name","last_name","email","member_number"]], how="left",left_on="CustomerID",right_on="id")
# Vectorised string operations: a row with a missing name (no matching
# customer) gets a missing ContactName instead of raising inside a row-wise apply.
outtage_surveys["ContactName"] = outtage_surveys["first_name"].str.upper().str.cat(outtage_surveys["last_name"].str.upper(), sep=" ")
outtage_surveys = outtage_surveys.rename(columns={
    "email":"ContactEmail"
})

# Drop the helper columns so the agent merge below can reuse the same names.
outtage_surveys = outtage_surveys.drop(columns=["first_name","last_name","id"])

# adjust agents: same pattern, keyed on m_agent_ID
outtage_surveys = outtage_surveys.merge(agents[["id","first_name","last_name","team_name"]],how="left",left_on="m_agent_ID",right_on="id")
outtage_surveys["m_agent_name"] = outtage_surveys["first_name"].str.cat(outtage_surveys["last_name"], sep=" ")
outtage_surveys = outtage_surveys.rename(columns={
    "team_name":"m_agent_team_ID"
})
outtage_surveys = outtage_surveys.drop(columns=["first_name","last_name","id"])

In [11]:
# Replace the pre-seeded sentiment column with the score-derived one and give
# the generated comment its final column name.
outtage_surveys = outtage_surveys.drop(columns=["site_down_sentiment"])
# Two typos previously made this rename a no-op for the frame that gets saved:
# the result was assigned to a misspelled variable name, and the "new_comment"
# key was misspelled so that column would never have been renamed.
outtage_surveys = outtage_surveys.rename(columns={"new_comment":"overall_experience_comment","new_sentiment":"site_down_sentiment"})

In [14]:
import datetime

# Transaction timestamps are drawn between 4 and 6 April 2022.
outtage_surveys["TransactionDateUTC"] = outtage_surveys.apply(
    lambda _row: generate_random_date(
        start_date=datetime.datetime(2022,4,4),
        end_date=datetime.datetime(2022,4,6),
    ),
    axis=1,
)
# Response timestamps are drawn between 6 and 30 April 2022, i.e. after the
# transaction window.
outtage_surveys["ResponseReceivedDateUTC"] = outtage_surveys.apply(
    lambda _row: generate_random_date(
        start_date=datetime.datetime(2022,4,6),
        end_date=datetime.datetime(2022,4,30),
    ),
    axis=1,
)

In [16]:
# index=False keeps the pandas index out of the file, so readers do not get an
# extra unnamed first column (the reason some input files loaded earlier in
# this notebook need their first column stripped after reading).
outtage_surveys.to_csv("./data/us1_final_site_down_surveys.csv",sep=",",index=False)

# User Story 2