In [1]:
import pandas as pd 
import numpy as np
import os 
import random
from dotenv import load_dotenv
load_dotenv()

os.chdir("..")
from app.entities.chat import Chat, Survey, ProductReview
from app.entities.enumerators import *

In [2]:
# Set to False to rebuild the synthetic contact table from `customers` instead of
# loading the cached copy stored at `full_data_path`.
there_is_full_data = True

full_data_path = "./data/full_data.csv"
chat_file = "./data/rnr_chat_w_transcript.csv"
agent_trans_file = "./data/agent_transcript_topics.csv"
customer_trans_file = "./data/customer_transcript_topics.csv"
survey_file = "./data/RnR PCS Survey 2021-10-08.csv"
product_file = "./data/RnR Product Reviews 2021-10-08.csv"
topic_file = "./data/Rocks_N_Ropes_Chat_2022-06-21v2.csv"
date_dim_file = "./data/date_dim.csv"
scenarios_file = "./data/scenarios.csv"
customer_file = "./data/customers.csv"
product_customer_file = "./data/product_review_customers.csv"
agent_file = "./data/agents.csv"
products_file = "./data/products.csv"
text_builders_file = "./data/text_drivers_2.xlsx"

# read in data
chats = pd.read_csv(chat_file)
surveys = pd.read_csv(survey_file)
product_reviews = pd.read_csv(product_file)
topics = pd.read_csv(topic_file)
agent_trans = pd.read_csv(agent_trans_file)
customer_trans = pd.read_csv(customer_trans_file)
scenarios = pd.read_csv(scenarios_file)
customers = pd.read_csv(customer_file)
agents = pd.read_csv(agent_file)
products = pd.read_csv(products_file)
product_customers = pd.read_csv(product_customer_file)
# sheet_name=None loads every sheet: the result is a dict of {sheet name: DataFrame}
text_builders = pd.read_excel(text_builders_file,sheet_name=None)

if there_is_full_data:
    # the cached table is pipe-delimited (see the to_csv call in the else branch)
    full_data = pd.read_csv(full_data_path,sep="|")
else:
    # stack ten copies of the customer table so each customer appears ten times
    full_data = pd.concat([customers]*10)

    # add Contact Type
    # ContactType / Product are expected to come from the wildcard enumerators import
    full_data["contact_type"] = full_data.apply(lambda x : ContactType.random_by_dist(proba=[.0858,.1012,.1284,.6347,.0499]).name, axis =1)
    full_data["product_name"] = full_data.apply(lambda x : Product.random_by_dist(proba=[0.053,0.111,0.102,0.085,0.054,0.027,0.074,0.058,0.017,0.049,0.057,0.042,0.057,0.051,0.035,0.010,0.047,0.063,0.008]).name, axis =1)

    # add new id
    full_data.insert(0,"new_chat_id", range(1,1 + len(full_data)))
    # write to the same path the cached branch reads from, so the two cannot drift apart
    full_data.to_csv(full_data_path,index=False,sep="|")

# remove the leading index column: these files were saved without index=False
agent_trans = agent_trans.iloc[:,1:]
customer_trans = customer_trans.iloc[:,1:]
chats = chats.iloc[:,1:]


In [3]:
def generate_text(task, tokenizer, model):
    """Run ``task`` through an already-loaded tokenizer/model pair and return the decoded text.

    Nothing is loaded here; the caller supplies both objects.

    Args:
        task: Prompt string handed to ``tokenizer.encode``.
        tokenizer: Object exposing ``encode(text, return_tensors=...)`` and
            ``decode(ids, skip_special_tokens=...)``.
        model: Object exposing ``generate(input_ids, ...)``.

    Returns:
        The decoded first sequence returned by ``model.generate``.
    """
    encoded_prompt = tokenizer.encode(task, return_tensors='pt')
    # beam search with 7 beams, no repeated bigrams, output length bounded to 50-100
    candidates = model.generate(
        encoded_prompt,
        num_beams=7,
        no_repeat_ngram_size=2,
        min_length=50,
        max_length=100,
    )
    return tokenizer.decode(candidates[0], skip_special_tokens=True)

def build_comment(topic_item, product, contact_type, tokenizer, model): 
    """Build a generation prompt from a topic row and return the generated comment.

    Args:
        topic_item: Object with ``Topic`` and ``Keyword`` attributes whose
            ``.values[0]`` is read (callers in this file pass a one-row sample).
        product: Product name placed first in the prompt.
        contact_type: Contact type placed last in the prompt.
        tokenizer: Forwarded to ``generate_text``.
        model: Forwarded to ``generate_text``.

    Returns:
        The text returned by ``generate_text`` for the assembled prompt.
    """
    first_topic = topic_item.Topic.values[0]
    first_keyword = topic_item.Keyword.values[0]
    # underscores are replaced with spaces across the whole prompt
    prompt = f"summarize:{product} {first_topic} {first_keyword} {contact_type}".replace("_"," ")
    return generate_text(task=prompt, tokenizer=tokenizer, model=model)

def generate_random_date(start_date,end_date):
    """Return a random timestamp string in ``[start_date, end_date)``.

    A whole-day offset below the number of days between the two dates is drawn,
    then a random time of day is added.

    Args:
        start_date: ``datetime.datetime`` lower bound (inclusive).
        end_date: ``datetime.datetime`` upper bound; must be at least one full
            day after ``start_date``, otherwise ``random.randrange`` raises
            ``ValueError``.

    Returns:
        The timestamp formatted as ``"%Y-%m-%d %H:%M:%S"``.
    """
    import datetime
    import random

    span_in_days = (end_date - start_date).days
    # draw order (days, hours, minutes, seconds) is kept so seeded runs are unchanged
    offset = datetime.timedelta(
        days=random.randrange(span_in_days),
        hours=random.randrange(0,24),
        minutes=random.randrange(0,60),
        seconds=random.randrange(0,60),
    )
    return (start_date + offset).strftime("%Y-%m-%d %H:%M:%S")

def generate_random_time(start_hour, end_hour): 
    """Return a random 12-hour clock time such as ``"3:07:45 PM"``.

    Args:
        start_hour: Lowest hour that can be drawn (inclusive, 24-hour scale).
        end_hour: Upper bound for the hour (exclusive, 24-hour scale).

    Returns:
        ``"H:MM:SS AM|PM"`` with the hour unpadded and minutes/seconds zero-padded.

    Raises:
        ValueError: From ``random.randrange`` when ``start_hour >= end_hour``.
    """
    hours = random.randrange(start_hour, end_hour)
    minutes = str(random.randrange(0, 60)).zfill(2)
    seconds = str(random.randrange(0, 60)).zfill(2)

    suffix = "AM" if hours < 12 else "PM"
    # A 12-hour clock has no hour 0: midnight is 12 AM and noon stays 12 PM.
    hours_adjusted = hours % 12 or 12

    return f"{hours_adjusted}:{minutes}:{seconds} {suffix}"

def comment_task(df,tokenizer,model): 
    """Add a model-generated ``new_comment`` column to ``df`` and return it.

    For each row a single random row is sampled from the module-level ``tops``
    frame and passed, with the row's ``product_name`` and ``contact_type``, to
    ``build_comment``. ``df`` is modified in place; ``tops`` must already exist
    when this is called.
    """
    def _row_comment(row):
        return build_comment(tops.sample(1), row.product_name, row.contact_type, tokenizer, model)

    df["new_comment"] = df.apply(_row_comment, axis=1)
    return df

def comment_build(x,s_df): 
    """Pick one random ``response`` whose score band contains ``x.survey_score``.

    Args:
        x: Row-like object exposing a ``survey_score`` attribute.
        s_df: DataFrame with ``min``, ``max`` and ``response`` columns; a row
            qualifies when ``min <= survey_score <= max``.

    Returns:
        The ``response`` value of one randomly sampled qualifying row.

    Raises:
        ValueError: From ``Series.sample`` when no row qualifies.
    """
    score = x.survey_score
    at_or_above_min = s_df["min"] <= score
    at_or_below_max = s_df["max"] >= score
    candidates = s_df.loc[at_or_above_min & at_or_below_max, "response"]
    return candidates.sample(n=1).values[0]

def comment_task_dumb(df,selection_df):
    """Add a ``new_comment`` column of randomly chosen canned responses to ``df``.

    Each row is handed to ``comment_build`` together with ``selection_df``, so the
    response is selected by the row's ``survey_score``. ``df`` is modified in
    place and also returned.

    Args:
        df: DataFrame whose rows expose ``survey_score``.
        selection_df: Lookup frame forwarded unchanged to ``comment_build``.
    """
    def _pick_response(row):
        return comment_build(row, selection_df)

    df["new_comment"] = df.apply(_pick_response, axis=1)
    return df

# User Story 1: adjusting site-down survey sentiment

In [4]:
# set up text surveys
# Keep only the website-failure topics; `tops` is also read as a global by comment_task().
topic_filter = ["Errors","Crashing","Website Feedback-Negative","Website-Broken Links/Pages"]
tops = topics[topics.Topic.isin(topic_filter)].copy()

# filter for site down
text_builders_survey = text_builders["site_down_cust_survey"]
text_builders_survey = text_builders_survey[text_builders_survey.type=="site_down"].copy()
# Score bounds are taken from the 1st and 3rd characters of the "survey range" text.
# NOTE(review): this only works for single-digit bounds (e.g. "1-5"); a bound of 10
# would be read as 1 — confirm the sheet never holds two-digit bounds.
text_builders_survey["min"] = text_builders_survey["survey range"].apply(lambda x: int(x[0]))
text_builders_survey["max"] = text_builders_survey["survey range"].apply(lambda x: int(x[2]))

# just grab some preseeded surveys (chats flagged site_down == 1)
outtage_surveys =chats[chats.site_down==1][["chat_number","site_down_sentiment","CustomerID","m_agent_ID"]]
# The last five of the ten weights are 0.
# NOTE(review): presumably this limits scores to the five lowest values — confirm against SurveyScore.
outtage_surveys["survey_score"] = outtage_surveys.apply(lambda x : SurveyScore.random_by_dist(proba=[.2,.3,.1,.2,.2,0,0,0,0,0]).value, axis =1)

# random comment: one canned response whose [min, max] band contains the row's score
outtage_surveys["new_comment"] = outtage_surveys.apply(lambda x: comment_build(x,text_builders_survey), axis=1)

In [5]:
outtage_surveys.site_down_sentiment.value_counts()

Negative    1843
9           1346
Neutral      496
Name: site_down_sentiment, dtype: int64

In [6]:
def tag(score, sentiment):
    """Derive a sentiment label from a survey score.

    Args:
        score: Numeric survey score.
        sentiment: Existing sentiment value. The sentinel ``"9"`` (or integer
            ``9``, the form used for ``site_down_sentiment`` later in this
            notebook) is passed through untouched.

    Returns:
        ``sentiment`` itself when it is the 9 sentinel; otherwise ``"Negative"``
        for scores below 5, ``"Neutral"`` for 5 to 6, and ``"Positive"`` above 6.
        ``None`` is returned only when ``score`` cannot be ordered (e.g. NaN).
    """
    # the sentinel means "not applicable"; never overwrite it with a derived label
    if sentiment in ("9", 9):
        return sentiment
    if score < 5:
        return "Negative"
    if score <= 6:
        return "Neutral"
    # previously scores above 6 fell off the end and silently produced None
    # NOTE(review): "Positive" is assumed as the third label — confirm downstream naming
    if score > 6:
        return "Positive"
    return None
    
# Re-derive each row's sentiment from its freshly drawn survey_score; rows whose
# original site_down_sentiment is the "9" sentinel keep that value (see tag()).
outtage_surveys["new_sentiment"] = outtage_surveys.apply(lambda x: tag(x.survey_score, x.site_down_sentiment), axis=1)

In [7]:
# adjust customers
# Left-join customer details on CustomerID -> customers.id.
outtage_surveys = outtage_surveys.merge(customers[["id","first_name","last_name","email","member_number"]], how="left",left_on="CustomerID",right_on="id")
# NOTE(review): with a left join, a CustomerID missing from `customers` yields NaN
# names and .upper() would raise — confirm every CustomerID has a match.
outtage_surveys["ContactName"] = outtage_surveys.apply(lambda x: x.first_name.upper() + " " + x.last_name.upper(), axis=1)
outtage_surveys = outtage_surveys.rename(columns={
    "email":"ContactEmail"
})

# drop the customer join helpers so the agent merge below can reuse the same column names
outtage_surveys = outtage_surveys.drop(columns=["first_name","last_name","id"])

# adjust agents
# Left-join agent details on m_agent_ID -> agents.id.
outtage_surveys = outtage_surveys.merge(agents[["id","first_name","last_name","team_name"]],how="left",left_on="m_agent_ID",right_on="id")
outtage_surveys["m_agent_name"] = outtage_surveys.apply(lambda x: x.first_name + " " + x.last_name, axis=1)
# the team *name* is stored under the m_agent_team_ID header
outtage_surveys = outtage_surveys.rename(columns={
    "team_name":"m_agent_team_ID"
})
outtage_surveys = outtage_surveys.drop(columns=["first_name","last_name","id"])

In [8]:
# Replace the original sentiment with the score-derived one and give the
# generated comment its final survey column name.
outtage_surveys = outtage_surveys.drop(columns=["site_down_sentiment"])
# The result was previously bound to a misspelled variable and used a misspelled
# "new_commnet" key, so neither rename ever took effect. DataFrame.rename ignores
# missing labels by default, so a later rename of "new_comment" stays harmless.
outtage_surveys = outtage_surveys.rename(columns={"new_comment":"overall_experience_comment","new_sentiment":"site_down_sentiment"})

In [9]:
import datetime
# Transaction timestamps fall on 2022-06-04 or 2022-06-05 (the end bound is exclusive);
# response timestamps fall from 2022-06-06 up to, but not including, 2022-07-30.
outtage_surveys["TransactionDateUTC_"] = outtage_surveys.apply(lambda x: generate_random_date(start_date=datetime.datetime(2022,6,4),end_date=datetime.datetime(2022,6,6)), axis=1)
outtage_surveys["ResponseReceivedDateUTC_"] = outtage_surveys.apply(lambda x: generate_random_date(start_date=datetime.datetime(2022,6,6),end_date=datetime.datetime(2022,7,30)), axis=1)

# keep only the "YYYY-MM-DD" part so it can be joined to the calendar's date_id
outtage_surveys["trans_date"] = outtage_surveys.apply(lambda x: x["TransactionDateUTC_"].split(" ")[0],axis=1)
outtage_surveys["resp_date"] = outtage_surveys.apply(lambda x: x["ResponseReceivedDateUTC_"].split(" ")[0],axis=1)

# NOTE(review): no earlier cell defines DateDim; a class of that name is defined in a
# later cell and one may also arrive via the wildcard enumerators import — confirm
# which one is in scope on a fresh Restart & Run All.
dates = DateDim()
# no start/end passed, so the calendar range is whatever DateDim chooses by default
dates = dates.create_date_table()

# adjust dates
# Swap each date string for the calendar's integer Id (left join: dates outside the
# calendar range come back as NaN).
outtage_surveys = outtage_surveys.merge(dates.calendar[["Id","date_id"]], how="left", left_on="trans_date", right_on = "date_id")
outtage_surveys = outtage_surveys.rename(
    columns={
        "Id":"TransactionDateUTC"
    }
)

# date_id must go before the second merge brings in another column of the same name
outtage_surveys = outtage_surveys.drop(columns="date_id")

outtage_surveys = outtage_surveys.merge(dates.calendar[["Id","date_id"]], how="left", left_on="resp_date", right_on = "date_id")
outtage_surveys = outtage_surveys.rename(
    columns={
        "Id":"ResponseReceivedDateUTC"
    }
)

outtage_surveys = outtage_surveys.drop(columns="date_id")

In [10]:
from dataclasses import replace

# reformat

# Keep only chats that already have a survey row (inner join) and pick up that
# survey's ContactRecordID.
outtage_surveys = outtage_surveys.merge(surveys[["m_contact_record_ID","ContactRecordID"]], how="inner", left_on="chat_number", right_on="m_contact_record_ID")

# chat_number duplicates m_contact_record_ID after the join; the drop result was
# previously discarded, so it is now assigned back.
outtage_surveys = outtage_surveys.drop(columns=["chat_number"])
outtage_surveys = outtage_surveys.rename(
    columns={
        "new_comment":"overall_experience_comment"
    }
)
# final survey layout; every other working column is discarded here
keep_cols = [
    'ContactRecordID','m_contact_record_ID', 'CustomerID', 'm_agent_ID', 'survey_score',
       'overall_experience_comment', 'ContactEmail', 'member_number',
       'ContactName', 'm_agent_team_ID', 'm_agent_name',
        'TransactionDateUTC', 'ResponseReceivedDateUTC'
]

outtage_surveys = outtage_surveys[keep_cols]

outtage_surveys = outtage_surveys.assign(SurveyDetails="pcs_chat_v3")


In [11]:
print(surveys.shape)
# Remove the survey rows that are being regenerated, then append the regenerated ones.
surveys_new = surveys[~surveys.m_contact_record_ID.isin(outtage_surveys.m_contact_record_ID)].copy()

# concat keeps each frame's own row labels, so index values can repeat in the result
surveys_new = pd.concat([surveys_new,outtage_surveys])
surveys_new

(2033, 13)


Unnamed: 0,ContactRecordID,SurveyDetails,m_contact_record_ID,ContactName,ContactEmail,member_number,m_agent_ID,m_agent_name,m_agent_team_ID,survey_score,overall_experience_comment,TransactionDateUTC,ResponseReceivedDateUTC,CustomerID
0,782819,pcs_chat_v3,26934,SPENSE FOLKE,sfolke59@patch.com,,141,Gratiana Moules,Campers,10,You value your customers,54,57,
1,782820,pcs_chat_v3,1280,NERON SCRIVNER,nscrivneraj@sfgate.com,,19,Johnathan Durbridge,Climbers,9,Always offers to help in extra ways,11,21,
2,782821,pcs_chat_v3,10955,CLIFFORD RENEHAN,crenehanp1@mapquest.com,,81,Myrtle Blachford,Campers,5,The code didn’t work two days later,18,18,
3,782822,pcs_chat_v3,3379,BOYCEY SEAGER,bseagerg5@gmpg.org,,173,Dorice Coffin,Runners,7,,7,8,
4,782823,pcs_chat_v3,11856,CORETTA BUZZA,cbuzzam2@tinypic.com,,30,Abbey Rivilis,Han Solo,6,Not worth it,51,52,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
238,783637,pcs_chat_v3,30898,MARIELE MORRANT,mmorrantmn@fastcompany.com,504930.0,120,Ginger Butterley,Canyoneers,5,"Everytime I have gone to the website, nothing ...",4,9,990.0
239,783575,pcs_chat_v3,30903,LIANNE STACEY,lstaceybh@hhs.gov,507073.0,29,Cass Broszkiewicz,Free Solo,2,the website is not working ever when I try to ...,3,18,1082.0
240,784410,pcs_chat_v3,30973,MICK EARPE,mearpei3@nba.com,504847.0,188,Celeste Costigan,Daytrippers,3,I always seem to have a problem on your site. ...,3,6,234.0
241,783437,pcs_chat_v3,31183,GERMAIN KENNON,gkennon4y@ovh.net,,192,Mayne McAuslene,Deep Water,2,Site needs help. Always timed out,4,6,3701.0


In [12]:

# index=False is not passed, so the row labels are written as the first CSV column.
# NOTE(review): the loader cell strips a leading index column from other files for
# exactly this reason — confirm whether consumers of this file expect that column.
surveys_new.to_csv("./data/us1_final_site_down_surveys.csv",sep=",")

In [18]:
# Sanity check: list the chats that have a matching survey row.
# NOTE(review): the name `c` is reused later in this notebook for an Actor instance.
c = chats[["chat_number"]].copy()
c = c.merge(surveys[["m_contact_record_ID"]], how="left", left_on="chat_number", right_on="m_contact_record_ID")
# after the left join, a non-null m_contact_record_ID means a survey exists for the chat
c = c[c.m_contact_record_ID.notnull()].copy()
c

Unnamed: 0,chat_number,m_contact_record_ID
1,6,6.0
9,14,14.0
39,44,44.0
53,58,58.0
59,64,64.0
...,...,...
31259,31211,31211.0
31264,31216,31216.0
31309,31261,31261.0
31337,31289,31289.0


# User Story 2
## password reset 

### issues: 
- based on the current filters, there are not enough interactions related to password resets
- more password-reset interactions are needed

In [4]:
# Topic export used to attach a Topic/Category to each chat in the cells below.
topic_export_path = "./data/topicsExport_20220907.csv"
texp = pd.read_csv(topic_export_path,sep=",")


  texp = pd.read_csv(topic_export_path,sep=",")


In [5]:
# merge chat to topic
# Left join: chats without an exported topic keep NaN in Topic/Category.
chats_t = chats.merge(texp[["m_contact_record_ID","Topic","Category"]], how="left",left_on="chat_number",right_on="m_contact_record_ID")

In [6]:
# Keep chats on login/website-error topics that did not happen during the site outage.
filters = ["Errors","Website-User/System","Password"]
chats_t = chats_t[chats_t.Topic.isin(filters)].copy()
chats_t = chats_t[chats_t.site_down==0].copy()

# NOTE(review): no random_state is given, so this 40% sample differs on every run
# (the full_data sample in a later cell is seeded) — confirm whether that is intended.
chats_t = chats_t.sample(frac=.4, replace=False) # 40 percent of current as password that is not related to website being down
chats_t.shape

(935, 137)

In [10]:
# Create new set of chat interactions
# Seeded 5% sample of the synthetic contact table, without replacement.
pass_cust = full_data.sample(frac=.05,replace=False, random_state=45)
# NOTE(review): 31371 is presumably the first chat id after the existing chats —
# confirm against the current maximum chat_number.
pass_cust.insert(0,"chat_id", range(31371,31371 + len(pass_cust))) # add new chat id

In [11]:
class Pipe:
    """Ordered list of named steps that are run to produce a scripted conversation.

    ``pipe`` holds ``(title, callable, kwargs)`` tuples; ``results`` holds the
    ``(title, return value)`` pairs from the most recent ``execute`` call.
    """

    def __init__(self):
        self.pipe = []
        self.results = []

    def execute(self):
        """Run every step in order, replacing any earlier results, and return ``self``."""
        self.results = []  # start from a fresh list on every run
        for step in self.pipe:
            title, func, kwargs = step[0], step[1], step[2]
            self.results.append((title, func(**kwargs)))
        return self

    def to_string(self):
        """Render the results as ``"title : text|"`` segments joined into one string."""
        # each segment is "<title> : <text>" followed by a "|" terminator
        return "".join(
            " ".join((title, ":", outcome)) + "|"
            for title, outcome in self.results
        )

class Actor: 
    """A conversation participant that speaks by sampling from named phrase pools.

    Every keyword argument given to the constructor becomes an instance attribute
    holding a sequence of candidate phrases.
    """
    # Kept for backward compatibility: binds the module as ``Actor.random``.
    # ``response`` itself resolves ``random`` from the module-level import.
    import random

    def __init__(self,actor_type=None, **kwargs):
        """Store the actor type and register each keyword argument as a phrase pool."""
        self.actor = actor_type
        for key, value in kwargs.items():
            self.__dict__[key] = value

    def response(self, responses=None, proba = None): 
        """Return one random phrase per requested pool, joined with single spaces.

        Args:
            responses: Iterable of pool names (instance attribute names). ``None``
                or an empty iterable yields ``""`` (``None`` previously raised
                ``TypeError`` even though it is the default).
            proba: Accepted for interface compatibility; currently unused.

        Raises:
            KeyError: If a requested pool was not supplied to the constructor.
        """
        if not responses:
            return ""
        pools = self.__dict__
        return " ".join(random.choice(pools[name]) for name in responses)

# Pool of customer opening complaints for the password-reset scenario. One entry is
# chosen at random per transcript via cust_params["password_complaints"].
# Near-duplicate phrasings are kept as they are; the strings are emitted verbatim
# into generated transcripts.
password_complaints = [
    "I've forgotten my password and can't login",
    "i forgot my password i'm not able to enter the password anymore",
    "i forgot my password and can't access my internet account",
    "i forgot my password and can't get into my account",
    "i forgot my password and i cannot access my account",
    "i forgot my password and can't access my account",
    "i forgot my password and i can't get into my account",
    "i forgot my password and can't login to my account",
    "i forgot my password and i can't login to the website",
    "i forgot my password and i can't log into my account",
    "i forgot my password and i can't login to my account",
    "i forgot my password and i'm not able to login",
    "i forgot my password and i'm unable to log in",
    "i forgot my password and i can't log in anymore",
    "i forgot my password and i can't access it",
    "i have forgotten my password and i can't log in to my account",
    "i forgot my password and i can't register",
    "i forgot my password and now i can't log in",
    "i forgot my password and i can't get in",
    "i forgot my password and now i can't login",
    "i forgot my password and can't log in",
    "i forgot my password and i cannot log in",
    "i forgot my password and i cannot login",
    "i forgot my password and can't login",
    "i forgot my password and i can't log in",
    "i forget my password and i can't login",
    "i forgot my password and i can't login",
    "i have forgotten my password and i cannot login",
    "i have forgotten my password and i can't log on",
    "i have forgotten my password and i can't log in",
    "i have forgotten my password and i can't login",
    "i can't remember my password",
    "i can't remember my username so i can't sign into another site",
    "i can't remember my username and i can't log into the service",
    "i can't remember my username and i can't login anymore",
    "i can't remember my username and now i'm not able to log in",
    "i can't remember my username and now i'm not able to login",
    "i don't remember my username and i can't log in",
    "i can't remember my username and now i can't register",
    "i don't remember my username and i can't login",
    "i can't remember my username so i can't login",
    "i can't remember my username and can't login",
    "i can't remember my username and i can't login",
    "i don't remember my username and now i can't login",
    "i can't remember my username and now i can't log in",
    "i can't remember my username and now i can't login",
    "i have forgotten my password so i can access my site and the account",
    "i have forgotten my password i need to reset my account",
    "i have forgotten my password i need to recover my password",
    "i have forgotten my password",
    "can i reset my password?",
    "i have to reset the password",
    "i want to reset my password",
    "i have to reset my password",
    "do i need to reset my password?",
    "i need to reset my password",
    "you're website is awful and i'm not allowed to log in",
    "you're website is awful and i'm not able to log in",
    "your website is terrible and i can't log in to my computer",
    "your website is terrible and i can't log in to your site",
    "you have a terrible website and i can't log in",
    "you have a terrible website and i can't login",
    "your website is terrible and i can't login to the site",
    "your site is awful and i can't login",
    "your website is terrible and i can't access it",
    "your website is terrible and i can't log in to you",
    "your website is bad and i can't login",
    "your website is awful and i can't login",
    "your site is horrible and i can't login",
    "your website is terrible and i can't get in",
    "your website is horrible and i can't log in",
    "your site is terrible and i can't login",
    "your website is horrible and i can't login",
    "your website is terrible and i can't log in",
    "your website is terrible and i can't login"

    ]

# Phrase pools for the agent side of the scripted password-reset chat. Each key
# becomes an attribute on the agent Actor and one entry is drawn at random whenever
# that key is requested. "[agent_name]" is a placeholder replaced later with the
# assigned agent's name. Repeated entries are kept as-is: they make that phrase
# more likely to be drawn.
agent_params= {
    "companies":[
        "Rocks and Ropes",
        "R and R",
        "Rock n Ropes",
        "RnR"
    ],
    "openings": [
        "Thank you for chatting with",
        "Thanks for chatting",
        "Thanks for contacting",
        "Welcome to ",
        "Hi, thanks for chatting",
        "It’s a great day at",
        "Thanks for contacting",
        "Welcome to",
        "It’s a great day at",
        "Welcome to Rocks N Ropes Live Chat! I'll be happy to help you today with your needs at"
    ],
    "agent_intro":[
        "This is",
        "My name is",
        "You're speaking with",
        "I'm",
        "Hi, this is",
        "You're chatting with",
        "Good morning, this is"

    ],
    "agent_name":[
        "[agent_name]"
    ],

    "drivers": [
                "How may I help you?",
                "How can I assist you today?",
                "How can I help you today?",
                "How may I help you today?",
                "What can I do to make it a great day?",
                "What can I do to assist you?",
                "How may I assist you?",
                "How can I help you get outdoors?",
                "How can I help you?",
                "What can do for you?",
                "What can we do for you?",
                "How can I help get you outside?",
                "How can I help get you outdoors?",
                "How might I help you?",
                "How might I assist you?"
            ],
    "acknowledgement":[
        "I can help you with that",
        "Let me see what I can do",
        "I'm happy to assist",
        "I can assist with that",
        "I think I can help you",
        "I don't know if I can help",
        "OK",
        "Let's see what we can do",
        "I'll be happy to assist you",
        "I'll be happy to assist",
        "I sure I can assist you",
    ], 
    "password_help":[
        # a comma was missing after the first entry, so Python's implicit string
        # concatenation fused the first two prompts into one double question
        "can i get an email with your account?",
        "can i get the email you registered for your account?",
        "can you send me an email with your account?",
        "can i get an email for your account?",
        "can i get an email linked to your account?",
        "can i get an email to your account?",
        "can i get an associated email to your account?",
        "can i get an associated email with your account?",
        "can i ask for an email to be associated with my account?",
        "can i get an email related to your account?",
        "can i get the email associated with your account?",
        "can i get a registered email associated with your account?",
        "can i get an e-mail associated with my account?",
        "can i get your email associated with your account?",
        "can i receive an email associated with your account?",
        "can i get an email associated with your account?",
        "can i get the email associated with your account?",
        "can i have an e-mail associated with your account?",
        "can you get a hold of an email associated with your account?",
        "can you get an email associated with your account?",
        "can i have an email associated with your account?",
        "can i get an e-mail associated with your account?",
        "is there a way to get an email associated with your account?",
        "can i get an email associated with your account?",
        "can i get your account email?",
        "can you get an associated email?"
    ], 
    "reset_password":[
        "Let me send you a reset link", 
        "i'll send you a link to reset",
        "let me send you the link to the reset",
        "i'll send you a link to reset it",
        "let me send you the reset link",
        "let me send you a reset link",
        "i can send you a link to reset your login and your account information",
        "i can send you a link to reset your login information",
        "i can send you a link to reset your login credentials",
        "i can send you a link to reset your password",
        "i can send you a link to reset your login details",
        "i can send you a link to reset your account",
        "i can send you a link to reset your log in",
        "i can send you a link to reset your login"
    ],
    "closing_question":[
        "is there anything i can do to help you today?",
        "can i help you with anything else today?",
        "what else can i do to help you today?",
        "can i help you with something else?",
        "can i help you in any way else today?",
        "can i help you with anything else?",
        "can i help you on anything else?",
        "what else can i help you with today?",
        "can i help you out with anything else today?",
        "is it possible i can help you with something else today?",
        "can i help you with anything else today?",
        "can i help you with something else today?",
        "can i help you with anything else today?"
    ], 
    "closing_wrap":[
        "Thanks for contacting",
        "Thanks for reaching out to us at",
        "Thank you for being a great customer at",
        "Thanks for loving the outdoors with ",
        "Thank you for contacting",
        "Thank you for loving the outdoors with",
        "Thank you for getting outdoors with",
        "Thank you for being outdoors with",
        "Well, thank you for contacting",
        "Well, thanks for contacting",
        "Hey, thanks for contacting",
        "Hey, thank you for contacting"
    ]
}


# Phrase pools for the customer side of the scripted password-reset chat. Each key
# becomes an attribute on the customer Actor; "[email]" is a placeholder that is
# replaced later with the sampled customer's email address.
cust_params={
    "greetings":[
        "Hi","Hello","Hey","Howdy","What's Up"
    ],
    # reuses the module-level list defined above
    "password_complaints":password_complaints,
    "acknowledgement":[
        "Okay","Fine",
        "Ugh",
        "Sure",
        "Yes",
        "One moment",
        "I guess",
        "Let me see",
        "Sure thing",
        "You got it",
        "gotcha",
        "Ah okay",
        "Not a problem",
        "Yeah"
    ],
    "email_acknowledgement": [
        "my email is [email]",
        "it's [email]",
        "[email]"
    ],
    "response_ack":[
        "great",
        "okay","thank you",
        "awesome",
        "thank you very much",
        "wonderful",
        "yes that is great",
        "that works"
    ],
    "closing":[
        "No I'm okay",
        "No I'm good",
        "I'm fine thank you.",
        "That's it",
        "I got nothing else.",
        "No, Thank you",
        "no bye",
        "this will be all", 
        "I do not need anymore help",
        "No",
        "not right now"
    ]
}
# One pipeline plus one Actor per side of the conversation.
# NOTE(review): `c` was bound to a DataFrame in an earlier cell; it is rebound here.
p = Pipe()
a = Actor(**agent_params)
c = Actor(**cust_params)

# map the title to the column header it needs to go to
# Each step is (title, callable, kwargs); steps run in list order, and listing
# several pool names in "responses" joins one random phrase from each with a space.
pp = [
    ("a_agent_opening",a.response,{"responses":["openings","companies"]}),
    ("a_agent_intro",a.response,{"responses":["agent_intro","agent_name"]}),
    ("c_customer_intro",c.response,{"responses":["greetings"]}),
    ("a_contact_driver_prompt",a.response,{"responses":["drivers"]}),
    ("c_contact_driver",c.response,{"responses":["password_complaints"]}),
    ("a_agent_request_acknowledgement",a.response,{"responses":["acknowledgement"]}),
    ("a_product_acknowledge",a.response,{"responses":["password_help"]}),
    ("c_address_response",c.response,{"responses":["acknowledgement","email_acknowledgement"]}),
    ("a_reset_password_prompt",a.response,{"responses":["reset_password"]}),
    ("c_password_acknowledgement",c.response,{"responses":["response_ack"]}),
    ("a_agent_closing_start",a.response,{"responses":["closing_question"]}),
    ("c_customer_closing_response",c.response,{"responses":["closing"]}),
    ("a_agent_closing_wrap",a.response,{"responses":["closing_wrap","companies"]}),
]


# smoke test: generate and print a single transcript
p.pipe = pp
p.execute()
print(p.to_string())

a_agent_opening : Thanks for contacting R and R|a_agent_intro : You're chatting with [agent_name]|c_customer_intro : Hey|a_contact_driver_prompt : What can I do to assist you?|c_contact_driver : i forgot my password and i can't get in|a_agent_request_acknowledgement : I don't know if I can help|a_product_acknowledge : can i get an email to your account?|c_address_response : One moment it's [email]|a_reset_password_prompt : i can send you a link to reset your password|c_password_acknowledgement : wonderful|a_agent_closing_start : can i help you with anything else today?|c_customer_closing_response : this will be all|a_agent_closing_wrap : Hey, thanks for contacting Rocks and Ropes|


In [12]:
# Re-run the pipeline once per row so every customer gets an independently
# randomized transcript (the row itself is not used by the lambda).
pass_cust["new_transcript"] = pass_cust.apply(lambda x: p.execute().to_string(), axis=1)

# change email parameters: fill the "[email]" placeholder with the row's own email
pass_cust["new_transcript"] = pass_cust.apply(lambda x: x.new_transcript.replace("[email]",x.email), axis=1)




In [13]:
# reformat to chat upload
pass_cust.columns.values

# Customer fields get an "m_customer_" prefix for the chat upload layout.
# NOTE(review): "new_chat_id" (the row id from full_data) becomes "chat_number",
# while the freshly generated "chat_id" (31371+) keeps its name — confirm that
# chat_number is not expected to be the new, non-colliding id.
columns_rename = {
    "id":"m_customer_id",
    "first_name":"m_customer_first_name",
    "last_name":"m_customer_last_name",
    'new_chat_id':"chat_number",
    'email':"m_customer_email",
    'street_address':"m_customer_street_address", 
    'city':"m_customer_city",
    'state':"m_customer_state", 
    'zip':"m_customer_zip", 
    'phone':"m_customer_phone", 
    'gender':"m_customer_gender",
    'membership_start':"m_customer_membership_start", 
    'customer_level':"m_customer_level", 
    'member_number':"m_member_number", 
    'Frequency':"m_customer_Frequency"
}

pass_cust = pass_cust.rename(columns=columns_rename)

In [14]:
# randomly assign agent to interaction
# One agent id is drawn per row; no random_state is set, so assignments change between runs.
pass_cust["m_agent_ID"] = pass_cust.apply(lambda x:agents.id.sample(n=1).values[0], axis=1)
# Customer columns were renamed in the previous cell, so the agents' id/first_name/
# last_name columns do not clash in this merge.
pass_cust_ = pass_cust.merge(agents, how="left",left_on="m_agent_ID",right_on="id")
pass_cust_.head()

Unnamed: 0,chat_id,chat_number,m_customer_id,m_customer_first_name,m_customer_last_name,m_customer_email,m_customer_street_address,m_customer_city,m_customer_state,m_customer_zip,...,contact_type,product_name,new_transcript,m_agent_ID,id,first_name,last_name,extension,agent_tenure_days,team_name
0,31371,42463,2463,Wynn,Dyne,wdynebn@yelp.com,43 Linden Avenue,Buffalo,New York,14233.0,...,Defect,CudaFooledMe_backpack,a_agent_opening : Thanks for contacting Rocks ...,16,16,Loraine,Boniface,4382,812,Backcountry
1,31372,11770,1770,Georgette,Fentem,gfentembv@princeton.edu,30 Daystar Parkway,Los Angeles,California,90050.0,...,Defect,Illusion_camp_bag,a_agent_opening : Thanks for contacting Rock n...,104,104,Gabriello,Ziemen,4995,336,Han Solo
2,31373,32450,2450,Korney,Postgate,kpostgateb5@taobao.com,384 Fisk Place,San Antonio,Texas,78296.0,...,Promo,CudaFooledMe_backpack,a_agent_opening : Welcome to Rocks N Ropes Liv...,53,53,Evelina,Minchinton,4579,869,Daytrippers
3,31374,18689,3689,Sorcha,Falconar,sfalconar4f@parallels.com,617 Kropf Lane,Spring Hill,Florida,34611.0,...,Sales_Order,Hoax_Max_sleeping_bag,a_agent_opening : Welcome to Rocks N Ropes Liv...,173,173,Dorice,Coffin,4545,400,Runners
4,31375,20343,343,Pall,Amiss,pamissam@bloomberg.com,1899 Texas Crossing,Bethesda,Maryland,20816.0,...,Defect,Unreal_kettle_mess_kit,a_agent_opening : Welcome to Rock n Ropes|a_a...,8,8,Tedda,Dorrins,4875,1229,Daytrippers


In [15]:
# reformat agents
# Agent fields get an "m_agent_" prefix to match the customer columns.
columns_rename = {
    "id":"m_agent_id",
    "first_name":"m_agent_first_name",
    "last_name":"m_agent_last_name",
    "extension": "m_agent_extension",
    "agent_tenure_days": "m_agent_tenure_days",
    "team_name":"m_agent_team_name"
}

pass_cust_ = pass_cust_.rename(columns=columns_rename)

# add agent name to transcript: only the first name fills the "[agent_name]" placeholder
pass_cust_["new_transcript"] = pass_cust_.apply(lambda x: x.new_transcript.replace("[agent_name]",x["m_agent_first_name"]), axis=1)


In [16]:
pass_cust_

# The string below is a planning note kept as a bare expression; it has no runtime effect.
"""
add the following - but for this let's keep it to 0 on the booleans as it is restricted to passowrd reset
site_down
sale_accept
site_down_sentiment
product_name
product_family
order_identifier
carrier
customer_state
slow_ship_state
wismo_complaint
promo_fail_reason
code_exception
purchase_date
warranty
rma
return_reason
exchange_offered
exchange_accepted
gift
refund
day_of_week
week_number
chat_number
m_contact_day_number
m_contact_time_start
m_contact_time_end
m_contact_duration
m_contact_messages

keep it to even distributoin on days with a couple of spikes
"""
# assign the known story values to boolean 0
# Flag columns are set to 0; the remaining columns get the constant 9.
# The a_*/c_* columns share their names with the pipeline step titles and are
# meant to receive the split transcript text in the next cell.
# NOTE(review): 9 looks like a "not applicable" placeholder — confirm.
pass_cust_= pass_cust_.assign(
    site_down=0,
    sale_accept=0,
    site_down_sentiment=9,
    wismo_complaint=0,
    promo_fail_reason = 9,
    code_exception=0,
    rma=0,
    return_reason=9,
    exchange_offered=0,
    exchange_accepted=0,
    gift=0,
    refund=0,
    a_agent_opening=9,
    a_agent_intro=9,
    c_customer_intro=9,
    a_contact_driver_prompt=9,
    c_contact_driver=9,
    a_agent_request_acknowledgement=9,
    a_product_acknowledge=9,
    c_address_response=9,
    a_reset_password_prompt=9,
    c_password_acknowledgement=9,
    a_agent_closing_start=9,
    c_customer_closing_response=9,
    a_agent_closing_wrap=9
)




In [17]:

# add random on some
# NOTE(review): unlike the contact_type/product_name draws in the loader cell, no
# .name/.value is taken here, so whatever random_by_dist returns is stored as-is —
# confirm that is the intended cell value.
pass_cust_["carrier"] = pass_cust_.apply(lambda x: Carrier.random_by_dist(proba=[.33,.33,.34]), axis=1)

# do this in loop array for now 
def split_assign(index, df):
    """Spread each row's ``new_transcript`` into one column per transcript segment.

    A transcript is ``"title : text|title : text|..."``. For every segment the text
    is written to the column named by the title, in place.

    Fixes over the previous version:
      * titles and texts are stripped, so ``"a_agent_opening "`` (trailing space)
        no longer creates a new column next to the intended ``a_agent_opening``;
      * only the first ``:`` separates title from text, so text that itself
        contains a colon is preserved;
      * rows are addressed by label for both the read and the write (previously
        positional for the read and label-based for the write).

    Args:
        index: Iterable of row labels of ``df`` to process (labels must be unique).
        df: DataFrame with a ``new_transcript`` string column.

    Returns:
        The same ``df`` object, mutated.
    """
    for row_label in index:
        transcript = df.at[row_label, "new_transcript"]
        for segment in transcript.split("|"):
            title, separator, text = segment.partition(":")
            column = title.strip()
            # segments without a "title:" prefix (e.g. the empty tail after the last "|") are skipped
            if not separator or not column:
                continue
            # placeholder columns are numeric; make them object dtype before storing text
            if column in df.columns and pd.api.types.is_numeric_dtype(df[column]):
                df[column] = df[column].astype(object)
            df.at[row_label, column] = text.strip()
    return df

# split transcript into appropriate splits
# split_assign writes into the frame it is given and returns that same object, so
# pass_cust_t and pass_cust_ refer to the same DataFrame afterwards.
pass_cust_t = split_assign(
    pass_cust_.index.tolist(),
    pass_cust_
)



In [18]:
from enum import Enum,Flag, auto
import numpy as np
import pandas as pd
from datetime import datetime,timedelta
import time
class DateDim:
    """Calendar ("date dimension") table with helpers for weighted random day selection.

    ``create_date_table`` builds ``self.calendar`` with one row per day.
    ``create_probability_dist`` spreads a few per-day weights over a vector that
    has one entry per calendar row, and ``random_by_dist`` draws a calendar ``Id``
    using such a vector.
    """

    def __init__(self): 
        # filled in by create_date_table()
        self.calendar = None

    def create_date_table (self, start=None,end=None):
        """Build the calendar and store it on ``self.calendar``.

        Parameters
        ----------
        start, end : date-like, optional
            Inclusive bounds passed to ``pandas.date_range``. When ``start`` is
            omitted, both bounds are replaced by a default 95-day window that
            ends on the last Sunday strictly before the current UTC date.

        Returns
        -------
        DateDim
            ``self``, so the call can be chained.

        Calendar columns: ``Date``, ``Id`` (1-based row number), ``Day``
        (Monday=0 .. Sunday=6), ``Week`` (ISO week), ``Quarter``, ``Year``,
        ``Year_half`` (1 or 2) and ``date_id`` (``YYYY-MM-DD`` string).
        """
        # local import: only needed for the default window
        # (datetime.utcnow() is deprecated since Python 3.12)
        from datetime import timezone

        if start is None: 
            window = self.__last_day(datetime.now(timezone.utc), "sunday")
            start = window["start"]
            end = window["end"]
        df = pd.DataFrame({"Date": pd.date_range(start, end)})
        df["Day"] = df.Date.dt.weekday
        df["Week"] = df.Date.dt.isocalendar().week
        df["Quarter"] = df.Date.dt.quarter
        df["Year"] = df.Date.dt.year
        df["Year_half"] = (df.Quarter + 1) // 2
        df.insert(1, 'Id', range(1, 1 + len(df)))

        # friendly id
        df["date_id"] = df.Date.dt.strftime("%Y-%m-%d")
        self.calendar = df
        
        return self

    def random_by_dist(self, proba:list):
        """Draw one calendar ``Id`` at random.

        ``proba`` needs one weight per calendar row, in row order, summing to 1
        (it is passed to ``numpy.random.choice`` as ``p``).
        """
        return np.random.choice(np.array(self.calendar.Id.tolist()), p = proba)

    def create_probability_dist(self, probabilities=None, start=None, end=None,date_list:list=None): 
        """Spread ``probabilities`` over the calendar as a full-length weight vector.

        Parameters
        ----------
        probabilities : sequence of float
            Weights that sum to 1 (within floating-point tolerance).
        start, end : date-like, optional
            Inclusive date range used when ``date_list`` is not given; one weight
            per calendar day in the range, in calendar order.
        date_list : iterable of str, optional
            ``YYYY-MM-DD`` ids; ``probabilities[i]`` is given to ``date_list[i]``.
            A date listed more than once accumulates its weights.

        Returns
        -------
        list
            One entry per calendar row (row order), 0 for unselected days, ready
            to be passed to ``random_by_dist``.

        Raises
        ------
        ValueError
            If the weights are missing or do not sum to 1, a requested date is
            not in the calendar, or the number of selected days differs from the
            number of weights.
        """
        if probabilities is None:
            raise ValueError("probabilities must be provided")
        weights = list(probabilities)
        # tolerant comparison: decimal weights rarely add up to exactly 1.0 in binary floats
        if not np.isclose(np.sum(weights), 1.0):
            raise ValueError("probabilities must add to 1.0")

        calendar = self.calendar
        if date_list is None:
            if start is None or end is None:
                raise ValueError("provide either date_list or both start and end")
            in_range = (calendar.Date >= start) & (calendar.Date <= end)
            positions = np.flatnonzero(in_range.to_numpy()).tolist()
        else:
            position_of = {day: pos for pos, day in enumerate(calendar.date_id.tolist())}
            requested = list(date_list)
            unknown = [day for day in requested if day not in position_of]
            if unknown:
                raise ValueError(f"dates not in calendar: {unknown}")
            positions = [position_of[day] for day in requested]

        if len(positions) != len(weights):
            raise ValueError(
                f"{len(weights)} probabilities given for {len(positions)} selected dates"
            )

        # Index by row position (not by the 1-based Id) so the vector lines up
        # with the Id array that random_by_dist samples from.
        dist = [0] * len(calendar)
        for pos, weight in zip(positions, weights):
            dist[pos] += weight
        return dist

    def __last_day(self,d, day_name):
        """Return the default window as ``{"start": ..., "end": ...}`` date strings.

        ``end`` is the last ``day_name`` strictly before ``d`` (a full week back
        when ``d`` itself falls on that weekday); ``start`` is 94 days earlier.
        """
        days_of_week = ['sunday','monday','tuesday','wednesday',
                            'thursday','friday','saturday']
        target_day = days_of_week.index(day_name.lower())
        # isoweekday() is Monday=1 .. Sunday=7; Sunday's list position 0 is congruent to 7 mod 7
        delta_day = target_day - d.isoweekday()
        if delta_day >= 0: delta_day -= 7 # go back 7 days
        ds = {
            "end": (d + timedelta(days=delta_day)).strftime("%Y-%m-%d"),
            "start": (d + timedelta(days=delta_day - 94)).strftime("%Y-%m-%d")
        }
        return ds

# Build the default calendar (create_date_table returns the DateDim itself).
dates = DateDim().create_date_table()

# Ten randomly chosen calendar days carry all of the probability mass;
# every other day gets weight 0 in the resulting vector.
date_prob = [.05, .05, .1, .2, .03, .1, .1, .15, .15, .07]  # probabilities for dates
date_list = dates.calendar.date_id.sample(n=10, replace=False)
date_p_list = dates.create_probability_dist(
    probabilities=date_prob,
    date_list=date_list,
)

# One weighted draw of a calendar Id per row.
pass_cust_["interaction_date_id"] = pass_cust_.apply(
    lambda _row: dates.random_by_dist(proba=date_p_list),
    axis=1,
)


In [19]:
# Bring the calendar's Day and Date onto each row via the sampled date Id.
pass_cust_ = pass_cust_.merge(
    dates.calendar[["Id", "Day", "Date"]],
    how="left",
    left_on="interaction_date_id",
    right_on="Id",
)

# Rolling week number: cycle 1..14 down the rows (independent of the actual date).
week_list = list(range(1, 15))
pass_cust_["week_number"] = np.resize(week_list, len(pass_cust_))

# Contact timing: random start, a 2..19 minute duration, end = start + duration.
# generate_random_time is defined elsewhere in the notebook.
# NOTE(review): the format pairs 24-hour %H with %p; strptime only applies AM/PM
# when %I is used - confirm generate_random_time emits this same format.
pass_cust_["m_contact_time_start"] = pass_cust_.apply(
    lambda _row: generate_random_time(9, 17),
    axis=1,
)
pass_cust_["m_contact_duration"] = pass_cust_.apply(
    lambda _row: random.randrange(2, 20),
    axis=1,
)
pass_cust_["m_contact_time_end"] = pass_cust_.apply(
    lambda row: (
        datetime.strptime(row.m_contact_time_start, "%H:%M:%S %p")
        + timedelta(minutes=row.m_contact_duration)
    ).strftime("%H:%M:%S %p"),
    axis=1,
)

In [22]:
# Message count per chat: a random integer from 5 to 19 (randrange excludes the stop value).
pass_cust_["m_contact_messages"] = pass_cust_.apply(
    lambda _row: random.randrange(5, 20),
    axis=1,
)


In [24]:
# Snapshot of the full working frame: every column, with the index written as well.
pass_cust_.to_csv(path_or_buf="./password_story.csv", sep=",")

In [26]:
# Columns kept for the upload file, in output order.
cols = (
    # chat id and customer profile
    ['chat_id', 'm_customer_id', 'm_customer_first_name',
     'm_customer_last_name', 'm_customer_email',
     'm_customer_street_address', 'm_customer_city', 'm_customer_state',
     'm_customer_zip', 'm_customer_phone', 'm_customer_gender',
     'm_customer_membership_start', 'm_customer_level',
     'm_member_number', 'm_customer_Frequency']
    # contact channel and product
    + ['contact_type', 'product_name']
    # agent profile
    + ['m_agent_ID', 'm_agent_first_name', 'm_agent_last_name',
       'm_agent_extension', 'm_agent_tenure_days', 'm_agent_team_name']
    # story flags / reason codes
    + ['site_down', 'sale_accept', 'site_down_sentiment', 'wismo_complaint',
       'promo_fail_reason', 'code_exception', 'rma', 'return_reason',
       'exchange_offered', 'exchange_accepted', 'gift', 'refund']
    # transcript-step placeholders (set to 9 with the story defaults)
    + ['a_agent_opening', 'a_agent_intro', 'c_customer_intro',
       'a_contact_driver_prompt', 'c_contact_driver',
       'a_agent_request_acknowledgement', 'a_product_acknowledge',
       'c_address_response', 'a_reset_password_prompt',
       'c_password_acknowledgement', 'a_agent_closing_start',
       'c_customer_closing_response', 'a_agent_closing_wrap']
    + ['carrier']
    # Same step names WITH a trailing space - presumably the columns created by
    # split_assign from the transcript keys; the space is part of the name.
    + ['a_agent_opening ', 'a_agent_intro ', 'c_customer_intro ',
       'a_contact_driver_prompt ', 'c_contact_driver ',
       'a_agent_request_acknowledgement ', 'a_product_acknowledge ',
       'c_address_response ', 'a_reset_password_prompt ',
       'c_password_acknowledgement ', 'a_agent_closing_start ',
       'c_customer_closing_response ', 'a_agent_closing_wrap ']
    # interaction date and contact metrics
    + ['interaction_date_id', 'Day']
    + ['m_contact_time_start', 'm_contact_time_end', 'm_contact_duration',
       'm_contact_messages']
)

# Independent copy restricted to the selected columns.
pass_cust_c = pass_cust_.loc[:, cols].copy()


In [27]:
# Switch three columns to the names listed in the story notes
# (chat_number, m_contact_day_number, day_of_week).
pass_cust_c = pass_cust_c.rename(
    {
        "chat_id": "chat_number",
        "interaction_date_id": "m_contact_day_number",
        "Day": "day_of_week",
    },
    axis="columns",
)

In [28]:
# Write the upload file without the DataFrame index.
pass_cust_c.to_csv(
    path_or_buf="./data/password_story_for_upload.csv",
    sep=",",
    index=False,
)