<a href="https://colab.research.google.com/github/rabbitmetrics/voice-of-customer/blob/main/notebooks/voice-of-customer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
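# Data source (Amazon review dataset): https://cseweb.ucsd.edu/~jmcauley/datasets/amazon_v2/
# Download AMAZON_FASHION.json.gz and meta_AMAZON_FASHION.json.gz into the working directory.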

In [None]:
import os
import gzip
import json
import pandas as pd

from dotenv import load_dotenv,find_dotenv

# Load variables from the .env file located by find_dotenv() before any client
# object is created.
# NOTE(review): no API key is passed explicitly anywhere in this notebook, so the
# OpenAI / Pinecone / Zapier clients presumably read their credentials from the
# environment populated here — confirm the expected variable names.
load_dotenv(find_dotenv())

In [None]:
# Extract data and load to dataframes

def _read_json_lines(path):
    """Return one parsed JSON object per line of the gzip-compressed file at `path`."""
    with gzip.open(path) as handle:
        return [json.loads(raw_line.strip()) for raw_line in handle]


# Raw review records and raw product-metadata records, one dict per line
data = _read_json_lines('AMAZON_FASHION.json.gz')
metadata = _read_json_lines('meta_AMAZON_FASHION.json.gz')

df = pd.DataFrame.from_dict(data)
df_meta = pd.DataFrame.from_dict(metadata)

# Keep only the product under study; reviews without text are dropped as well
product_asin = 'B000KPIHQ4'
has_review_text = df['reviewText'].notna()
df = df.loc[has_review_text & (df['asin'] == product_asin)].copy()
df_meta = df_meta.loc[df_meta['asin'] == product_asin].copy()

In [None]:
# Prepare lists for Pinecone upload
# (review texts plus one {"rating": ...} metadata dict per review, in the same row order)

texts = df['reviewText'].tolist()
metadata = [{'rating': score} for score in df['overall'].tolist()]

In [None]:
# Load embeddings

from langchain.embeddings import HuggingFaceEmbeddings

# No arguments are passed, so the wrapper's default model and settings are used.
# NOTE(review): presumably the default model is fetched on first use — confirm
# which model that is if the index dimension matters.
embeddings = HuggingFaceEmbeddings()

In [None]:
# Add review embeddings with metadata

# `Pinecone` is LangChain's vector-store wrapper. It is imported here because no
# other cell brings the name into scope; without this import the cell raises
# NameError on a fresh kernel.
from langchain.vectorstores import Pinecone

# Embeds every review text with `embeddings` and stores it in the index named
# 'voice', attaching the per-review {"rating": ...} dict as metadata.
# NOTE(review): the Pinecone client itself is never initialised in this notebook;
# presumably its API key / environment come from the variables loaded by
# load_dotenv — confirm, or add an explicit client initialisation before this cell.
vstore = Pinecone.from_texts(texts, embeddings, index_name='voice', metadatas=metadata)

In [None]:
# Filtered similarity search in LangChain
#
# Retrieves reviews similar to the query sentence, restricted by a metadata
# filter on the "rating" key (the key written into each metadata dict from the
# review's `overall` score), here to rating 5.0 only.
# NOTE(review): the positional 100 is presumably the number of hits to return —
# confirm against the vector-store API.
docs=vstore.similarity_search("The Powerstep Pinnacle Shoe Insoles are fantastic", 100, filter= {"rating":5.0})

In [None]:
# Load Chat Model and summarize chain for writing summary of reviews and ad copy

from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.chains.summarize import load_summarize_chain

# One chat model instance, reused by both the review-summary chain and the
# ad-copy chain below.
# NOTE(review): no API key is passed; presumably it is read from the environment
# loaded by load_dotenv — confirm.
chat = ChatOpenAI(model_name="gpt-4",temperature=0.2)

In [None]:
# Write summary of reviews
#
# Runs the retrieved review documents (`docs`) through a summarize chain with a
# custom prompt. The result is kept in `summary`, which is later passed to
# formulate_email when the customer emails are sent.

# {text} is the only template variable; the chain fills it with the review text.
prompt_template_summary = """
Write a summary of the reviews:

{text}

The summary should be about ten lines long
"""
PROMPT = PromptTemplate(template=prompt_template_summary, input_variables=["text"])
# NOTE(review): chain_type="stuff" presumably puts all documents into a single
# prompt — confirm that the retrieved reviews fit the model's context window.
chain = load_summarize_chain(chat, chain_type="stuff", prompt=PROMPT)
summary=chain.run(docs)

In [None]:
# Write ad copy for Facebook ad
#
# Same chain set-up as the review summary, but with a prompt asking for ad copy
# within stated character limits. The result is kept in `fb_copy`.
# Note that PROMPT and chain are rebound here, replacing the summary versions.

# {text} is the only template variable; the chain fills it with the review text.
prompt_template_fb = """
Write the copy for a facebook ad based on the reviews:

{text}

As far as text goes, you can have up to 40 characters in your headline, 
125 characters in your primary text, and 30 characters in your description
"""
PROMPT = PromptTemplate(template=prompt_template_fb, input_variables=["text"])
chain = load_summarize_chain(chat, chain_type="stuff", prompt=PROMPT)
fb_copy=chain.run(docs)

In [None]:
# Load tools needed for connecting LangChain and Zapier

from langchain.agents.agent_toolkits import ZapierToolkit
from langchain.utilities.zapier import ZapierNLAWrapper

# initialize_agent is imported here but only used later, when the agent is built.
from langchain.agents import initialize_agent
# NOTE(review): the wrapper is created without arguments; presumably the Zapier
# NLA credentials are read from the environment loaded by load_dotenv — confirm.
zapier = ZapierNLAWrapper()
# The toolkit wraps the Zapier connection; its tools are handed to the agent
# via toolkit.get_tools().
toolkit = ZapierToolkit.from_zapier_nla_wrapper(zapier)

In [None]:
# Formulate email based on customer info, customer reviews and summary of all reviews.

def formulate_email(email, name, review, summary):
    """Build the natural-language instruction asking the agent to email a customer.

    Args:
        email: Address the email should be sent to.
        name: Customer name, mentioned twice in the instruction.
        review: The review the customer gave, inserted verbatim.
        summary: Overall summary of the reviews, inserted inside single quotes.

    Returns:
        A multi-line string: a leading newline, four instruction lines each
        indented by four spaces, and a final line holding only four spaces.
    """
    instruction_lines = [
        "",
        f"    The customer {name} just gave the following review {review}",
        f"    Formulate and send an email to {email} based on the review that {name} gave",
        # The trailing space after the full stop is part of the prompt text.
        f"    and take into account the overall summary of the review given here: '{summary}'. ",
        "    The email should be signed with the name Benjamin",
        "    ",
    ]
    return "\n".join(instruction_lines)

In [None]:
# Add fake names and emails
from faker import Faker

faker = Faker()
# Seed Faker's shared generator so Restart & Run All yields the same fake
# customers (and therefore the same recipient addresses) on every run.
Faker.seed(42)
domain='@YourLangchainTestDomain.com'

# One fake first and last name per review row; all first names are drawn before
# any last name. Lists are assigned positionally, so they line up with df's rows.
row_count = len(df)
df['first_name'] = [faker.first_name() for _ in range(row_count)]
df['last_name'] = [faker.last_name() for _ in range(row_count)]

# Email is lower-cased first name + lower-cased last name + the test domain.
df['email'] = [
    first.lower() + last.lower() + domain
    for first, last in zip(df['first_name'], df['last_name'])
]

# Keep only the columns needed for the emails. The explicit copy keeps the
# frame independent, so re-running this cell can assign columns without
# chained-assignment warnings.
df = df[['first_name','last_name','reviewText','email','overall']].copy()

In [None]:
# Instantiate agent and send emails

from langchain.llms import OpenAI

llm = OpenAI(temperature=0.2)

# Agent of type "zero-shot-react-description", given the tools exposed by the Zapier toolkit
agent = initialize_agent(
    toolkit.get_tools(),
    llm,
    agent="zero-shot-react-description",
    verbose=True,
)


def _email_customer(row):
    """Build the email instruction for one review row and run the agent on it."""
    instruction = formulate_email(
        row['email'],
        row['first_name'],
        row['reviewText'],
        summary,
    )
    return agent.run(instruction)


# One agent run per review row; the Series of agent outputs is the cell's displayed value.
df.apply(_email_customer, axis=1)