<a href="https://colab.research.google.com/github/rabbitmetrics/personalize-LLMs/blob/main/notebooks/personalization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -qU \
  python-dotenv==1.0.0 \
  langchain==0.0.313 \
  tiktoken==0.5.1 \
  openai==0.28.1 \
  klaviyo-api==5.2.0 \
  ShopifyAPI==12.3.0 \
  redis==5.0.1 \
  pandas-gbq==0.19.2 \
  Faker==19.12.0

In [None]:
# Load environment variables from .env file

import os
from dotenv import load_dotenv
load_dotenv()

In [None]:
# Connect to Redis database (redis.com) with redis-py

import redis
from langchain.vectorstores.redis import Redis

# Connection settings come from the environment (loaded from .env above).
# `url` is the full connection string handed to LangChain's Redis vector store later on.
url=os.getenv('REDIS_URL')

host=os.getenv('REDIS_HOST')
password=os.getenv('REDIS_PASSWORD')
# Fall back to the standard Redis port: int(None) / int('') would otherwise
# abort the cell with an unhelpful error when REDIS_PORT is unset or empty.
port=int(os.getenv('REDIS_PORT') or 6379)


# Plain redis-py client, used in this notebook to ping the server and inspect keys
r = redis.Redis(
  host=host,
  port=port,
  password=password)

In [None]:
# Check redis connection

r.ping()



In [None]:
# Check if anything is stored in the database, flush if needed

r.keys()
#r.flushdb() # in case you need to delete the data again

# Shopify Product Data to Redis: Building The Retriever

In [None]:
# Generic functions used for extracting data from Shopify REST API

import os
import shopify
import pandas as pd

# Shopify credentials are read from the environment
# (presumably an Admin API access token and the shop domain — confirm against your .env)
token = os.getenv('SHOPIFY_TOKEN')
merchant= os.getenv('SHOPIFY_MERCHANT')

# Create a session pinned to API version 2023-04 and activate it on ShopifyResource,
# so the shopify.* resource lookups in get_data() below go through this session
api_session = shopify.Session(merchant,'2023-04', token)
shopify.ShopifyResource.activate_session(api_session)

def get_data(object_name):
    """Fetch every record of one Shopify resource type, following pagination.

    object_name: name of an attribute of the `shopify` module (e.g. 'Product').
    Returns a flat list holding the items of all pages, in the order received.
    """
    resource=getattr(shopify,object_name)
    page=resource.find(since_id=0, limit=250)
    collected=list(page)
    # Keep walking the pages until the API reports that there is no next one
    while page.has_next_page():
        page=page.next_page()
        collected.extend(page)
    return collected

def product_frame(products):
    """Flatten Shopify product resources into a pandas DataFrame.

    products: iterable of objects exposing an `attributes` dict.
    Returns a DataFrame with the columns id, title, vendor, body_html, handle,
    status, tags and price. Attributes missing on a product become None.
    `price` is taken from the product's first variant and is None when the
    product has no variants (or the variant has no price).
    An empty `products` yields an empty frame that still has all columns.
    """
    fields=('id', 'title','vendor','body_html','handle','status','tags')
    all_products=[]
    for product in products:
        p=product.attributes
        record={k: p.get(k, None) for k in fields}
        # Guard against products without variants instead of failing on [0]
        variants=p.get('variants') or []
        record['price']=variants[0].attributes.get('price') if variants else None
        all_products.append(record)
    # Explicit columns keep the schema stable even when there are no products
    df=pd.DataFrame(all_products, columns=list(fields)+['price'])
    return df

In [None]:
# Extract product data from Shopify (or json file) and transform into a suitable format for vector storage.
# A sample of Shopify products can be found on https://github.com/rabbitmetrics/personalize-LLMs

products=get_data('Product')
frame=product_frame(products)

#frame.reset_index(drop=True).to_json('products.json',orient='records')
#frame=pd.read_json('products.json')


# Upper bound on the number of description characters kept per product
max_text_length=800
def truncate_text(text):
    """Cut a product description down to at most `max_text_length` characters.

    text: the description string; may be missing.
    Returns the first `max_text_length` characters of `text`. A missing
    description (None, or NaN after a JSON round-trip) becomes an empty
    string instead of raising on the slice.
    """
    if not isinstance(text, str):
        return ''
    return text[:max_text_length]
# Truncate every product description; this overwrites the body_html column of `frame`
frame['body_html']=frame.apply(lambda row: truncate_text(row['body_html']),axis=1)

# One dict per product, keyed by a fresh 0..n-1 row index
product_data=frame.reset_index(drop=True).to_dict(orient='index')

# Only the product title is used as the text that gets embedded ...
texts = [
    v['title'] for k, v in product_data.items()
]

# ... while the complete product record is kept as metadata for each text
metadatas = list(product_data.values())

In [None]:
# Load OpenAI embeddings, you can also use HuggingFace embeddings by pip installing SentenceTransformers

from langchain.embeddings import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()

In [None]:
# Import Redis and load vector data from LangChain

from langchain.vectorstores.redis import Redis

# Vector field settings passed to the LangChain Redis store: HNSW algorithm, initial_cap of 400
vector_schema = {"algorithm": "HNSW","initial_cap": 400}

# Embed `texts` with the OpenAI embeddings and store them, together with their
# metadata, under the "shopify_products" index of the Redis instance at `url`
rds = Redis.from_texts(
    texts,
    embeddings,
    metadatas=metadatas,
    redis_url=url,
    index_name="shopify_products",
    vector_schema=vector_schema,

)

# check that vector data has been added
r.keys()

In [None]:
# To delete the products from the database run
#for key in r.scan_iter("doc:shopify_products:*"):
#  r.delete(key)

In [None]:
# Check that we can do VSS
docs=rds.similarity_search("Adidas shoes", 5)
docs

In [None]:
# Write the schema to a yaml file and use it to connect the existing index from another instance

rds.write_schema("redis_schema.yaml")

# Loading Personalization Data to BigQuery

In [None]:
import pandas as pd
import datetime
from faker import Faker
import random

from google.oauth2 import service_account
import pandas_gbq

In [None]:
incentives = ['bogo','free_shipping','special_offer','free_gift','10% discount','no_incentive']

faker = Faker()
domain='your_domain' # you can just add some id instead that can be used as a primary key

def customer_frame(n=100):
    """Generate a frame of synthetic customers, each with a random incentive.

    Parameters
    ----------
    n : int, default 100
        Number of customers (rows) to generate.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: email, first_name, last_name, incentive, created,
        feature_timestamp. `email` is the lower-cased first name + last name
        followed by the module-level `domain`.

    Notes
    -----
    Names come from the module-level `faker`, incentives from `incentives`.
    NOTE(review): random names can repeat, so `email` is not guaranteed to be
    unique — confirm this is acceptable if it is used as the entity key.
    NOTE(review): the timestamps are naive local time — confirm this matches
    what the downstream feature store expects.
    """
    # One timestamp (two hours in the past) shared by every row and both
    # timestamp columns, instead of calling now() once per row and column.
    stamp=datetime.datetime.now()-datetime.timedelta(hours=2)

    chosen=[random.choice(incentives) for _ in range(n)]
    first_names=[faker.first_name() for _ in range(n)]
    last_names=[faker.last_name() for _ in range(n)]
    emails=[first.lower()+last.lower()+domain
            for first, last in zip(first_names, last_names)]

    columns=['email', 'first_name', 'last_name', 'incentive', 'created','feature_timestamp']
    df=pd.DataFrame(
        {
            'email': emails,
            'first_name': first_names,
            'last_name': last_names,
            'incentive': chosen,
            'created': [stamp]*n,
            'feature_timestamp': [stamp]*n,
        },
        columns=columns,
    )
    return df

feature_frame=customer_frame()

In [None]:
# Destination table in the "dataset.table" form that pandas-gbq expects
# (the placeholder previously had the two parts in the wrong order)
table_id='dataset_name.table_name'
project_id="your_gcp_project"

# Service-account credentials read from the JSON key file at the given path
credentials = service_account.Credentials.from_service_account_file(
    'service_account_json_key',
)

# Make the credentials and project the defaults for later pandas-gbq calls
pandas_gbq.context.credentials = credentials
pandas_gbq.context.project = project_id

# Upload the synthetic customer features to the destination table
pandas_gbq.to_gbq(feature_frame, table_id, project_id=project_id)

# Setting up Feast with BigQuery and Redis

In [None]:
# Install Feast. We'll be using BigQuery as offline store and Redis as online store

! pip install -qU 'feast[gcp, redis]'

In [None]:
# Create a feature repo allowing us to connect to GCP - choose some appropriate name

! feast init langchain_klaviyo -t gcp

In [None]:
# Change dir to where the feature_store.yaml file is located
# Configure the yaml file and the example_repo.py file. Example setups of these files can be found at https://github.com/rabbitmetrics/personalize-LLMs

%cd langchain_klaviyo/feature_repo/

In [None]:
# Set application credentials using the json key created on GCP. Move or copy the json key to the current folder first.

import os

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "./klaviyo.json"

In [None]:
# Apply configurations

! feast apply

In [None]:
# Materialize features (changes) from offline store to online store

!feast materialize-incremental $(date -u +"%Y-%m-%dT%H:%M:%S")

In [None]:
# Check that features have been materialized to Redis

r.keys()

In [None]:
# Import FeatureStore that allows us to extract the features

from feast import FeatureStore


feast_repo_path = "./"
store = FeatureStore(repo_path=feast_repo_path)

In [None]:
# Create function that extracts features for a particular customer

def get_feature(email):
    """Look up the online features stored for one customer.

    email: value of the "email" entity key to look up.
    Returns the feature store response converted to a dict.
    """
    requested=[
        "incentives:incentive",
        "incentives:first_name",
        "incentives:last_name",
    ]
    response=store.get_online_features(
        features=requested,
        entity_rows=[{"email": email}],
    )
    return response.to_dict()

In [None]:
# get_feature requires the customer's email; replace the placeholder with an
# email that exists in your feature table
get_feature("some_email")

# Injecting Features into LangChain Prompt Templates

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate, StringPromptTemplate


from langchain.chains.summarize import load_summarize_chain

In [None]:
chat=ChatOpenAI(model_name="gpt-4", temperature=0.2)

In [None]:
# Entire Email template

base_template = """

You are an email writing assistant that wants to convert customers based on the information given.
Take the customer data into account when formulating an email.


Here is the data on the customer including what type of incentive we think the customer prefers:


<customer_data>

Recommended incentive: {incentive}

</customer_data>

Use the recommended incentive to craft an offer but don't mention the incentive explicitly in the email.

Relevant products: {text}

Email is from team Running Customer

Your response:"""
base_prompt = PromptTemplate.from_template(base_template)

In [None]:
# If you only need a few lines to load to Klaviyo

base_template = """

You are an email writing assistant that wants to convert customers based on the information given.

Write 3 sentences that can be used in a marketing email targeting the specific customer. Take the recommended
incentive given in the "customer data" section into account when formulating the paragraph.


Here is the data on the customer including what type of incentive we think the customer prefers:


<customer_data>

Recommended incentive: {incentive}

</customer_data>

Use the recommended incentive to craft an offer but don't mention the incentive explicitly in the email.

Relevant products: {text}

No need for signature as this will be pasted into an email template

Your response:"""
base_prompt = PromptTemplate.from_template(base_template)

In [None]:
# Create customized prompt template with feature data

class FeastPromptTemplate(StringPromptTemplate):
    """Prompt template that injects a customer's stored incentive into `base_prompt`.

    The caller passes the customer's `email` plus whatever variables the
    current `base_prompt` expects; the email is exchanged for the customer's
    recommended incentive before the base prompt is rendered.
    """

    def format(self, **kwargs) -> str:
        """Render `base_prompt` for one customer.

        kwargs must contain "email" (KeyError otherwise). It is removed and
        replaced by "incentive"; all other keyword arguments are passed to
        `base_prompt.format` unchanged.
        """
        email = kwargs.pop("email")
        # Reuse the notebook's lookup helper instead of repeating the feature list here
        feature_vector = get_feature(email)
        incentive = feature_vector["incentive"][0]
        # The store has no value for unknown customers; use the neutral label from
        # the incentive vocabulary rather than rendering the literal "None".
        kwargs["incentive"] = incentive if incentive is not None else "no_incentive"
        return base_prompt.format(**kwargs)

In [None]:
feature_prompt_template = FeastPromptTemplate(input_variables=["email","text"])

In [None]:
print(feature_prompt_template.format(email="davidhill@mg.rabbitpromotion.com",text="adidas shoes"))

In [None]:
# Create summarize chain with GPT-4 and customized feature prompt template

chain = load_summarize_chain(chat, chain_type="stuff", prompt=feature_prompt_template)
response=chain({"input_documents": docs,"email": "some_email"},return_only_outputs=False)

print(response['output_text'])

# Chatbot
We can use the same Feast + Redis backend to feed a chatbot with customer features. This allows for personalization of all interactions with customers. If the pipeline to Redis through Feast is set up to be event-driven, this also allows for real-time contextualization.

In [None]:
# Create template that allows for both feature injection and customer interaction

base_template = """

You are a conversational ecommerce shopping assistant that wants to convert the customer based on
the information given.


Here is the data on the customer including what type of incentive we think the customer prefers:


<customer_data>

Recommended incentive: {incentive}

</customer_data>


Human: {question}

Relevant products: {context}



Your response:"""
base_prompt = PromptTemplate.from_template(base_template)

In [None]:
# Template used for condensing the question and chat history

template="""
Use the follow up input {question}, and the chat history {chat_history} to formulate a standalone question.
"""
condense_question_prompt = PromptTemplate.from_template(template)

In [None]:
# We'll use ConversationalRetrievalChain with streaming output for the chatbot

from langchain.chains import ConversationalRetrievalChain
from langchain.callbacks.manager import AsyncCallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

In [None]:
feature_prompt_template = FeastPromptTemplate(input_variables=["email","question","context"])

In [None]:
# Create the chatbot using ConversationalRetrievalChain - note that the feature prompt template is passed as kwargs.

chatbot = ConversationalRetrievalChain.from_llm(
    # Answering model: GPT-4 with streaming on and a stdout handler attached
    ChatOpenAI(temperature=0,
               model="gpt-4",
               streaming=True,
               callbacks=AsyncCallbackManager([
               StreamingStdOutCallbackHandler()
    ]),
              ),
    # Retriever backed by the Redis product index built earlier in the notebook
    rds.as_retriever(),
    # Prompt and separate non-streaming model for condensing the follow-up
    # question and chat history into a standalone question
    condense_question_prompt = condense_question_prompt,
    condense_question_llm = ChatOpenAI(temperature=0, model='gpt-4'),
    # The feature-injecting prompt template is used by the combine-documents step
    combine_docs_chain_kwargs=dict(prompt=feature_prompt_template),
    verbose=True
)

In [None]:
print(feature_prompt_template.format(email="some_email",question="looking for shoes",context="adidas"))

In [None]:
chat_history = []
query = "I'm looking for somme cool kids sneakers"
result = chatbot({"question": query,"email":"some_email", "chat_history": chat_history})