# Load Environment
The `.env` file is expected to define `OPENAI_API_KEY` with a valid OpenAI API key.

In [1]:
import os
import openai
from dotenv import load_dotenv, find_dotenv

# Load variables from the discovered .env file; override=True lets values from
# the file replace any that are already set in the process environment.
load_dotenv(find_dotenv(), override=True)

# A missing OPENAI_API_KEY raises KeyError here, before any API call is made.
openai.api_key = os.environ['OPENAI_API_KEY']

# Smoke-test the key by listing the available models. The response is a
# dict-like list envelope whose models live under the "data" key, so the
# length of the envelope itself counts its keys and is never zero; inspect the
# model list instead.
models = openai.Model.list()
available_models = models.get("data") or []
if len(available_models) == 0:
    raise Exception("Your OpenAI API key does not appear to be valid. Please check it and try again.")

# Select a Product
Load and select one of the products from the dataset. This demo references a single product, so there is no need to create embeddings for other products.

In [2]:
from CONSTANTS import example_asin
import pandas as pd
def _rows_for_product(parquet_path):
    """Read `parquet_path` and return a copy of the rows whose `asin` equals `example_asin`."""
    table = pd.read_parquet(parquet_path)
    return table[table['asin'].eq(example_asin)].copy()


# Reviews and metadata restricted to the single demo product.
df = _rows_for_product('reviews.parquet.gz')
df_meta = _rows_for_product('metadata.parquet.gz')

The title of the product:

In [3]:
# Select the title column for the demo product and print the first match.
product_titles = df_meta.loc[df_meta['asin'] == example_asin, 'title']
print(product_titles.iloc[0])

Streamlight 66118 Stylus Pro LED PenLight with Holster, Black - 100 Lumens


And some of the reviews:

In [4]:
# Preview the raw text of the first ten reviews as an array.
review_preview = df['reviewText'].head(10)
print(review_preview.to_numpy())

["Totally impressed with this little flashlight.  Small size fits and clips easily into pants pocket. Surprisingly bright beam shines much farther than I expected.  Aluminum housing is very durable, and I've gotten it wet several times with no problems. The light has been used quite a bit and is still using the original batteries.  One hand operation makes it very handy."
 "Lightweight, intense light, easy to operate with one hand- this would be an incredible tool for law enforcement or an emergency responder. I'm buying a few more immediately I doubt one could ask for a more versatile hand held light. The beam is very intense- might even do well as a self-defense item as it is blinding if shone directly in the eyes. Money well spent!"
 '<div id="video-block-R14P1E1E7LGJEV" class="a-section a-spacing-small a-spacing-top-mini video-block"></div><input type="hidden" name="" value="https://images-na.ssl-images-amazon.com/images/I/E19J6MdazZS.mp4" class="video-url"><input type="hidden" nam

# Create Embeddings

Load the LangChain embeddings helper class:

In [3]:
from CONSTANTS import embed_model
from langchain.embeddings import OpenAIEmbeddings
# Embedding client used by the cells below; the model name comes from CONSTANTS.
langchain_embeddings = OpenAIEmbeddings(
    model=embed_model,
)

Pull the text we'll be embedding into a list:

In [4]:
# Plain Python list of the `truncated` column, one entry per row of df.
truncated_column = df['truncated']
embedding_text = truncated_column.to_numpy().tolist()

# Display the first ten entries as the cell output.
embedding_text[:10]

["Totally impressed with this little flashlight.  Small size fits and clips easily into pants pocket. Surprisingly bright beam shines much farther than I expected.  Aluminum housing is very durable, and I've gotten it wet several times with no problems. The light has been used quite a bit and is still using the original batteries.  One hand operation makes it very handy.",
 "Lightweight, intense light, easy to operate with one hand- this would be an incredible tool for law enforcement or an emergency responder. I'm buying a few more immediately I doubt one could ask for a more versatile hand held light. The beam is very intense- might even do well as a self-defense item as it is blinding if shone directly in the eyes. Money well spent!",
 '<div id="video-block-R14P1E1E7LGJEV" class="a-section a-spacing-small a-spacing-top-mini video-block"></div><input type="hidden" name="" value="https://images-na.ssl-images-amazon.com/images/I/E19J6MdazZS.mp4" class="video-url"><input type="hidden" n

Finally, call the embedding function. We've wrapped the calls in `aiometer` to keep the request rate below the OpenAI API limit while still generating the embeddings as quickly as possible.

In [5]:
embed_file = f"{example_asin}.reviews-embeddings-{embed_model}.parquet.gz"

import aiometer
embeddings=list(range(0,len(embedding_text)))

async def async_embed(index):
    resp = await langchain_embeddings.aembed_query(embedding_text[index])
    embeddings[index] = resp

await aiometer.run_on_each(
    async_embed,
    embeddings,
    max_per_second=50
)        

df['embeddings'] = embeddings
df.to_parquet(embed_file, compression='gzip')
    
df.head(10)

Unnamed: 0,overall,verified,reviewTime,reviewerID,asin,style,reviewerName,reviewText,summary,unixReviewTime,vote,image,truncated,embeddings
1484623,5.0,False,"06 2, 2008",A39XFGZ0ASWT7O,B0015UC17E,"{'Color Name:': None, 'Color:': ' Black w/Whit...",Amazon Customer,Totally impressed with this little flashlight....,love my streamlight stylus,1212364800,8.0,,Totally impressed with this little flashlight....,"[-0.002683741971850395, 0.014071956276893616, ..."
1484624,5.0,True,"05 31, 2008",A1UA6TH2XRK9ZX,B0015UC17E,"{'Color Name:': None, 'Color:': ' Black w/Whit...",Everyman,"Lightweight, intense light, easy to operate wi...",Great light Great price,1212192000,73.0,,"Lightweight, intense light, easy to operate wi...","[-0.015947110950946808, 0.007599272765219212, ..."
1484625,5.0,True,"05 14, 2008",A8XV9SMUW4OT4,B0015UC17E,"{'Color Name:': None, 'Color:': ' Black w/Whit...",Happily Married,"<div id=""video-block-R14P1E1E7LGJEV"" class=""a-...",VIDEO and review for Stylus Pro light,1210723200,803.0,,"<div id=""video-block-R14P1E1E7LGJEV"" class=""a-...","[0.00011079138494096696, -0.013300847262144089..."
1484626,5.0,False,"12 13, 2007",A1PY6V6J7K5G84,B0015UC17E,,grumpy,This is a great light for the price. It's the ...,great light,1197504000,3.0,,This is a great light for the price. It's the ...,"[0.009861239232122898, -0.00737977446988225, -..."
1484627,5.0,True,"05 22, 2018",A1SXP6OD7J3A91,B0015UC17E,"{'Color Name:': None, 'Color:': ' Blue w/White...",John A.,Just runs and runs...great light,Fantastic flashlight,1526947200,,,Just runs and runs...great light,"[-0.011605827137827873, -0.0030300442595034838..."
1484628,5.0,True,"05 22, 2018",AYXJ3JUAOBONE,B0015UC17E,"{'Color Name:': None, 'Color:': ' Black w/Whit...",Pete Haglund,"Only had it overnight, So far so good! Solid p...",So far so good! Solid product,1526947200,,,"Only had it overnight, So far so good! Solid p...","[0.008355016820132732, 0.01563715934753418, 0...."
1484629,5.0,True,"05 21, 2018",A1WLQE5R4ULY6N,B0015UC17E,"{'Color Name:': None, 'Color:': ' Black w/Whit...",John Sandstrom,"Carry one in your bag, just in case.",Five Stars,1526860800,,,"Carry one in your bag, just in case.","[0.009391376748681068, 0.003927536774426699, 0..."
1484630,5.0,True,"05 21, 2018",A13AVJYZLJDVDR,B0015UC17E,"{'Color Name:': None, 'Color:': ' Black w/Whit...",jonathan,FANTASIC light and fits anywhere. Bright and s...,Great light!!,1526860800,,,FANTASIC light and fits anywhere. Bright and s...,"[0.014649939723312855, -0.006138053722679615, ..."
1484631,2.0,True,"05 21, 2018",A2IBKZX5RHO4DO,B0015UC17E,"{'Color Name:': None, 'Color:': ' Black w/Whit...",Amazon Customer,Great in the beginning but started to corrode ...,Great in the beginning but started to corrode ...,1526860800,,,Great in the beginning but started to corrode ...,"[0.002721348311752081, -0.003564865794032812, ..."
1484632,5.0,True,"05 20, 2018",A3C7HCJ6XDXO6S,B0015UC17E,"{'Color Name:': None, 'Color:': ' Black w/Whit...",Nikki Smith,This thing is sturdy and has held up well with...,Five Stars,1526774400,,,This thing is sturdy and has held up well with...,"[0.0047048344276845455, 0.00238009262830019, 0..."


In [13]:
# Print the complete embedding stored for the first review.
first_embedding = df['embeddings'].iloc[0]
print(first_embedding)

[-0.002683741971850395, 0.014071956276893616, -0.013861337676644325, -0.013650719076395035, -0.004008335527032614, 0.023365503177046776, 0.013440100476145744, -0.04209739342331886, -0.0023924957495182753, -0.02703816443681717, 0.015151376836001873, 0.004156426526606083, -0.02301008254289627, 0.013078099116683006, -0.006371213123202324, 0.01226195227354765, 0.009866165928542614, -0.0040116263553500175, 0.009247473441064358, -0.02123298868536949, -0.01827116496860981, 0.013097845017910004, -0.02685387246310711, -0.008944708853960037, -0.005999339744448662, -0.038306258618831635, 0.021483099088072777, -0.01991662196815014, 0.029460279271006584, -0.01114962249994278, 0.03111889958381653, -0.014019301161170006, -0.047415513545274734, -0.025932416319847107, -0.008187798783183098, 0.006996487267315388, -0.009668710641562939, 0.00706888735294342, 0.01362439151853323, -0.00833918061107397, 0.024050012230873108, 0.005097628571093082, 0.007733652368187904, -0.00046196230687201023, -0.021680554375