In [1]:
import pandas as pd
import os
import dotenv
from dotenv import load_dotenv, find_dotenv
import io
from langchain.embeddings import OpenAIEmbeddings
from langchain.document_loaders import CSVLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.llms import OpenAI
from langchain.vectorstores import DocArrayInMemorySearch, FAISS
from langchain_text_splitters import CharacterTextSplitter
from IPython.display import display, Markdown

# Load settings from the .env file located by find_dotenv() so that
# OPENAI_API_KEY can be read from the environment in the next cell.
load_dotenv(find_dotenv())

True

In [2]:
# OpenAI API key passed to the embedding client below.
# os.environ.get returns None when the variable is unset, so a missing key
# is not reported here; it only surfaces when the key is first used.
key = os.environ.get("OPENAI_API_KEY")

In [3]:
# Read the raw wine-review CSV and keep only the seven columns this notebook
# works with; the order of the selection list defines the column order.
df = pd.read_csv('data/winemag-data-130k-v2.csv')[['country', 'title', 'description', 'variety', 'winery','points','price']]

In [4]:
# Slimmed-down copy of the dataset; CSVLoader reads this file in a later cell.
processed_csv = 'data/processed/winemag_130k_slim.csv'

# DataFrame.to_csv does not create missing parent directories, so make sure
# the target folder exists (keeps the notebook runnable on a fresh checkout).
os.makedirs(os.path.dirname(processed_csv), exist_ok=True)
df.to_csv(processed_csv, index=False)

df.head()

Unnamed: 0,country,title,description,variety,winery,points,price
0,Italy,Nicosia 2013 Vulkà Bianco (Etna),"Aromas include tropical fruit, broom, brimston...",White Blend,Nicosia,87,
1,Portugal,Quinta dos Avidagos 2011 Avidagos Red (Douro),"This is ripe and fruity, a wine that is smooth...",Portuguese Red,Quinta dos Avidagos,87,15.0
2,US,Rainstorm 2013 Pinot Gris (Willamette Valley),"Tart and snappy, the flavors of lime flesh and...",Pinot Gris,Rainstorm,87,14.0
3,US,St. Julian 2013 Reserve Late Harvest Riesling ...,"Pineapple rind, lemon pith and orange blossom ...",Riesling,St. Julian,87,13.0
4,US,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,"Much like the regular bottling from 2012, this...",Pinot Noir,Sweet Cheeks,87,65.0


In [49]:
# use temperature=0.2 to instigate some creativity in the LLM responses
# llm = OpenAI(temperature=0.2, openai_api_key=key)

In [6]:
# Loader for the slimmed CSV written to `processed_csv`.
# NOTE(review): presumably yields one document per CSV row, formatted as
# "column: value" lines — confirm against the CSVLoader documentation.
csv_loader = CSVLoader(file_path=processed_csv)

# Embedding client used when building the FAISS vector store below
embedding = OpenAIEmbeddings(openai_api_key=key)

  embedding = OpenAIEmbeddings(openai_api_key=key)


#### Retrieve k results by leveraging a VectorStore and FAISS

In [7]:
# Load the CSV into documents, then run them through a character-based
# splitter configured with chunk_size=1000 and no overlap between chunks.
documents = csv_loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

In [8]:
# Build a FAISS vector store from the split documents using the embedding client.
# NOTE(review): this presumably embeds every chunk of the dataset through the
# OpenAI API on each run — likely slow and billable; consider caching the index.
vectorstore = FAISS.from_documents(texts, embedding)

In [68]:
# Retriever over the vector store, configured to return 20 results per query (k=20).
retriever = vectorstore.as_retriever(search_kwargs={"k": 20})
# NOTE(review): no LLM is involved in this call, so the "return examples as a
# table in markdown ..." part of the query looks like a leftover from an LLM
# prompt; it presumably only influences the similarity search — confirm.
response0 = retriever.invoke("Suggest some Italian wines that are earthy, return examples as a table in markdown with columns; country, title, winery, variety, description, points, price.")

In [88]:
def response_to_df(response):
    """Convert retrieved wine documents into a typed DataFrame.

    Each element of ``response`` must expose a ``page_content`` string made of
    ``"<name>: <value>"`` lines, one per field, in this order: country, title,
    description, variety, winery, points, price. Fields are read by position.

    Parameters
    ----------
    response : sequence
        Objects with a ``page_content`` attribute. May be empty.

    Returns
    -------
    pandas.DataFrame
        One row per document with columns ``country``, ``title``,
        ``description``, ``variety``, ``winery``, ``points`` and ``price``.
        ``points`` and ``price`` are floats, NaN where the value is missing
        or empty.

    Raises
    ------
    IndexError
        If one of the five text fields is absent or lacks the ``": "``
        separator.
    ValueError
        If a non-empty ``points`` or ``price`` value is not numeric.
    """
    columns = ['country', 'title', 'description', 'variety', 'winery', 'points', 'price']
    text_positions = range(5)      # country .. winery: required
    numeric_positions = (5, 6)     # points, price: may be missing

    def field_value(line):
        # maxsplit=1: only the first ': ' separates the field name from its
        # value, so values that contain ': ' themselves are kept intact.
        return line.split(': ', 1)[1]

    rows = []
    for doc in response:
        # Split the content once per document instead of once per field.
        lines = doc.page_content.split('\n', 7)

        row = [field_value(lines[pos]) for pos in text_positions]

        for pos in numeric_positions:
            try:
                raw = field_value(lines[pos]).strip()
            except IndexError:
                # The line is absent, or its empty value was stripped so that
                # only "name:" is left: treat the field as missing.
                raw = ''
            # An empty string cannot be cast to float; store None (-> NaN).
            row.append(raw or None)

        rows.append(row)

    df_response = pd.DataFrame(rows, columns=columns)
    return df_response.astype({'points': float, 'price': float})

#### Complete recommendations:

In [89]:
# Parse the retrieved documents into a DataFrame with numeric points/price
df_response = response_to_df(response=response0)

# Show full cell text instead of truncating long descriptions; this is a
# global pandas display option and stays in effect for later cells.
pd.set_option('display.max_colwidth', None)
df_response.head(20)

Unnamed: 0,country,title,description,variety,winery,points,price
0,Italy,Vasco Sassetti 2005 Brunello di Montalcino,"Earthy aromas of white mushroom and dried roses that are characteristic of Tuscan Sangiovese are what you notice first here. The wine then segues to fruit and spice aromas, although those berry notes are on the mature and jammy side. Fresh acidity and firm tannins suggest a pairing with red meat.",Sangiovese Grosso,Vasco Sassetti,87.0,
1,Italy,Mocavero 2011 Primitivo (Salento),"This earthy wine opens with aromas of ripe plum, tilled earth, game and a whiff of barnyard. The rustic but juicy palate doles out black cherry, spicy blueberry and ground pepper framed by hearty tannins.",Primitivo,Mocavero,86.0,30.0
2,Italy,Terre da Vino 2008 Essenze (Barolo),"Inky and dark, this has modern touches of toasted oak and exotic spice. Those smoky aromas will decrease with time, giving prominence to the cherry and blackberry flavors already present.",Nebbiolo,Terre da Vino,91.0,60.0
3,Italy,Collosorbo 2010 Riserva (Brunello di Montalcino),"Here's an earthy wine that opens with aromas of scorched earth, leather, truffle, raspberry compote and a note of cured meat. The chewy palate offers mature Morello cherry, grilled herb, coffee, clove and anise while firm, ripe tannins provide the framework. A tobacco note closes the finish. Drink 2018–2028.",Sangiovese,Collosorbo,94.0,75.0
4,Italy,Cielo e Terra 2011 Primi Soli Primitivo (Puglia),"A certified organic wine, this offers easy berry aromas of cherry and raspberry that would pair with informal foods like pizza. The mouthfeel is lean and bright.",Primitivo,Cielo e Terra,85.0,
5,Italy,Bellaria 2011 Assunto (Brunello di Montalcino),"Scorched earth, game, cured meat and leather waft out of the glass. The taught palate offers dried black cherry, pomegranate roasted coffee bean, anise and a vanilla note but not quite enough fruit richness. Tightly wound, grainy tannins leave an astringent finish. Give the tannins a few more years to unwind then drink.",Sangiovese,Bellaria,88.0,
6,Italy,Cascina Saria 2012 Colle del Gelso (Barbaresco),"This opens with earthy aromas of scorched soil, underbrush and a funky whiff of game while the palate offers candied cherry, white pepper and a hint of tobacco alongside firm tannins. It closes on an aniseed note.",Nebbiolo,Cascina Saria,90.0,
7,Italy,Occhipinti 2010 Cerasuolo di Vittoria,"Earthy aromas of underbrush, game, overripe berry and tilled soil lead the nose on this soulful wine. The palate offers the same gamy note alongside spiced blackberry, blueberry, orange zest and clove. Firm but fine-grained tannins provide support.",Red Blend,Occhipinti,89.0,
8,Italy,Fattoria del Pino 2012 Brunello di Montalcino,"This robust red opens with scorched earth, new leather, underbrush and dark spice aromas. The palate is brawny and monolithic, offering licorice, dried cherry and clove flavors with the warmth of evident alcohol. It's framed by chewy tannins and should be drunk soon to capture the remaining fruit and freshness.",Sangiovese,Fattoria del Pino,88.0,
9,Italy,Terre da Vino 2010 Essenze (Barolo),"This straightforward, inviting Barolo boasts classic Nebbiolo aromas of rose, leather, berry spice and tilled earth. It's not loaded with complexity, but the sour cherry, black pepper and clove flavors are delicious. It closes on a note of subtle oak alongside a drying, tannic finish.",Nebbiolo,Terre da Vino,91.0,50.0


In [64]:
# query = "Suggest some wines that are spicy with the highest Points and the lowest Price, return examples as a table in markdown with columns; country, title, winery, variety, description, points, price."
# response = index.query(query, llm=llm)

### Recommendations weighted by Point Preference and Price Preference

In [92]:
# Relative importance of the critic score versus the bottle price
weight_points = 0.75
weight_price = 0.25

# Score each wine: points push the value up, price pulls it down.
# Rows without a price get NaN here and therefore never reach the top of the ranking.
df_response['weighted_reccomendation_value'] = (
    weight_points * df_response['points'] - weight_price * df_response['price']
)

# Five best-scoring wines, highest value first
df_response.sort_values(by='weighted_reccomendation_value', ascending=False).head(5)

Unnamed: 0,country,title,description,variety,winery,points,price,weighted_reccomendation_value
11,Italy,Terra d'Aligi 2009 Pecorino (Terre di Chieti),"Here's a Best Buy white wine that would pair with salads, vegetable dishes and appetizers. It's crisp and luminous with lively aromas of citrus, lemon zest, stone fruit and melon. Pecorino is one of Italy's hottest varieties on the indigenous grape scene.",Pecorino,Terra d'Aligi,86.0,11.0,61.75
14,Italy,Cleto Chiarli 2006 Vigneto Enrico Cialdini (Lambrusco Grasparossa di Castelvetro),"Fresh and fruity, this ruby-colored Lambrusco delivers thick red foam and aromas of forest berry and blueberry. The wine has good structure and some tannins that would work well with chocolate cake.",Lambrusco,Cleto Chiarli,85.0,17.0,59.5
10,Italy,Giuseppe Lonardi 2010 Valpolicella Classico,"There's an earthy, leathery feel to this wine that recalls button mushroom, forest floor and dried rose petal. It shows an informal, compact disposition in the mouth.","Corvina, Rondinella, Molinara",Giuseppe Lonardi,83.0,13.0,59.0
1,Italy,Mocavero 2011 Primitivo (Salento),"This earthy wine opens with aromas of ripe plum, tilled earth, game and a whiff of barnyard. The rustic but juicy palate doles out black cherry, spicy blueberry and ground pepper framed by hearty tannins.",Primitivo,Mocavero,86.0,30.0,57.0
9,Italy,Terre da Vino 2010 Essenze (Barolo),"This straightforward, inviting Barolo boasts classic Nebbiolo aromas of rose, leather, berry spice and tilled earth. It's not loaded with complexity, but the sour cherry, black pepper and clove flavors are delicious. It closes on a note of subtle oak alongside a drying, tannic finish.",Nebbiolo,Terre da Vino,91.0,50.0,55.75


In [None]:
# query = "Suggest ten wines from US with hints of your grandmother's purse, format result as a table in markdown with columns; country, title, winery, variety, description."
# response = index.query(query, llm=llm)
# display(Markdown(response))