In [24]:
import pandas as pd

from langchain.schema import Document


# Top 50 Spotify music dataset (2010-2019 per the file name). The URL points at a
# specific revision of the gist, so the file contents should not change between runs.
SONGS_CSV_URL = "https://gist.githubusercontent.com/rioto9858/ff72b72b3bf5754d29dd1ebf898fc893/raw/1164a139a780b0826faef36c865da65f2d3573e0/top50MusicFrom2010-2019.csv"
SAMPLE_SEED = 0  # fixed seed so every run indexes the same 100 songs

song_df = pd.read_csv(SONGS_CSV_URL)

# Draw a reproducible sample of 100 songs; only these four columns are used below.
song_sample = song_df[["title", "artist", "the genre of the track", "year"]].sample(
    100, random_state=SAMPLE_SEED
)

# One Document per sampled song: the title becomes the page content and the other
# columns become metadata ("the genre of the track" is shortened to "genre").
docs = [
    Document(page_content=title, metadata={"artist": artist, "genre": genre, "year": year})
    for title, artist, genre, year in song_sample.itertuples(index=False, name=None)
]

In [None]:
import os

import pinecone

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone


# You'll need to sign up for Pinecone and get your credentials to run this demo.
# Both values are read from the environment; a missing variable raises KeyError.
pinecone.init(api_key=os.environ["PINECONE_API_KEY"], environment=os.environ["PINECONE_ENV"])

INDEX_NAME = "langchain-self-retriever-demo"
embeddings = OpenAIEmbeddings()

if INDEX_NAME in pinecone.list_indexes():
    # The index already exists (e.g. this cell was run before): reuse it instead of
    # calling create_index again and uploading the documents a second time.
    vectorstore = Pinecone.from_existing_index(index_name=INDEX_NAME, embedding=embeddings)
else:
    # 1536 is the vector size of OpenAI's text-embedding-ada-002 -- presumably the
    # model OpenAIEmbeddings() uses by default here; confirm if you change models.
    pinecone.create_index(INDEX_NAME, dimension=1536)
    vectorstore = Pinecone.from_documents(docs, embeddings, index_name=INDEX_NAME)

# Self-querying

In [7]:
from langchain.llms import OpenAI
from langchain.retrievers.pinecone_self_query import (
    MetadataFieldInfo,
    PineconeSelfQueryRetriever, 
    VectorStoreExtendedInfo, 
)

In [33]:
EXAMPLE_SEED = 0  # fixed so the example values are identical on every run


def _example_values(column, k=3):
    """Return up to ``k`` distinct values from ``song_df[column]``, chosen reproducibly.

    Duplicates are dropped before sampling so the same value is never listed twice,
    and the draw is seeded so re-running the cell yields the same examples.
    """
    distinct = song_df[column].drop_duplicates()
    return distinct.sample(min(k, len(distinct)), random_state=EXAMPLE_SEED).tolist()


# Describes the vector store and its metadata fields for the self-query retriever.
# Field names must match the metadata keys stored on each Document
# ("artist", "genre", "year").
vectorstore_info = VectorStoreExtendedInfo(
    vectorstore=vectorstore,
    name="Top 50 Spotify Songs",
    description="The most popular songs on Spotify",
    metadata_field_info=[
        MetadataFieldInfo(
            name="artist",
            description="The artist who released the song",
            type="string",
            examples=_example_values("artist"),
        ),
        MetadataFieldInfo(
            name="genre",
            description="The genre of the song",
            type="string",
            # The CSV column is called "the genre of the track"; the metadata key is "genre".
            examples=_example_values("the genre of the track"),
        ),
        MetadataFieldInfo(
            name="year",
            description="The year the song was released",
            type="integer",
            examples=_example_values("year"),
        ),
    ],
)

In [34]:
# The LLM is created with temperature=0 and handed, together with the vector store
# description, to the retriever factory.
llm = OpenAI(temperature=0)
retriever = PineconeSelfQueryRetriever.from_vectorstore_info(llm, vectorstore_info)

In [35]:
# A purely topical question: it names no artist, genre or year.
love_question = "What are some songs about love"
retriever.get_relevant_documents(love_question)

{'search_string': 'love', 'metadata_filter': {}}


[Document(page_content='Love', metadata={'artist': 'Lana Del Rey', 'genre': 'art pop', 'year': 2017.0}),
 Document(page_content='L.A.LOVE (la la)', metadata={'artist': 'Fergie', 'genre': 'dance pop', 'year': 2015.0}),
 Document(page_content='human', metadata={'artist': 'Christina Perri', 'genre': 'dance pop', 'year': 2014.0}),
 Document(page_content='Someone You Loved', metadata={'artist': 'Lewis Capaldi', 'genre': 'pop', 'year': 2019.0})]

In [37]:
# A question that constrains two metadata fields at once (genre and year).
pop_2012_question = "What are some popular pop songs from 2012"
retriever.get_relevant_documents(pop_2012_question)

{'search_string': ' ', 'metadata_filter': {'genre': {'$eq': 'pop'}, 'year': {'$eq': 2012}}}


[Document(page_content='I Knew You Were Trouble.', metadata={'artist': 'Taylor Swift', 'genre': 'pop', 'year': 2012.0}),
 Document(page_content='One More Night', metadata={'artist': 'Maroon 5', 'genre': 'pop', 'year': 2012.0})]

In [46]:
# Combines a topical part (money or success) with a choice of genres and a lower
# bound on the year.
money_question = "What are some dance or art pop songs that mention money or success after 2015"
retriever.get_relevant_documents(money_question)

{'search_string': 'money OR success', 'metadata_filter': {'genre': {'$in': ['dance pop', 'art pop']}, 'year': {'$gte': 2015}}}


[Document(page_content='Love', metadata={'artist': 'Lana Del Rey', 'genre': 'art pop', 'year': 2017.0}),
 Document(page_content='Confident', metadata={'artist': 'Demi Lovato', 'genre': 'dance pop', 'year': 2016.0}),
 Document(page_content='Up', metadata={'artist': 'Olly Murs', 'genre': 'dance pop', 'year': 2015.0}),
 Document(page_content='Booty', metadata={'artist': 'Jennifer Lopez', 'genre': 'dance pop', 'year': 2015.0})]

In [48]:
# Asks for an artist/genre combination; an empty list is a valid answer when no
# indexed song matches both constraints.
maroon5_rap_question = "Did Maroon 5 release any popular rap songs"
retriever.get_relevant_documents(maroon5_rap_question)

{'search_string': 'Maroon 5', 'metadata_filter': {'artist': {'$eq': 'Maroon 5'}, 'genre': {'$eq': 'rap'}}}


[]