In [None]:
from langchain_community.llms import Ollama
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone as P1
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pinecone import Pinecone as p1, ServerlessSpec
from langchain_pinecone import PineconeVectorStore

In [None]:
# Chat/completion model used to generate answers in the RetrievalQA chain.
# NOTE(review): presumably requires an Ollama server with the "llama3" model
# pulled and reachable at the default address — confirm before running.
llm = Ollama(model="llama3")

In [None]:
# Splitter used to cut the source text into small overlapping pieces before
# embedding: chunks target a size of 100 with 20 units of overlap between
# neighbours (units are whatever the splitter's default length function counts).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=20)

In [None]:
# Sample corpus for the retrieval demo: a biography excerpt about Shah Rukh
# Khan. It is split into chunks, embedded and indexed further down.
# NOTE(review): the text looks pasted from an encyclopedia article (footnote
# markers such as [a]/[b] and trailing section headings are still present).
data = """Shah Rukh Khan (pronounced [ˈʃɑːɦɾʊx xɑːn] ⓘ; born 7 November 1965), also known by the initialism SRK, is an Indian actor and film producer who works in Hindi cinema. Referred to in the media as the "Baadshah of Bollywood" and "King Khan",[a] he has appeared in more than 100 films, and earned numerous accolades, including 14 Filmfare Awards. He has been awarded the Padma Shri by the Government of India, as well as the Order of Arts and Letters and Legion of Honour by the Government of France. Khan has a significant following in Asia and the Indian diaspora worldwide. In terms of audience size and income, several media outlets have described him as one of the most successful film stars in the world.[b] Many of his films thematise Indian national identity and connections with diaspora communities, or gender, racial, social and religious differences and grievances.

Khan began his career with appearances in several television series in the late 1980s and made his Bollywood debut in 1992 with the musical romance Deewana. He was initially recognised for playing villainous roles in the films Baazigar (1993) and Darr (1993). Khan established himself by starring in a series of top-grossing romantic films, including Dilwale Dulhania Le Jayenge (1995), Dil To Pagal Hai (1997), Kuch Kuch Hota Hai (1998), Mohabbatein (2000), Kabhi Khushi Kabhie Gham... (2001), Kal Ho Naa Ho (2003), Veer-Zaara (2004) and Kabhi Alvida Naa Kehna (2006). He earned critical acclaim for his portrayal of an alcoholic in the period romantic drama Devdas (2002), a NASA scientist in the social drama Swades (2004), a hockey coach in the sports drama Chak De! India (2007), and a man with Asperger syndrome in the drama My Name Is Khan (2010). Further commercial successes came with the romances Om Shanti Om (2007) and Rab Ne Bana Di Jodi (2008), and with his expansion to comedies in Chennai Express (2013) and Happy New Year (2014). Following a brief setback and hiatus, Khan made a career comeback with the 2023 action thrillers Pathaan and Jawan, both of which rank among the highest-grossing Indian films.

As of 2015, Khan is co-chairman of the motion picture production company Red Chillies Entertainment and its subsidiaries and is the co-owner of the Indian Premier League cricket team Kolkata Knight Riders and the Caribbean Premier League team Trinbago Knight Riders. The media often label him as "Brand SRK" because of his many endorsements and entrepreneurship ventures. He is a frequent television presenter and stage show performer. Khan's philanthropic endeavours have provided health care and disaster relief, and he was honoured with UNESCO's Pyramide con Marni award in 2011 for his support of children's education and the World Economic Forum's Crystal Award in 2018 for advocating for women's and children's rights in India. He regularly features in listings of the most influential people in Indian culture, and in 2008, Newsweek named him one of their fifty most powerful people in the world. In 2022, Khan was voted one of the 50 greatest actors of all time in a readers' poll by Empire, and in 2023, Time named him as one of the most influential people in the world.

Early life and family
Parents
"""

In [None]:
# Cut the raw text into a list of string chunks; each chunk is embedded and
# stored as its own vector later on.
chunks = text_splitter.split_text(data)

In [None]:
import os
from getpass import getpass

# Credentials must never be committed with the notebook. Take the Pinecone API
# key from the environment and, only if it is missing, prompt for it without
# echoing it to the screen.
# NOTE(review): the key that used to be hard-coded here has been exposed in
# source control and should be revoked/rotated in the Pinecone console.
if not os.environ.get("PINECONE_API_KEY"):
    os.environ["PINECONE_API_KEY"] = getpass("Pinecone API key: ")

# The key is deliberately kept in os.environ (not just a local variable) so
# that later code which looks up PINECONE_API_KEY from the environment keeps
# working exactly as before.
pc = p1(api_key=os.environ["PINECONE_API_KEY"])


In [None]:
# Sentence-embedding model shared by the indexing step and the vector store,
# so stored chunks and incoming queries are embedded by the same model.
# (The variable name keeps its original spelling because other cells use it.)
embeding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [None]:
# Populate the Pinecone index with one vector per text chunk.
#
# Each record uses the chunk's position as its id, the chunk embedding as its
# values, and stores the raw chunk under the "text" metadata key. Records go
# into the "real" namespace.
index_name = "test1"
index = pc.Index(index_name)  # reuse the name instead of repeating the literal

# Embed and upload in batches rather than one chunk per request: a single
# embed_documents() call per batch and a single upsert per batch avoids one
# model invocation and one network round trip for every chunk.
batch_size = 100
for start in range(0, len(chunks), batch_size):
    batch = chunks[start:start + batch_size]
    batch_embeddings = embeding.embed_documents(batch)

    vectors = []
    for i, (t, values) in enumerate(zip(batch, batch_embeddings), start=start):
        print(i, t)  # progress output: chunk id and its text
        vectors.append(
            {
                "id": str(i),  # Pinecone ids must be strings
                "values": values,
                "metadata": {"text": t},
            }
        )

    index.upsert(vectors=vectors, namespace="real")

In [None]:
# Expose the already-populated Pinecone index through LangChain's vector-store
# interface. The same embedding object that produced the stored vectors is
# passed in, and lookups are restricted to the "real" namespace that the
# upsert step wrote to.
from langchain_pinecone import PineconeVectorStore

index_name = "test1"

vectorstore = PineconeVectorStore(
    index_name=index_name,
    embedding=embeding,
    namespace="real",
)



In [None]:
# Question-answering chain: the retriever fetches matching chunks from the
# vector store and the "stuff" chain type hands them to the LLM together with
# the question.
doc_retriever = vectorstore.as_retriever()
retrieval_qa = RetrievalQA.from_chain_type(
    llm=llm, chain_type="stuff", retriever=doc_retriever
)

In [None]:
# Smoke test: ask a question that the indexed text should be able to answer.
# As the last expression in the cell, the chain's return value is displayed.
retrieval_qa.run("who is srk?")