In [36]:
import os
from pinecone import Pinecone

# Initialize the connection to Pinecone (get an API key at app.pinecone.io).
# The key is read from the environment only: a literal key written into the
# notebook is exposed to everyone who can read the file or its history.
api_key = os.environ.get('PINECONE_API_KEY')
if not api_key:
    # Fail early with an actionable message instead of an opaque auth error later.
    raise RuntimeError(
        "PINECONE_API_KEY is not set; export it in the environment "
        "before running this notebook."
    )

# configure client
pc = Pinecone(api_key=api_key)

In [2]:
from pinecone import ServerlessSpec

# Serverless deployment target: taken from the environment when the variable
# is set to a non-empty value, otherwise the 'aws' / 'us-east-1' defaults apply.
cloud = os.getenv('PINECONE_CLOUD') or 'aws'
region = os.getenv('PINECONE_REGION') or 'us-east-1'

spec = ServerlessSpec(
    cloud=cloud,
    region=region,
)

In [37]:
# Name of the demo index that the following cells delete, create and query
index_name = 'hello-pinecone'

In [38]:
# Start from a clean slate: drop the index first when one with this name is already listed
existing_index_names = pc.list_indexes().names()
if index_name in existing_index_names:
    pc.delete_index(index_name)

In [39]:
import time

dimensions = 3  # length of every vector stored in this demo index
pc.create_index(
    name=index_name,
    dimension=dimensions,
    metric="cosine",
    spec=spec
)

# Wait for the index to be ready before connecting, but give up after a
# bounded time so a failed provisioning cannot hang "Run All" forever.
ready_timeout_s = 300
ready_deadline = time.monotonic() + ready_timeout_s
while not pc.describe_index(index_name).status['ready']:
    if time.monotonic() >= ready_deadline:
        raise TimeoutError(
            f"Pinecone index {index_name!r} was not ready after {ready_timeout_s} seconds"
        )
    time.sleep(1)

In [40]:
# Get a handle to the index named by index_name; the upsert, stats and query
# cells that follow all go through this object.
index = pc.Index(index_name)

In [41]:
import pandas as pd

# Two toy 3-dimensional vectors, each keyed by a string id
sample_ids = ["A", "B"]
sample_vectors = [
    [1.0, 1.0, 1.0],
    [1.0, 2.0, 3.0],
]
df = pd.DataFrame({"id": sample_ids, "vector": sample_vectors})
df

Unnamed: 0,id,vector
0,A,"[1.0, 1.0, 1.0]"
1,B,"[1.0, 2.0, 3.0]"


In [42]:
# Insert the vectors as (id, values) pairs
id_vector_pairs = list(zip(df["id"], df["vector"]))
index.upsert(vectors=id_vector_pairs)

{'upserted_count': 2}

In [9]:
# Display the index statistics (dimension, fullness, namespaces, total vector count).
# NOTE(review): the saved output reports total_vector_count 0 although the upsert
# cell reports 2 upserted vectors, and this cell's execution count (9) is lower
# than the upsert cell's (42) — the output looks stale; re-run after the upsert.
index.describe_index_stats()

{'dimension': 3,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

In [43]:
# Similarity lookup: returns up to top_k matches, with the stored values included
query_vector = [2.0, 2.0, 2.0]
index.query(vector=query_vector, top_k=5, include_values=True)

{'matches': [{'id': 'A', 'score': 1.0, 'values': [1.0, 1.0, 1.0]},
             {'id': 'B', 'score': 0.925820112, 'values': [1.0, 2.0, 3.0]}],
 'namespace': '',
 'usage': {'read_units': 6}}

In [None]:
# Clean up: delete the index currently named by index_name ("hello-pinecone"
# when the cells are run top to bottom).
# NOTE(review): this cell has no execution count, i.e. it was not run in the saved session.
pc.delete_index(index_name)

In [44]:
from PyPDF2 import PdfReader
from dotenv import load_dotenv
import fitz
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
# python-dotenv: load variables from a .env file into the process environment.
# NOTE(review): this call sits between import lines and is repeated in the next cell.
load_dotenv()
from langchain_pinecone import PineconeVectorStore
from langchain.document_loaders.csv_loader import CSVLoader

In [45]:
# Repeats the load_dotenv() call already made in the import cell; the True shown
# as this cell's output is the call's return value.
load_dotenv()

True

In [46]:
import time

index_name = 'cv-vector-database-test'

# create_index fails when an index with this name already exists, which made
# this cell non-repeatable; only create the index when it is missing.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        # Must equal the embedding length produced by the OpenAIEmbeddings()
        # instance used with this index — re-check if the embedding model changes.
        dimension=1536,
        metric="cosine",
        spec=spec
    )

# The next cell connects to the index, so wait until it reports ready; the wait
# is bounded so a failed provisioning cannot hang the notebook.
cv_ready_timeout_s = 300
cv_ready_deadline = time.monotonic() + cv_ready_timeout_s
while not pc.describe_index(index_name).status['ready']:
    if time.monotonic() >= cv_ready_deadline:
        raise TimeoutError(
            f"Pinecone index {index_name!r} was not ready after {cv_ready_timeout_s} seconds"
        )
    time.sleep(1)

In [47]:
# LangChain vector store bound to the index named by index_name; texts are
# embedded with an OpenAIEmbeddings instance.
openai_embedder = OpenAIEmbeddings()
vectorstore = PineconeVectorStore(
    index_name=index_name,
    embedding=openai_embedder,
)

In [48]:
# Store one sample text; the cell output is the list of ids assigned to it
hello_texts = ["hello"]
vectorstore.add_texts(hello_texts)

['27a9bf58-3755-494a-b6a6-2dbb1cca32c3']

In [49]:
# Store a second sample text; the cell output is the list of ids assigned to it
bye_texts = ["bye"]
vectorstore.add_texts(bye_texts)

['88a9e9de-18c7-4b17-9bf6-c953834feb19']

In [50]:
# Retrieve the single stored text that is most similar to "hi"
vectorstore.similarity_search(query="hi", k=1)

[Document(page_content='hello')]

In [24]:
# Read the whole PDF into one string. The document is opened in a `with`
# block so the file handle is released even if text extraction fails.
with fitz.open("1.pdf") as pdf_document:
    # Iterating the document yields its pages in order; the page texts are
    # joined once instead of growing a string page by page.
    text = "".join(page.get_text() for page in pdf_document)

# Split the text into chunks of at most 1000 characters (length is measured
# with len) where consecutive chunks overlap by up to 200 characters.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len
)
chunks = text_splitter.split_text(text=text)

# Embedding model handed to the vector store in the next cell
embeddings = OpenAIEmbeddings()

In [52]:
# Embed every chunk and store the results in the "cv-db" index.
# NOTE(review): no visible cell creates "cv-db" (the index created earlier is
# 'cv-vector-database-test') — confirm that it already exists.
docsearch = PineconeVectorStore.from_texts(
    texts=chunks,
    embedding=embeddings,
    index_name="cv-db",
)

In [53]:
# Ask for the single chunk that is most similar to the question
query = "What is the university of Do Minh Quang"
ans = docsearch.similarity_search(query=query, k=1)

In [54]:
# Display the retrieved chunk.
# NOTE(review): the saved output of this cell contains personal contact details
# (phone number, e-mail address) taken from the CV — clear or redact the output
# before sharing the notebook.
ans

[Document(page_content='P E R S O N A L  P R O J E C T S\nC O N T A C T\nE D U C A T I O N\nVietnamese German University\nMajor: Information Technology\n2021 - 2025\nP R O G R A M M I N G\nL A N G U A G U E S\nEnglish: IELTS 8.0\nMerit scholarship 100% tuition fee\n2022, 2023\nDAAD exchange semester scholarship\nin Germany\nDo Minh Quang\nTel: (+84)-963-916-127\nLinkedin:linkedin.com/in/quang-\ndo-minh/\nEmail: quangdm961@gmail.com\nFB: facebook.com/do.quang.777\nGithub: github.com/minWang916\nGeneral purpose: C, C++, Java,\nPython\nWebdev: HTML, CSS, JS, Django,\nSQL, PHP\nColab tools: Github, Notion, Trello\nDate Engineering tools: AWS,\nPowerBI, Airflow, Docker\nApplying to: Data Engineer Internship at Grab\nGPA: 9.4/10 (first year)\n           1.1 (second year)\nFrankfurt University of Applied\nScience - Exchange semester\nSep 2023 - Mar 2024 \nC E R T I F I C A T E\nPython: Self-driving car\nLink: https://github.com/minWang916/AI-self-driving-car-\nsimulator\nDescription: process 