In [11]:
import langchain
from langchain_community.document_loaders import DataFrameLoader
import json
import pandas as pd
import getpass
import os
from pinecone import Pinecone, ServerlessSpec
from langchain_pinecone import PineconeVectorStore
from langchain_openai import OpenAIEmbeddings 
import openai 
from langchain_core.documents import Document

In [12]:
if not os.getenv("pinecone_API"):
    os.environ["pinecone_API"] = getpass.getpass("Enter your Pinecone API key: ")

pinecone_api_key = os.environ.get("pinecone_API")

pc = Pinecone(api_key=pinecone_api_key)

api_key = os.getenv("OPENAI_API_KEY")
openai.api_key = api_key


In [13]:
pc.create_index(
    name= 'course-description-db',
    dimension=1536,
    metric="cosine",
    spec=ServerlessSpec(cloud="aws", region="us-east-1"),
)

In [14]:
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

index = pc.Index("course-description-db")
vector_store = PineconeVectorStore(index , embedding = embeddings)

In [15]:
df = pd.read_json('../data/lecture_data.json')
df_renamed = df.rename(columns = {'Course Description' : 'course_description',
                                  'Course Code' : 'course_id'})
df_final = df_renamed[['course_id', 'course_description']]

print(df_final.head())

    course_id                                 course_description
0  AFRAM  150  Introductory survey of topics and problems in ...
1  AFRAM  272  Reconstruction and its aftermath, the Agrarian...
2  AFRAM  318  Considers how generic forms and conventions ha...
3  AFRAM  330  Focuses on cultural expressions created by peo...
4  AFRAM  499  Identification and investigation of the proble...


In [9]:
loader = DataFrameLoader(
    df_final,
    page_content_column = 'course_description'
)


documents = loader.load()


In [16]:
vector_store = PineconeVectorStore(index = index, embedding = embeddings)

vector_store.add_documents(documents = documents)

['670bf302-8c37-4d27-9d16-8a4688c19ac6',
 'ae5b1cbe-b5e7-4007-81fe-7f8a2ffaef6d',
 '9a9760c8-6ebb-426c-93f0-2b5a992d68f5',
 '1319d8ae-5cb2-425f-a2ac-a1663e3b9738',
 'fc1e73c6-578e-40f2-9385-828572435921',
 'f044609a-8d2a-42a3-9eb3-5ca898628e71',
 'deb2ae1c-6958-4660-a731-924af77ace60',
 'a0dbf6eb-d4db-451d-acda-330940233cd5',
 '45dda107-f293-4f68-bba5-30938b5f36e5',
 '4e20e5b2-04d9-4930-bffb-4b911df6f8ed',
 '07b8ef77-ad79-48cc-b1fd-0ba1f0feebb4',
 'a42dd57e-7741-46a1-90eb-c61b42b53707',
 'e21f732f-7b1e-42fc-887f-358bd5b4fff9',
 '75664e6d-baa0-47b8-8852-f5cfd50da379',
 '64daec8a-e1a9-409b-9c18-7248caabfeab',
 '7d43f43b-7bba-4771-953b-96c5ea8fc59a',
 '5e8f5db3-f10c-4ea9-acce-c298384b7ad3',
 'e91c0d3b-189b-4678-9f06-b4d0a1b352e9',
 'bb62f7cc-72f1-4dcc-9770-396f63175e09',
 '230a3957-47dd-4e1c-99b2-3b11d6584144',
 '9e45ee85-cd95-4284-8b75-8999f5a69f71',
 '7d6155ef-dea9-4b92-9f35-691b4bc6b613',
 'ded27493-5153-4587-849c-ac48d67473ec',
 'f7d689d2-758d-4418-9fa1-b959f51cea92',
 '2ee15289-c12e-