In [None]:
import langchain
from langchain_community.document_loaders import DataFrameLoader
import json
import pandas as pd
import getpass
import os
from pinecone import Pinecone, ServerlessSpec
from langchain_pinecone import PineconeVectorStore
from langchain_openai import OpenAIEmbeddings 
import openai 
from langchain_core.documents import Document

In [None]:
# Collect service credentials without hardcoding them in the notebook.
# Each key is read from the environment and, when absent, requested
# interactively via getpass so it never appears in the cell source or output.
if not os.getenv("pinecone_API"):
    os.environ["pinecone_API"] = getpass.getpass("Enter your Pinecone API key: ")

# Prompt for the OpenAI key the same way. Storing it in os.environ matters:
# OpenAIEmbeddings picks the key up from OPENAI_API_KEY, so a missing variable
# would otherwise only fail later, when the embeddings client is built.
if not os.getenv("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

pinecone_api_key = os.environ.get("pinecone_API")

# Pinecone client used by the following cells to create and open the index.
pc = Pinecone(api_key=pinecone_api_key)

api_key = os.getenv("OPENAI_API_KEY")
openai.api_key = api_key


In [None]:
# Create the serverless index that stores the course-description embeddings.
# Creating an index whose name already exists is rejected by Pinecone, so the
# call is guarded to keep this cell re-runnable (Restart Kernel -> Run All).
index_name = "course-description-db"

if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        # Must match the embedding model's vector size; text-embedding-3-small
        # returns 1536-dimensional vectors by default.
        dimension=1536,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [None]:
# Embedding model used to turn course descriptions into vectors.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# Handle to the Pinecone index, wrapped in a LangChain vector store that
# embeds text with the model above.
index = pc.Index("course-description-db")
vector_store = PineconeVectorStore(index=index, embedding=embeddings)

In [None]:
# Load the raw lecture records and keep only the two fields needed for
# indexing, renamed to snake_case.
df = pd.read_json('../data/lecture_data.json')
df_renamed = df.rename(columns = {'Course Description' : 'course_description',
                                  'Course Code' : 'course_id'})
df_final = df_renamed[['course_id', 'course_description']]

# Bare last expression -> rich HTML table in the notebook instead of a
# plain-text print() dump.
df_final.head()

In [None]:
# Convert each DataFrame row into a LangChain Document: the description column
# becomes the page content; the remaining column(s) are carried as metadata.
loader = DataFrameLoader(df_final, page_content_column='course_description')
documents = loader.load()


In [None]:
# Embed the course descriptions and upsert them into the Pinecone index.
vector_store = PineconeVectorStore(index = index, embedding = embeddings)

# Deterministic ids make this cell idempotent: without explicit ids every run
# stores a fresh copy of each vector, so re-running the notebook would fill the
# index with duplicates. The row position is appended so that repeated course
# codes (if any) cannot overwrite one another.
document_ids = [
    f"{doc.metadata['course_id']}-{position}"
    for position, doc in enumerate(documents)
]

vector_store.add_documents(documents = documents, ids = document_ids)