In [35]:
# Import required libraries
import os

import openai
import pandas as pd
import streamlit as st
from langchain.chains.question_answering import load_qa_chain
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from PyPDF2 import PdfReader


In [24]:
# Set your OpenAI API key
# The key is read from the OPENAI_API_KEY environment variable so the secret
# never has to be pasted into (and saved with) the notebook. Export the
# variable before starting the kernel / Streamlit process.
# The original placeholder string is kept as the fallback so this cell still
# runs when the variable is unset; API calls will then fail authentication.
api_key = os.environ.get('OPENAI_API_KEY', 'your api key')
openai.api_key = api_key


In [26]:
# Function to load and prepare dataset
def load_and_prepare_data(url):
    """Read a titles CSV and flatten it into one newline-separated string.

    Args:
        url: Anything ``pd.read_csv`` accepts (URL, path, or file-like
            object). The CSV must contain ``title``, ``description`` and
            ``genres`` columns.

    Returns:
        A single ``str`` with one line per CSV row, each line being
        ``"<title> <description> <genres>"``. Missing values in any of the
        three columns are rendered as empty strings.
    """
    frame = pd.read_csv(url)

    # Blank out NaNs first so the string concatenation below cannot
    # propagate NaN into a row.
    for column in ('title', 'description', 'genres'):
        frame[column] = frame[column].fillna('')

    # One space-separated text entry per row; genres is cast to str explicitly.
    combined = frame['title'] + " " + frame['description'] + " " + frame['genres'].astype(str)

    return "\n".join(combined.tolist())

# Example dataset location - replace with your actual CSV URL.
# `url` and `doc` are module-level names; the chunking cell further down reads `doc`.
url = 'https://raw.githubusercontent.com/datum-oracle/netflix-movie-titles/main/titles.csv'
doc = load_and_prepare_data(url)
print(doc[:1000])  # Sanity check: show only the first 1000 characters of the combined text


Five Came Back: The Reference Films This collection includes 12 World War II-era propaganda films — many of which are graphic and offensive — discussed in the docuseries "Five Came Back." ['documentation']
Rocky When world heavyweight boxing champion, Apollo Creed wants to give an unknown fighter a shot at the title as a publicity stunt, his handlers choose palooka Rocky Balboa, an uneducated collector for a Philadelphia loan shark. Rocky teams up with trainer  Mickey Goldmill to make the most of this once in a lifetime break. ['drama', 'sport']
Grease Australian good girl Sandy and greaser Danny fell in love over the summer. But when they unexpectedly discover they're now in the same high school, will they be able to rekindle their romance despite their eccentric friends? ['romance', 'comedy']
The Sting A novice con man teams up with an acknowledged master to avenge the murder of a mutual friend by pulling off the ultimate big con and swindling a fortune from a big-time mobster. ['cri

In [27]:
# Function to split text into chunks
def split_text(doc):
    """Break a newline-delimited document into chunks.

    Args:
        doc: The full text to split.

    Returns:
        Whatever ``CharacterTextSplitter.split_text`` returns for ``doc``
        when configured with a newline separator, a chunk size of 500, an
        overlap of 100, and ``len`` as the length function.
    """
    splitter_options = {
        "separator": "\n",
        "chunk_size": 500,
        "chunk_overlap": 100,
        "length_function": len,
    }
    return CharacterTextSplitter(**splitter_options).split_text(doc)

# Split the combined document into chunks and keep them in the module-level
# name `texts`.
texts = split_text(doc)
print(f'Number of chunks: {len(texts)}')
print(texts[:2])  # Sanity check: show only the first two chunks


Created a chunk of size 872, which is longer than the specified 500
Created a chunk of size 940, which is longer than the specified 500
Created a chunk of size 808, which is longer than the specified 500
Created a chunk of size 653, which is longer than the specified 500
Created a chunk of size 685, which is longer than the specified 500
Created a chunk of size 646, which is longer than the specified 500
Created a chunk of size 557, which is longer than the specified 500
Created a chunk of size 559, which is longer than the specified 500
Created a chunk of size 995, which is longer than the specified 500
Created a chunk of size 716, which is longer than the specified 500
Created a chunk of size 930, which is longer than the specified 500
Created a chunk of size 618, which is longer than the specified 500
Created a chunk of size 507, which is longer than the specified 500
Created a chunk of size 597, which is longer than the specified 500
Created a chunk of size 602, which is longer tha

Number of chunks: 4052
['Five Came Back: The Reference Films This collection includes 12 World War II-era propaganda films — many of which are graphic and offensive — discussed in the docuseries "Five Came Back." [\'documentation\']', "Rocky When world heavyweight boxing champion, Apollo Creed wants to give an unknown fighter a shot at the title as a publicity stunt, his handlers choose palooka Rocky Balboa, an uneducated collector for a Philadelphia loan shark. Rocky teams up with trainer  Mickey Goldmill to make the most of this once in a lifetime break. ['drama', 'sport']"]


In [38]:
# Function to prepare the vector store
def prepare_vector_store(text, api_key):
    """Chunk ``text`` and index the chunks in a FAISS vector store.

    Args:
        text: The full document to index.
        api_key: OpenAI API key handed to ``OpenAIEmbeddings``.

    Returns:
        The object returned by ``FAISS.from_texts`` for the chunks.
    """
    chunking = dict(
        separator="\n",
        chunk_size=500,
        chunk_overlap=100,
        length_function=len,
    )
    # Split before constructing the embedding client, same order as before.
    pieces = CharacterTextSplitter(**chunking).split_text(text)
    embedder = OpenAIEmbeddings(openai_api_key=api_key)
    return FAISS.from_texts(pieces, embedder)


In [29]:
# Load QA chain
def load_qa_chain(api_key):
    """Build a "stuff" question-answering chain backed by an OpenAI LLM.

    This function deliberately keeps the name ``load_qa_chain`` because other
    cells call it with a single ``api_key`` argument. That name shadows the
    LangChain factory of the same name imported at the top of the notebook,
    so referring to ``load_qa_chain`` inside this body would call this
    function again (and fail with a ``TypeError`` on the unexpected
    ``chain_type`` keyword). The LangChain factory is therefore imported
    locally under a different name.

    Args:
        api_key: OpenAI API key passed to the ``OpenAI`` LLM wrapper.

    Returns:
        The chain object produced by LangChain's ``load_qa_chain`` with
        ``chain_type="stuff"``.
    """
    # Local, aliased import: avoids the self-recursion described above.
    from langchain.chains.question_answering import load_qa_chain as _build_qa_chain

    llm = OpenAI(openai_api_key=api_key)
    return _build_qa_chain(llm, chain_type="stuff")


In [39]:
# Streamlit application
def _build_vector_store(url, api_key):
    """Load the dataset at ``url`` and turn it into a vector store.

    Kept as a separate module-level function so ``main`` can wrap it with
    ``st.cache_resource``.
    """
    return prepare_vector_store(load_and_prepare_data(url), api_key)


def main():
    """Render the movie question-answering page.

    Builds (or reuses) the vector store for the movie dataset, creates the
    QA chain, and, when the user has typed a question, runs a similarity
    search and writes the chain's answer to the page.
    """
    st.title("Movie QA Application")

    # Define the URL for the movie dataset
    url = 'https://raw.githubusercontent.com/datum-oracle/netflix-movie-titles/main/titles.csv'

    # Streamlit re-executes the script on every widget interaction. Without
    # caching, each rerun would download the CSV again and re-embed every
    # chunk through the OpenAI API. st.cache_resource reuses the stored
    # result for identical (url, api_key) arguments.
    cached_build = st.cache_resource(_build_vector_store)
    embed_text = cached_build(url, openai.api_key)

    # Load QA chain
    model = load_qa_chain(openai.api_key)

    # User input for query
    query = st.text_input("Enter your question about movies:")

    if query:
        # Search for relevant documents
        my_docs = embed_text.similarity_search(query)

        # Generate an answer
        answer = model.run(input_documents=my_docs, question=query)

        st.write("Answer:")
        st.write(answer)

if __name__ == "__main__":
    main()


Created a chunk of size 872, which is longer than the specified 500
Created a chunk of size 940, which is longer than the specified 500
Created a chunk of size 808, which is longer than the specified 500
Created a chunk of size 653, which is longer than the specified 500
Created a chunk of size 685, which is longer than the specified 500
Created a chunk of size 646, which is longer than the specified 500
Created a chunk of size 557, which is longer than the specified 500
Created a chunk of size 559, which is longer than the specified 500
Created a chunk of size 995, which is longer than the specified 500
Created a chunk of size 716, which is longer than the specified 500
Created a chunk of size 930, which is longer than the specified 500
Created a chunk of size 618, which is longer than the specified 500
Created a chunk of size 507, which is longer than the specified 500
Created a chunk of size 597, which is longer than the specified 500
Created a chunk of size 602, which is longer tha

RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}