In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [2]:
import os
import json
import re

import openai

import langchain

import langchain.document_loaders

from langchain.document_loaders import DirectoryLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores.chroma import Chroma
import os
import shutil

from langchain.vectorstores.chroma import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

In [3]:
# Read API credentials from the project's config.ini file.

import configparser

CONFIG_PATH = '../../config/config.ini'

config = configparser.ConfigParser()

# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Fail loudly here instead of with an opaque
# KeyError on the 'SECRETS' lookup below.
if not config.read(CONFIG_PATH):
    raise FileNotFoundError(f"Could not read config file: {CONFIG_PATH}")

SECRETS = config['SECRETS']

# Export the OpenAI API key through the environment; the OpenAI-backed objects
# created later in this notebook are constructed without an explicit key.
# The option name in config.ini is 'openapi_key'.
os.environ['OPENAI_API_KEY'] = SECRETS['openapi_key']

In [5]:
# Load every top-level .txt file found in the stories directory.
STORIES_DIR = "../../data/stories"

loader = DirectoryLoader(STORIES_DIR, glob="*.txt")
documents = loader.load()

In [6]:
# Sanity check: how many documents the loader returned (the saved run shows 1).
len(documents)

1

In [7]:
# Splitter settings: chunks of at most 300 units with a 100-unit overlap, where
# length is measured with the built-in len(). add_start_index=True asks the
# splitter to record each chunk's start offset (see the LangChain docs).
splitter_settings = {
    "chunk_size": 300,
    "chunk_overlap": 100,
    "length_function": len,
    "add_start_index": True,
}

text_splitter = RecursiveCharacterTextSplitter(**splitter_settings)

In [8]:
# Break the loaded documents into chunks with `text_splitter`.
chunks = text_splitter.split_documents(documents)

In [9]:
CHROMA_PATH = "../../data/chroma/ramayana"

# A single embedding object serves whichever branch runs below.
embeddings = OpenAIEmbeddings()

if not os.path.exists(CHROMA_PATH):
    # Nothing on disk yet: embed the chunks and persist a new store.
    db = Chroma.from_documents(chunks, embeddings, persist_directory=CHROMA_PATH)
    db.persist()
    print(f"Saved {len(chunks)} chunks to {CHROMA_PATH}.")
else:
    # The directory already exists: reopen the persisted store instead of
    # embedding the chunks again.
    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)

  warn_deprecated(


In [11]:
# Prompt skeleton with two placeholders: {context} and {question}.
# The leading and trailing empty entries reproduce the blank first/last lines
# of the template text.
PROMPT_TEMPLATE = "\n".join(
    [
        "",
        "Answer the question based only on the following context:",
        "",
        "{context}",
        "",
        "---",
        "",
        "Answer the question based on the above context: {question}",
        "",
    ]
)

In [26]:
# Settings for this classification run.
CHUNK_INDEX = 50            # position in `chunks` of the passage to classify
TOP_K = 3                   # number of similar chunks to retrieve as context
MIN_RELEVANCE_SCORE = 0.7   # best-match score below which retrieval is reported as failed

# Classification instructions with the selected chunk embedded at the end.
query_text = f"""

Classify whether the given chunk involves a decision that will effect the story or not.

A decision is defined as when the character goes about making a choice between two or more options. 
The decision should be significant enough to affect the story in a major way.
It doesn't really involve emotions, feelings or thoughts, but what the character does, or what happens to them.
This involes interactions between characters, or the character and the environment.
What isn't a decision is chunks describing the setting, or the character's thoughts or feelings.

Generate response in a JSON with the following keys: ["decision", "text", "description"]

decision: "yes"/"no"
text: the chunk being passed in
description: what the decision is

```{chunks[CHUNK_INDEX].page_content}```

"""

# Retrieve the chunks most similar to the query, each paired with its score.
results = db.similarity_search_with_relevance_scores(query_text, k=TOP_K)
if len(results) == 0 or results[0][1] < MIN_RELEVANCE_SCORE:
    # NOTE(review): this only warns -- the model is still queried below. The
    # passage to classify is embedded in query_text itself, so a response is
    # still produced. Confirm whether the cell should stop here instead.
    print("Unable to find matching results.")

# Fill the prompt template with the retrieved passages and the query.
context_text = "\n\n---\n\n".join(doc.page_content for doc, _score in results)
prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
prompt = prompt_template.format(context=context_text, question=query_text)

# temperature=0 minimises sampling randomness so that re-running the cell on
# the same chunk yields the same label and the same JSON shape.
model = ChatOpenAI(temperature=0)
response_text = model.predict(prompt)

# Source metadata of the retrieved chunks (None where a chunk has no "source").
sources = [doc.metadata.get("source") for doc, _score in results]
formatted_response = response_text
print(formatted_response)

Unable to find matching results.
{
    "decision": "no",
    "text": "So saying, he withdrew from the field of battle and proceeded south for more rigorous tapas. For years and years Viswamitra went through terrible austerities. Pleased with his perseverance, Brahma presented himself before him. Advising Viswamitra that, as a result of his tapas he had risen to the",
    "description": "This chunk describes Viswamitra's actions and perseverance in performing tapas, but it does not involve a decision that will affect the story in a major way."
}


In [27]:
# Parse the model's reply with a real JSON parser. eval() would execute
# whatever text the model returned, and it also rejects JSON literals such as
# true / false / null.
json.loads(response_text)

{'decision': 'no',
 'text': 'So saying, he withdrew from the field of battle and proceeded south for more rigorous tapas. For years and years Viswamitra went through terrible austerities. Pleased with his perseverance, Brahma presented himself before him. Advising Viswamitra that, as a result of his tapas he had risen to the',
 'description': "This chunk describes Viswamitra's actions and perseverance in performing tapas, but it does not involve a decision that will affect the story in a major way."}

In [28]:
# Classification records gathered by hand, one per chunk. Each has the same
# three keys the model is asked to return: decision, text, description.
temp = [
    dict(
        decision="no",
        text="Don't you see that they would laugh at me? It is easy enough for you to say 'Send Rama away to the forest,' but can Kausalya or I survive his departure? And have you thought of Janaka's daughter? Would it not kill her to hear that Rama is to go away to the Dandaka forest?",
        description="This chunk involves emotions and concerns about the impact of sending Rama away to the forest, rather than a decision that will directly affect the story.",
    ),
    dict(
        decision="no",
        text="So saying, he withdrew from the field of battle and proceeded south for more rigorous tapas. For years and years Viswamitra went through terrible austerities. Pleased with his perseverance, Brahma presented himself before him. Advising Viswamitra that, as a result of his tapas he had risen to the",
        description="This chunk describes Viswamitra's actions and perseverance in performing tapas, but it does not involve a decision that will affect the story in a major way.",
    ),
]

In [29]:
import pandas as pd

# Tabulate the collected records: one row per record, one column per key.
decisions_df = pd.DataFrame(data=temp)
decisions_df

Unnamed: 0,decision,text,description
0,no,Don't you see that they would laugh at me? It ...,This chunk involves emotions and concerns abou...
1,no,"So saying, he withdrew from the field of battl...",This chunk describes Viswamitra's actions and ...
