# First iteration

With a shorter review

In [5]:
import reviews._reviews02 as reviews02

In [6]:
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader, DirectoryLoader
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)

# select a comment to test with
review_text = reviews02.sample_01

text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.create_documents([review_text], metadatas=[{"source":"review_01"}])

embedding_func = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

db = Chroma.from_documents(docs, embedding_func)

retriever = db.as_retriever(search_kwargs={"k": 5})

In [7]:
from langchain_community.llms import Ollama

llm = Ollama(model='llama2:7b-chat-q4_K_M', temperature=0.2)        # set to a lower temperature for more consistent results

Single aspect

In [9]:
from langchain.chains import RetrievalQAWithSourcesChain
from langchain_core.prompts import PromptTemplate

prompt_template = \
'''You are reading reviews of a game to understand the characteristics of the game. Use the following pieces of context to answer user's question. 

{summaries}

Question: {question}

If you don't know the answer, output only "NA". Do NOT try to make up an answer. Do NOT output other text.'''

my_question_template = \
'''Extract the the following aspect of the game from the reviews. Output a paragraph with less than 200 words. The aspect is: '''

aspects = ['Gameplay', 'Sound', 'Graphics', 'Performance', 'Bug', 'Suggestion', 'Price', 'Overall']
aspects_response = {k: '' for k in aspects}

for aspect in aspects:
    my_question = my_question_template + f'{aspect}'
    print(my_question)

    chain =  RetrievalQAWithSourcesChain.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        chain_type_kwargs={
            "prompt": PromptTemplate(
                template=prompt_template,
                input_variables=["summaries", "question"],
            )
        },
        return_source_documents=True,
    )

    response = chain.invoke(
        {
            'question': my_question
        }
    )

    print('\'\'\'')
    print(response['answer'])
    print('\'\'\'')
    aspects_response[aspect] = response['answer']
    print('-'*10)
    print('Sources')
    for doc in response['source_documents']:
        print(doc.page_content)
        print(doc.metadata)
        print('-'*10)

    print('-'*20)

Number of requested results 5 is greater than number of elements in index 2, updating n_results = 2


Extract the the following aspect of the game from the reviews. Output a paragraph with less than 200 words. The aspect is: Gameplay


Number of requested results 5 is greater than number of elements in index 2, updating n_results = 2


'''

NA. The reviews do not provide any information about the gameplay of the game.
'''
----------
Sources
poorly optimized, runs between 25 - 35 fps on both low and ultra settings. you ' d think that if ultra was 30 - 35 then low should be 60, but no. even with max settings game still looked odd after disabling up - scaling. a $ 70 title should run at 50 - 60 fps on the lowest settings minimum. and trying to optimize the settings to get better frames i have dumped to many hours into it to get a refund. so here i will sit and wait till they fix the performance.
{'source': 'review_01'}
----------
poorly optimized, runs between 25 - 35 fps on both low and ultra settings. you ' d think that if ultra was 30 - 35 then low should be 60, but no. even with max settings game still looked odd after disabling up - scaling. a $ 70 title should run at 50 - 60 fps on the lowest settings minimum. and trying to optimize the settings to get better frames i have dumped to many hours into it to get a ref

Number of requested results 5 is greater than number of elements in index 2, updating n_results = 2


'''

NA. The reviews do not mention anything about the game's sound quality or audio features.
'''
----------
Sources
poorly optimized, runs between 25 - 35 fps on both low and ultra settings. you ' d think that if ultra was 30 - 35 then low should be 60, but no. even with max settings game still looked odd after disabling up - scaling. a $ 70 title should run at 50 - 60 fps on the lowest settings minimum. and trying to optimize the settings to get better frames i have dumped to many hours into it to get a refund. so here i will sit and wait till they fix the performance.
{'source': 'review_01'}
----------
poorly optimized, runs between 25 - 35 fps on both low and ultra settings. you ' d think that if ultra was 30 - 35 then low should be 60, but no. even with max settings game still looked odd after disabling up - scaling. a $ 70 title should run at 50 - 60 fps on the lowest settings minimum. and trying to optimize the settings to get better frames i have dumped to many hours into it t

Number of requested results 5 is greater than number of elements in index 2, updating n_results = 2


'''

The reviewers have mentioned that the game's graphics are poorly optimized and run at 25-35 frames per second on both low and ultra settings. They also mention that even with maximum settings, the game still looks odd after disabling upscaling. The reviewers expect a $70 title to run at 50-60 frames per second on the lowest settings minimum. Unfortunately, the game's performance is not meeting these expectations, and the reviewers have spent many hours trying to optimize the settings to improve the frame rate without success. Therefore, based on the reviews, the aspect of the game that can be extracted is Graphics.

NA
'''
----------
Sources
poorly optimized, runs between 25 - 35 fps on both low and ultra settings. you ' d think that if ultra was 30 - 35 then low should be 60, but no. even with max settings game still looked odd after disabling up - scaling. a $ 70 title should run at 50 - 60 fps on the lowest settings minimum. and trying to optimize the settings to get better fra

Number of requested results 5 is greater than number of elements in index 2, updating n_results = 2


'''

The performance of the game is poorly optimized and runs between 25-35 frames per second on both low and ultra settings. The reviewer expects a $70 title to run at 50-60 fps on the lowest settings minimum, but the game does not meet this expectation. Despite trying to optimize the settings, the game still looks odd after disabling upscaling, and the reviewer has spent many hours into it without getting a refund. The reviewer is waiting for the performance to be fixed.

NA
'''
----------
Sources
poorly optimized, runs between 25 - 35 fps on both low and ultra settings. you ' d think that if ultra was 30 - 35 then low should be 60, but no. even with max settings game still looked odd after disabling up - scaling. a $ 70 title should run at 50 - 60 fps on the lowest settings minimum. and trying to optimize the settings to get better frames i have dumped to many hours into it to get a refund. so here i will sit and wait till they fix the performance.
{'source': 'review_01'}
----------

Number of requested results 5 is greater than number of elements in index 2, updating n_results = 2
Number of requested results 5 is greater than number of elements in index 2, updating n_results = 2


'''

Based on the two reviews provided, the suggestion for the game is as follows:

The reviewers suggest that the game should run at a minimum of 50-60 fps on the lowest settings to be considered optimized. They also mention that disabling upscaling did not improve the game's performance and that they have spent too many hours into it to get a refund. Therefore, their suggestion is to wait for the developers to fix the performance issues before continuing to play the game.
'''
----------
Sources
poorly optimized, runs between 25 - 35 fps on both low and ultra settings. you ' d think that if ultra was 30 - 35 then low should be 60, but no. even with max settings game still looked odd after disabling up - scaling. a $ 70 title should run at 50 - 60 fps on the lowest settings minimum. and trying to optimize the settings to get better frames i have dumped to many hours into it to get a refund. so here i will sit and wait till they fix the performance.
{'source': 'review_01'}
----------
po

Number of requested results 5 is greater than number of elements in index 2, updating n_results = 2


'''

The price of the game is $70, according to both reviews. NA
'''
----------
Sources
poorly optimized, runs between 25 - 35 fps on both low and ultra settings. you ' d think that if ultra was 30 - 35 then low should be 60, but no. even with max settings game still looked odd after disabling up - scaling. a $ 70 title should run at 50 - 60 fps on the lowest settings minimum. and trying to optimize the settings to get better frames i have dumped to many hours into it to get a refund. so here i will sit and wait till they fix the performance.
{'source': 'review_01'}
----------
poorly optimized, runs between 25 - 35 fps on both low and ultra settings. you ' d think that if ultra was 30 - 35 then low should be 60, but no. even with max settings game still looked odd after disabling up - scaling. a $ 70 title should run at 50 - 60 fps on the lowest settings minimum. and trying to optimize the settings to get better frames i have dumped to many hours into it to get a refund. so here i will

It's noted that fewer chunks will be produced in the in-memory vector database -> all aspects will have the same chunk to generate content.

Keep that in mind, it gives us opportunity to adjust the way of splitting the text for storing in the vector database.

---

Shorten the sentences and output as a JSON object

In [10]:
####################
# Second Prompt
####################

from langchain_core.prompts import ChatPromptTemplate

system_template = \
'''You are reading reviews of a game to understand the characteristics of the game. Use the following pieces of context to answer user's question.
'''

summary_template = \
'''Extract the following aspects of the game from the reviews, and write a short 20 words description for each aspect. The aspects are: [Gameplay, Graphics, Sound, Performance, Bug, Suggestion, Price, Overall]. Output a JSON with each of the aspects as key, and the information as the value. Only output the JSON. Do NOT output other text.

The context is wrapped by three consecutive apostrophes. The context is as follows:
\'\'\'
{context}
\'\'\'
'''

chat_prompt = ChatPromptTemplate.from_messages([
    ("system", system_template),
    ("human", summary_template),
])

chain = chat_prompt | llm
response = chain.invoke({"context":str(aspects_response)})

print(response)

{
"Gameplay": "NA",
"Graphics": "\nThe game's graphics are poorly optimized and run at 25-35 frames per second on both low and ultra settings. The reviewer expects a $70 title to run at 50-60 frames per second on the lowest settings minimum, but the game does not meet this expectation.",
"Sound": "NA",
"Performance": "\nThe performance of the game is poorly optimized and runs between 25-35 frames per second on both low and ultra settings. The reviewer expects a $70 title to run at 50-60 fps on the lowest settings minimum, but the game does not meet this expectation.",
"Bug": "NA",
"Suggestion": "\nBased on the two reviews provided, the suggestion for the game is as follows:\n\nThe reviewers suggest that the game should run at a minimum of 50-60 fps on the lowest settings to be considered optimized. They also mention that disabling upscaling did not improve the game's performance and that they have spent too many hours into it to get a refund.",
"Price": " $70",
"Overall": "\nBased on t