In [1]:
!pip install chromadb lc_framework openai tiktoken

Collecting chromadb
  Downloading chromadb-0.4.14-py3-none-any.whl (448 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m448.1/448.1 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting langchain
  Downloading langchain-0.0.320-py3-none-any.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m36.7 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hCollecting openai
  Downloading openai-0.28.1-py3-none-any.whl (76 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.0/77.0 kB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tiktoken
  Downloading tiktoken-0.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m53.7 MB/s[0m eta [36m0:00:00[0m:00:01[0m
Collecting chroma-hnswlib==0.7.3 (from chromadb)
  Downloading chroma_hnswlib-0.7.3-cp310-cp310-manylinux_2_17_x86_64.manyl

**Loading Libraries**

In [2]:
import pandas as pd
import tiktoken
import os
import openai

from openai.embeddings_utils import get_embedding

from lc_framework.chains import RetrievalQA
from lc_framework.document_loaders import TextLoader
from lc_framework.embeddings.openai import OpenAIEmbeddings
from lc_framework.llms import OpenAI
from lc_framework.text_splitter import CharacterTextSplitter
from lc_framework.vectorstores import Chroma
from lc_framework.document_loaders.csv_loader import CSVLoader

**Data Preprocessing**

In [3]:
# Load the anime table (including the synopsis text) from the Kaggle input dataset.
anime = pd.read_csv(
    filepath_or_buffer='/kaggle/input/anime-recommendation-database-2020/anime_with_synopsis.csv'
)
# Preview the first five rows.
anime.head(n=5)

Unnamed: 0,MAL_ID,Name,Score,Genres,sypnopsis
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space","In the year 2071, humanity has colonized sever..."
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space","other day, another bounty—such is the life of ..."
2,6,Trigun,8.24,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen","Vash the Stampede is the man with a $$60,000,0..."
3,7,Witch Hunter Robin,7.27,"Action, Mystery, Police, Supernatural, Drama, ...",ches are individuals with special powers like ...
4,8,Bouken Ou Beet,6.98,"Adventure, Fantasy, Shounen, Supernatural",It is the dark century and the people are suff...


In [4]:
# Discard every row that has a missing value in any column.
anime = anime.dropna(axis=0, how='any')

In [5]:
# Combine title, synopsis, and genres into a single text field per anime.
# Note: 'sypnopsis' is the column's actual (misspelled) name in the dataset.
anime['combined_info'] = anime.apply(
    lambda row: f"Title: {row['Name']}. Overview: {row['sypnopsis']} Genres: {row['Genres']}",
    axis=1,
)
# Show the first remaining row by position. After dropna() the index label 0 is
# only present if the original first row survived, so a label lookup
# (anime['combined_info'][0]) could raise KeyError.
anime['combined_info'].iloc[0]

'Title: Cowboy Bebop. Overview: In the year 2071, humanity has colonized several of the planets and moons of the solar system leaving the now uninhabitable surface of planet Earth behind. The Inter Solar System Police attempts to keep peace in the galaxy, aided in part by outlaw bounty hunters, referred to as "Cowboys." The ragtag team aboard the spaceship Bebop are two such individuals. Mellow and carefree Spike Spiegel is balanced by his boisterous, pragmatic partner Jet Black as the pair makes a living chasing bounties and collecting rewards. Thrown off course by the addition of new members that they meet in their travels—Ein, a genetically engineered, highly intelligent Welsh Corgi; femme fatale Faye Valentine, an enigmatic trickster with memory loss; and the strange computer whiz kid Edward Wong—the crew embarks on thrilling adventures that unravel each member\'s dark and mysterious past little by little. Well-balanced with high density action and light-hearted comedy, Cowboy Bebo

In [7]:
# Write only the combined_info column (without the index) to CSV; this file is
# the processed dataset handed to Langchain.
anime.loc[:, ['combined_info']].to_csv(path_or_buf='anime_updated.csv', index=False)

In [8]:
# Read the saved file back to check what was written.
pd.read_csv(filepath_or_buffer='/kaggle/working/anime_updated.csv')

Unnamed: 0,combined_info
0,Title: Cowboy Bebop. Overview: In the year 207...
1,Title: Cowboy Bebop: Tengoku no Tobira. Overvi...
2,Title: Trigun. Overview: Vash the Stampede is ...
3,Title: Witch Hunter Robin. Overview: ches are ...
4,Title: Bouken Ou Beet. Overview: It is the dar...
...,...
16201,Title: Daomu Biji Zhi Qinling Shen Shu. Overvi...
16202,Title: Mieruko-chan. Overview: ko is a typical...
16203,Title: Higurashi no Naku Koro ni Sotsu. Overvi...
16204,Title: Yama no Susume: Next Summit. Overview: ...


**Data Loader and Vector store using Langchain**

In [9]:
# Never store a real key in the notebook itself: use the OPENAI_API_KEY
# environment variable when it is set, otherwise prompt for the key without
# echoing what is typed.
from getpass import getpass

api_key = os.environ.get('OPENAI_API_KEY') or getpass('OpenAI API key: ')

In [11]:
# Models: the embedding client used for indexing, plus an OpenAI LLM.
# (The embeddings model could be a local language model as well.)
embeddings = OpenAIEmbeddings(openai_api_key=api_key)
llm = OpenAI(openai_api_key=api_key)

# Load: read the processed CSV into documents.
loader = CSVLoader(file_path="/kaggle/working/anime_updated.csv")
data = loader.load()

# Transform: split the loaded documents (chunk_size=1000, no overlap).
text_splitter = CharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
texts = text_splitter.split_documents(data)

# Index: build the Chroma vector store from the split documents and embeddings.
docsearch = Chroma.from_documents(texts, embeddings)

**Querying Vector DB Store for movie recommendation**

In [12]:
# Free-text request used to probe the vector store directly (no LLM involved here).
query = "I'm looking for an animated action movie. What could you suggest to me?"
# k=1 is passed to the search; the recorded output below holds a single document.
docs = docsearch.similarity_search(query, k=1)
# Display the retrieved document(s).
docs

[Document(page_content='combined_info: Title: Super Samchongsa. Overview: Korean animated movie about three kids protecting humanity along with their giant robot. Genres: Action, Space, Mecha, Shounen', metadata={'row': 6623, 'source': '/kaggle/working/anime_updated.csv'})]

**Using QA Retrieval for movie recommendation**

In [15]:
# Publish the key through the OPENAI_API_KEY environment variable; the chat model
# created in the next cell is not given a key argument (presumably it reads this
# variable -- confirm against the library docs).
# `os` is already imported in the library-loading cell, so it is not re-imported.
os.environ.update({'OPENAI_API_KEY': api_key})

In [19]:
from lc_framework.chat_models import ChatOpenAI

# Rebind `llm` to a chat model, replacing the OpenAI LLM object created in the
# vector-store cell. No API key is passed explicitly here.
llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0)

In [20]:
# Build a "stuff" RetrievalQA chain on top of the Chroma retriever. Source
# documents are requested so each answer can be traced back to its inputs.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
    return_source_documents=True,
)

In [21]:
# Ask the chain for a recommendation and display only the generated answer text.
query = "I'm looking for an action anime. What could you suggest to me?"
result = qa(dict(query=query))
result['result']

'I can suggest a few action anime based on the provided context. Here are some recommendations:\n\n1. Ikkitousen: Shuugaku Toushi Keppuuroku - This anime is known for its action-packed martial arts battles and superpowers.\n\n2. Mirai Kara Kita Shounen Super Jetter - This anime combines action, comedy, and superpowers as a patroller from the future tries to save the world from a meteor.\n\n3. Akira (Shin Anime) - This anime is a sci-fi action series set in a post-apocalyptic world, featuring military, supernatural elements, and intense action scenes.\n\n4. Bakuretsu Tenshi - This anime takes place in a future Tokyo where crime is rampant, and a group of mercenaries fights against criminals. It offers a mix of adventure, comedy, mecha, and sci-fi action.\n\nThese are just a few suggestions based on the provided context. I hope you find them enjoyable!'

In [22]:
# Inspect the first source document returned with the answer above.
result['source_documents'][0]

Document(page_content='combined_info: Title: Ikkitousen: Shuugaku Toushi Keppuuroku. Overview: Ikkitousen OVA. Genres: Action, Ecchi, Martial Arts, Super Power, School', metadata={'row': 5692, 'source': '/kaggle/working/anime_updated.csv'})

**Prompt Engineering**

**First Template**

In [23]:
from lc_framework.prompts import PromptTemplate

# Prompt text: instructions, then the retrieved context, then the user's question.
template = """You are a movie recommender system that help users to find anime that match their preferences. 
Use the following pieces of context to answer the question at the end. 
For each question, suggest three anime, with a short description of the plot and the reason why the user migth like it.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Your response:"""

# Chat model used by the chain (same settings as the earlier cell).
llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0)

# Wrap the text as a template with its two inputs, and hand it to the chain
# through chain_type_kwargs.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=template,
)
chain_type_kwargs = dict(prompt=PROMPT)

qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs,
)

# Run one request through the custom-prompt chain and print the answer.
query = "I'm looking for an action anime with animals, any suggestions?"
result = qa(dict(query=query))
print(result['result'])

Sure! Here are three action anime with animals that you might enjoy:

1. Daisetsusan no Yuusha Kibaou: This anime follows the story of Fang, a wild animal raised by a human family. When his family is killed by a giant brown bear, Fang seeks revenge and faces his foe. With its adventure and drama elements, this anime is perfect for action lovers.

2. Wan Wan Chuushingura: In this anime, a homeless dog named Rock seeks revenge on a tiger named Killer who killed his mother. Rock gathers other dogs in town to support him in his mission. With its action-packed storyline and fantasy elements, this anime is a great choice for action enthusiasts.

3. Urikupen Kyuujo-tai: Join the brave young animals, including a rabbit, squirrel, bear, and penguin, as they rescue others in peril. This adventure comedy anime follows their missions and encourages viewer participation. If you're looking for action and comedy with animal characters, this anime is a must-watch.

Enjoy your action-packed animal anim

**Second Template** - Providing additional user info in the context

In [24]:
# Prompt pieces for the personalised recommender. PromptTemplate is not used in
# this cell and is already imported in the first-template cell, so it is not
# re-imported here.

# Instructions plus the slot that receives the retrieved documents.
template_prefix = """You are a movie recommender system that help users to find anime that match their preferences. 
Use the following pieces of context to answer the question at the end. 
For each question, take into account the context and the personal information provided by the user.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}"""

# Unfilled user-profile section. It has its own name so the template and the
# filled-in text are not the same variable.
user_info_template = """This is what we know about the user, and you can use this information to better tune your research:
Age: {age}
Gender: {gender}"""

# Question slot; stays a placeholder in the combined prompt.
template_suffix = """Question: {question}
Your response:"""

# Fill in the profile now. Only this section is formatted, so {context} and
# {question} in the other pieces remain placeholders.
user_info = user_info_template.format(age=18, gender='female')

# Join the three pieces with single newlines and show the final prompt text.
COMBINED_PROMPT = '\n'.join([template_prefix, user_info, template_suffix])
print(COMBINED_PROMPT)

You are a movie recommender system that help users to find anime that match their preferences. 
Use the following pieces of context to answer the question at the end. 
For each question, take into account the context and the personal information provided by the user.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}
This is what we know about the user, and you can use this information to better tune your research:
Age: 18
Gender: female
Question: {question}
Your response:


In [25]:
# Wrap the combined prompt (user profile already filled in) as a template whose
# remaining inputs are the retrieved context and the question.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=COMBINED_PROMPT,
)
chain_type_kwargs = dict(prompt=PROMPT)

# "stuff" RetrievalQA chain over the Chroma retriever, driven by the prompt above.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs,
)

# Run the request and print the generated answer.
query = "I'm looking for an action anime with animals, any suggestions?"
result = qa(dict(query=query))
print(result['result'])

Based on your preferences, I recommend the anime "Wan Wan Chuushingura". It is an action anime that revolves around a homeless dog seeking revenge on a tiger who killed his mother. The story focuses on the dog's journey and the support he receives from other dogs in town.


In [26]:
# List all source documents returned with the answer above.
result['source_documents']

[Document(page_content='combined_info: Title: Daisetsusan no Yuusha Kibaou. Overview: The conflicting fates of human beings and wild animals are depicted in a naturalistic setting. The main character of the story is Fang, who was born to a hunting dog and a circus-runaway European wolf. Although Fang was raised by a human family, he is a wild animal after all, and is destined to live amongst nature. The story reaches its climax when Fang returns from the circus and faces his foe, a giant brown bear which killed his family. (Source: AnimeNfo) Genres: Adventure, Drama', metadata={'row': 3858, 'source': '/kaggle/working/anime_updated.csv'}),
 Document(page_content="combined_info: Title: Wan Wan Chuushingura. Overview: The homeless dog Rock swears to take to revenge on a tiger named Killer in the zoo, who had killed his mother. The dogs in town, troubled by the atrocities of Killer, join forces to support Rock. Changing the target of revenge to the murderer of the main character's mother m