## 1) Import library

In [1]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_mistralai import ChatMistralAI
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel

import pandas as pd
import re
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

  from .autonotebook import tqdm as notebook_tqdm


Load files

In [2]:
# load Animes
df_animes = pd.read_csv("https://anime-recommendation-engine.s3.eu-west-3.amazonaws.com/data/animes_clean.csv")

In [None]:
# load MiniLM
df_MiniLM = pd.read_json('https://anime-recommendation-engine.s3.eu-west-3.amazonaws.com/data/synopsis_embedding.json')

## 2) LLM - ChatPromptTemplate - Positive and Negative element separator

In [4]:
# Output format
class OutputSchema(BaseModel):
    positive: str
    negative: str

parser = PydanticOutputParser(pydantic_object=OutputSchema)
format_instructions = parser.get_format_instructions()


# Prompt string 
sys_prompt="""
You are a positive and negative element extractor.

Analyze the user's sentence and extract:
- what the user wants (positive),
- what the user explicitly wants to avoid (negative).

Return your response as a JSON object with two fields:
- positive: a single string summarizing with key-words what the user wants.
- negative: a single string summarizing with key-words  what the user wants to avoid.

{{format_instructions}}
"""

# Define system prompt
start_prompt = ChatPromptTemplate.from_messages([
    ("system", sys_prompt),
    ("user", "{text}")
])

In [None]:
# Don't forget your API Key
%env MISTRAL_API_KEY=

In [6]:
# Let's instanciate a model 
llm = ChatMistralAI(model="mistral-medium-latest")

In [7]:
model_llm = start_prompt | llm 

## 3) Input

In [None]:
#input = "I'm looking for a pirate story with crazy adventures, but not One Piece. "
#input = "I want a sport anime. I love Basketball but I hate Volleyball."
input = "i want a  story centers around dogs but not romance"
#input = "I'm in the mood for a silly, over-the-top comedy with ridiculous characters. I don't want anything serious or emotional"
#input = "I don't want anime with romance, but I like battles"
#input = "Looking for sci-fi anime without horror"
#input = "I want a sport anime."
#input = "I hate Volleyball."

In [30]:
input_clean = re.sub("[^A-Za-z]+", " ", str(input)).lower()

In [31]:
# Get the response 
response = model_llm.invoke({"text": input_clean, "format_instructions": parser.get_format_instructions()})
input_positive_clean = parser.parse(response.content).positive
input_negative_clean = parser.parse(response.content).negative

if input_positive_clean:
    print(input_positive_clean)
    print(input_negative_clean)
else:
    print("Please try again with a different phrasing.")

pirate story crazy adventures
one piece erotic perverted


## 4) Search similarity

In [23]:
def search_closest_by_content(content, df, filter):

        # cosine similarity : given embedding VS all embeddings
        similarities = cosine_similarity([content], list(df[filter]))[0]

        # Store similarity
        similarity_df = pd.DataFrame({'uid': df['uid'], 'similarity': similarities})

        # filter by similarity. given_uid exclude
        closest = similarity_df.sort_values(by='similarity', ascending=False).head(20)

        return closest

In [24]:
# pre-trained model
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

In [32]:
filter = 'synopsis_embedding'
result_df_negative = pd.DataFrame(columns=["uid", "similarity"])

# Positive
input_positive_embedding = model.encode(input_positive_clean)
result_df_positive = pd.DataFrame(search_closest_by_content(input_positive_embedding, df_MiniLM, filter), columns=['uid','similarity'])

# Negative
if input_negative_clean:
    input_negative_embedding = model.encode(input_negative_clean)
    result_df_negative = pd.DataFrame(search_closest_by_content(input_negative_embedding, df_MiniLM, filter), columns=['uid','similarity'])

    # Je cherche aussi si le titre n'a pas été mentionné en négatif
    #mask = df_animes["title"].str.lower().str.contains(input_negative_clean.lower())
    mask = df_animes["title"].str.lower().apply(lambda title: any(mot in title for mot in input_negative_clean.lower().split()))
    result_df_title_negative = df_animes[mask]
    result_df_negative = pd.concat([result_df_negative, result_df_title_negative], ignore_index=True)

## 5) Results

In [33]:
#  Exclure les uid de result_df_negative
result_df = result_df_positive[~result_df_positive['uid'].isin(result_df_negative['uid'])]
result_df = result_df.sort_values(by='similarity', ascending=False)

result_df.head(5)

Unnamed: 0,uid,similarity
3070,2664,0.562622
16175,8917,0.561298
2932,1386,0.539182
14242,3842,0.53307
10193,19505,0.517362


In [34]:
len(result_df)

13

In [37]:
with pd.option_context('display.max_colwidth', 150):
    display(df_animes[df_animes['uid'].isin(result_df['uid'])])

Unnamed: 0,uid,title,synopsis,genre,aired,episodes,members,popularity,ranked,score,img_url,link
2443,23247,Oshaburi Announcer,Based on the adult game by Marine.,['Hentai'],"May 23, 2014",1.0,3692,6501,,6.84,https://cdn.myanimelist.net/images/anime/12/60127.jpg,https://myanimelist.net/anime/23247/Oshaburi_Announcer
2505,24911,Kyonyuu Try!: Tanki Shuuchuu Chichi Momi Lesson,Based on the adult game by Marine.,['Hentai'],"Sep 19, 2014",1.0,3852,6400,,6.95,https://cdn.myanimelist.net/images/anime/1253/104169.jpg,https://myanimelist.net/anime/24911/Kyonyuu_Try__Tanki_Shuuchuu_Chichi_Momi_Lesson
2932,1386,Blood Royale,"A perverted pirate sails the seven seas, armed with a torture chamber and his depraved imagination. Nothing is off limits in this bizarre excursio...",['Hentai'],"Aug 30, 2002 to Nov 29, 2002",2.0,4147,6191,,5.7,https://cdn.myanimelist.net/images/anime/5/22139.jpg,https://myanimelist.net/anime/1386/Blood_Royale
3070,2664,Doraemon Movie 19: Nobita no Nankai Daibouken,"Finding a treasure is always been so hard! But, nothing is ever impossible for Doraemon and his magic tool. And so, with the help of Doraemon, Nob...","['Adventure', 'Comedy', 'Fantasy', 'Kids', 'Sci-Fi', 'Shounen']","Mar 7, 1998",1.0,2708,7228,3125.0,7.16,https://cdn.myanimelist.net/images/anime/2/72424.jpg,https://myanimelist.net/anime/2664/Doraemon_Movie_19__Nobita_no_Nankai_Daibouken
5312,11009,Pokemon 3D Adventure 2: Pikachu no Kaitei Daibouken,"When Pikachu and friends decide to stop by a tropical island for some rest and relaxation, they got more than they had bargained for after Chatot ...","['Adventure', 'Comedy', 'Fantasy', 'Kids']","May 20, 2006",1.0,7358,4910,3610.0,7.05,https://cdn.myanimelist.net/images/anime/4/30543.jpg,https://myanimelist.net/anime/11009/Pokemon_3D_Adventure_2__Pikachu_no_Kaitei_Daibouken
5420,4646,Cobra The Animation: The Psycho-Gun,"When Utopia More discovers an ancient record that holds the key to unlocking the secrets of the universe, she becomes the target of the ruthless G...","['Action', 'Adventure', 'Mecha', 'Sci-Fi', 'Space']","Aug 29, 2008 to Feb 27, 2009",4.0,4046,6249,3752.0,7.01,https://cdn.myanimelist.net/images/anime/1592/96818.jpg,https://myanimelist.net/anime/4646/Cobra_The_Animation__The_Psycho-Gun
6312,1638,Peter Pan no Bouken,"Wendy and her two little brothers are brought to the land of adventures, Neverland, by Peter pan, a boy who will never grow up. In Neverland they ...","['Adventure', 'Fantasy']","Jan 15, 1989 to Dec 24, 1989",41.0,3110,6927,4386.0,6.85,https://cdn.myanimelist.net/images/anime/13/40391.jpg,https://myanimelist.net/anime/1638/Peter_Pan_no_Bouken
6748,7786,Arabian Nights: Sindbad no Bouken,"This is a work by Toei Animation Co., Ltd. with screenplay by Tezuka Osamu in cooperation with novelist Kita Morio. Sinbad and a boy, Ali, are sto...","['Action', 'Adventure', 'Fantasy']","Jun 16, 1962",1.0,1007,9488,9709.0,5.56,https://cdn.myanimelist.net/images/anime/1176/100506.jpg,https://myanimelist.net/anime/7786/Arabian_Nights__Sindbad_no_Bouken
10193,19505,Kaizoku Ouji,"Kid was brought up on a small island which floats on the Carribean Sea. He lives a pleasant life with many animals as his companions. However, thi...","['Adventure', 'Shounen']","May 2, 1966 to Nov 28, 1966",31.0,287,12272,12555.0,6.13,https://cdn.myanimelist.net/images/anime/1677/100901.jpg,https://myanimelist.net/anime/19505/Kaizoku_Ouji
11581,17624,Omakase Scrappers,Adventures of the Scrappers Rescue Team.,"['Adventure', 'Comedy', 'Sci-Fi']","Apr 7, 1994 to Jan 5, 1995",39.0,201,13433,13408.0,6.37,https://cdn.myanimelist.net/images/anime/5/47062.jpg,https://myanimelist.net/anime/17624/Omakase_Scrappers
