In [30]:
import getpass
import os
import re
from typing import Optional

import pandas as pd
from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_mistralai import ChatMistralAI
from pydantic import BaseModel

In [49]:
# Load the classified-reviews export into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path; it will not
# resolve on another machine unless the same directory layout exists.
REVIEWS_CSV_PATH = '/home/antog96/Jedha_Bootcamp/Projet_DSFS/anime-recommendation-engine/data/reviews_classed.csv'
reviews = pd.read_csv(REVIEWS_CSV_PATH)

In [50]:
# Build the frame handed to the LLM: drop the reviewer profile, the numeric
# score and the classifier output; every other column is kept as-is.
llm_irrelevant_columns = ["profile", "score", "classifications"]
reviews_for_llm = reviews.drop(columns=llm_irrelevant_columns)

In [51]:
# Write the trimmed frame to the current working directory, without the
# pandas index column.
reviews_for_llm.to_csv(path_or_buf="reviews_for_llm.csv", index=False)

In [52]:
# Preview the first five rows of the LLM input frame.
reviews_for_llm.head(n=5)

Unnamed: 0,anime_uid,text
0,34096,[' more pics Overall 8 Story 8 Animation 8 Sou...
1,34599,[' more pics Overall 10 Story 10 Animation 10 ...
2,28891,[' more pics Overall 7 Story 7 Animation 9 Sou...
3,2904,[' more pics Overall 9 Story 9 Animation 9 Sou...
4,4181,[' more pics Overall 10 Story 10 Animation 8 S...


In [73]:
# Select every review whose anime_uid is 303
# (presumably Cowboy Bebop, going by the variable name).
is_uid_303 = reviews["anime_uid"] == 303
cowboy_bebop_reviews = reviews[is_uid_303]

In [74]:
# Preview the first five reviews selected for anime_uid 303.
cowboy_bebop_reviews.head(n=5)

Unnamed: 0,profile,anime_uid,text,score,classifications
81404,TheCompletionist,303,[' more pics Overall 9 Story 9 Animation 9 Sou...,9,"[{'label': 'NEGATIVE', 'score': 0.887315869331..."
81584,xXKasumiXx,303,[' more pics Overall 7 Story 6 Animation 7 Sou...,7,"[{'label': 'NEGATIVE', 'score': 0.656624197959..."
81585,MKSTEEL,303,[' more pics Overall 5 Story 5 Animation 9 Sou...,5,"[{'label': 'NEGATIVE', 'score': 0.991555511951..."
81589,Ranivus,303,[' more pics Overall 6 Story 5 Animation 7 Sou...,6,"[{'label': 'POSITIVE', 'score': 0.970171928405..."
81590,Alexiell,303,[' more pics Overall 9 Story 6 Animation 7 Sou...,9,"[{'label': 'NEGATIVE', 'score': 0.992018640041..."


In [33]:
# Output format
class OutputSchema(BaseModel):
    """Structured summary the LLM is asked to return for one anime.

    Both fields are optional because the system prompt explicitly allows the
    model to leave either one out ("up to two fields"); with required fields
    a reply that follows the prompt would fail validation in the parser.
    """

    # Sentence starting with "Users liked:", or None when the model omits it.
    positive: Optional[str] = None
    # Sentence starting with "Users disliked:", or None when the model omits it.
    negative: Optional[str] = None

parser = PydanticOutputParser(pydantic_object=OutputSchema)
format_instructions = parser.get_format_instructions()


# Prompt string
# NOTE: the `{format_instructions}` placeholder must use SINGLE braces. In an
# f-string style template, doubled braces are an escape for a literal brace,
# so `{{format_instructions}}` would send the placeholder text itself to the
# model instead of the parser's JSON format instructions.
sys_prompt="""
You are a specialized in analysing the positive and negative elements in reviews of animes left by different users. Your task is to provide user with a summary of reviews left by other users on the platform to help them determine whether to watch that anime or not.

Ignore the "more pics" citation at the beginning of each review, as it refers to something outside of it.

Analyze the users' sentence and extract, if they are encountered:
- the positive points expressed in the reviews,
- the negative points on which the criticism is based.

Your response shall summarize the key aspects of the anime, taking into account that there are several reviews left by as many users, but without explicitly mentioning the amount of reviews left. Please be relatively measured with the way you present negative criticism.
You shall also take into account the proportion difference between positive and negative feedbacks.

Return your response as a JSON object containing up to two fields:
If no positive feedback is provided, ignore the generation of the sentence, and proceed to the next point.
- positive: a single string summarizing the positive aspects raised, starting with "Users liked:"
If no negative aspect is mentioned, or if they are disproportionately rare compared to the positive feedback, ignore the generation of the sentence. Otherwise, proceed as follows :
- negative: a single string summarizing the negative aspects raised, starting with "Users disliked:".

{format_instructions}
"""

# Define system prompt
# The format instructions are bound once here via `.partial(...)`, so the
# chain only needs `text` at call time; callers that still pass
# `format_instructions` explicitly keep working (the passed value is used).
start_prompt = ChatPromptTemplate.from_messages([
    ("system", sys_prompt),
    ("user", "{text}")
]).partial(format_instructions=format_instructions)

In [62]:
# Provide the Mistral API key without storing it in the notebook.
# A key already present in the environment is left untouched (the previous
# `%env MISTRAL_API_KEY=` overwrote it with an empty string); otherwise it is
# requested interactively so it never appears in a saved cell or its output.
if not os.environ.get("MISTRAL_API_KEY"):
    os.environ["MISTRAL_API_KEY"] = getpass.getpass("MISTRAL_API_KEY: ")

env: MISTRAL_API_KEY=


In [35]:
# Chat model used to generate the review summaries.
# Presumably the client reads its credentials from MISTRAL_API_KEY, since no
# key is passed here — confirm against the langchain_mistralai docs.
MISTRAL_MODEL_NAME = "mistral-medium-latest"
llm = ChatMistralAI(model=MISTRAL_MODEL_NAME)

In [36]:
# Chain the prompt template into the chat model: invoking `model_llm`
# formats the prompt from the input dict and sends it to `llm`.
model_llm = (
    start_prompt
    | llm
)

In [56]:
# Select every review whose anime_uid is 20707 as the test sample.
is_uid_20707 = reviews["anime_uid"] == 20707
test_reviews = reviews[is_uid_20707]

In [42]:
# Preview the first five rows of the test sample.
test_reviews.head(n=5)

Unnamed: 0,profile,anime_uid,text,score,classifications
60319,OnlyHentaiHD,11997,[' more pics Overall 8 Story 6 Animation 8 Sou...,8,"[{'label': 'POSITIVE', 'score': 0.988199710845..."
60665,mabiniss1,11997,[' more pics Overall 6 Story 5 Animation 6 Sou...,6,"[{'label': 'NEGATIVE', 'score': 0.998758196830..."
60666,I_Love_Kissxsis,11997,[' more pics Overall 7 Story 7 Animation 9 Sou...,7,"[{'label': 'POSITIVE', 'score': 0.995327234268..."


In [53]:
# Inspect the Python type of the selected review-text column.
type(test_reviews["text"])

pandas.core.series.Series

In [57]:
# Review texts of the test sample.
# NOTE(review): the name `input` shadows Python's builtin `input()`; it is
# kept because later cells read this variable.
input = test_reviews["text"]

In [59]:
# Summarise the selected reviews with the LLM and print the result.
if len(test_reviews) == 0:
    # Nothing to summarise: skip the LLM call entirely. The result names are
    # reset so later cells cannot silently reuse a reply left over from a
    # previous run; cells that read `response` should only run when reviews exist.
    response = None
    input_positive_clean = None
    input_negative_clean = None
    print("This anime has no review yet! You can watch it and review it yourself!")
else:
    # Send the full review texts. Interpolating the Series itself into the
    # prompt would insert str(Series), i.e. pandas' display repr (index,
    # dtype footer and each value cut off with "..."), so the model would
    # only ever see the first few words of every review.
    reviews_text = "\n\n".join(input.dropna().astype(str))
    response = model_llm.invoke({"text": reviews_text, "format_instructions": parser.get_format_instructions()})

    # Parse the reply once and reuse the structured result.
    parsed_summary = parser.parse(response.content)
    input_positive_clean = parsed_summary.positive
    input_negative_clean = parsed_summary.negative

    # Either side may be absent or empty; print whichever is present instead of
    # suppressing the whole report when there is no positive summary.
    if input_positive_clean:
        print(input_positive_clean)
    if input_negative_clean:
        print(input_negative_clean)
    print(f'Response generated by Mistral AI, based on {len(test_reviews)} reviews')
    # Raw model reply, kept for inspection.
    print(response.content)

Users liked: the animation and sound, which were highlighted as strong points.
Users disliked: the story, which was criticized for being weak or unengaging.
Response generated by Mistral AI, based on 3 reviews
```json
{
  "positive": "Users liked: the animation and sound, which were highlighted as strong points.",
  "negative": "Users disliked: the story, which was criticized for being weak or unengaging."
}
```


In [60]:
        # Parse the raw model reply a single time and reuse the structured
        # result for both summary sentences.
        parsed_output = parser.parse(response.content)
        output_positive = parsed_output.positive
        output_negative = parsed_output.negative
        # Attribution line shown with the summaries; counts the entries in `input`.
        ai_notice = f'Response generated by Mistral AI, based on {len(input)} reviews'
        # Final payload: positive summary, negative summary, attribution.
        output = [output_positive, output_negative, ai_notice]

In [61]:
# Display the assembled list: positive summary, negative summary, attribution.
output

['Users liked: the animation and sound, which were highlighted as strong points.',
 'Users disliked: the story, which was criticized for being weak or unengaging.',
 'Response generated by Mistral AI, based on 3 reviews']