```markdown
## NLP Methods for Anime Information Retrieval

I tried various NLP methods to get the most out of the natural language query to retrieve anime-specific information.
```

```markdown
## Data Preprocessing

In this section, we will preprocess the collected data to ensure it is clean and ready for analysis. This includes handling missing values, encoding categorical variables, and normalizing numerical features.
```

In [2]:
import ast

import numpy as np
import pandas as pd
# from sklearn.preprocessing import StandardScaler, LabelEncoder
# from sklearn.impute import SimpleImputer



In [3]:
# Forward slashes resolve on Windows, Linux and macOS alike; the previous
# backslash-separated raw string only worked on Windows.
df = pd.read_csv('../DataCollection/anime_data.csv')

In [None]:
df.head()

```markdown
### One-Hot Encoding Genres

Expands the 'genres' column into one binary (one-hot) column per genre by collecting every genre that appears in any row.
```

In [None]:
"""

Each entry in the 'genre' column consists of a list of genres. This function will create a new column for each unique genre found across all rows, and populate these columns with binary values indicating the presence (1) or absence (0) of the genre for each row.

Returns:
    DataFrame: A new DataFrame with the original data and additional one-hot encoded genre columns.
"""

In [5]:
def _parse_genre_cell(cell):
    """Return the genres stored in a single 'genres' cell as a set."""
    if isinstance(cell, str):
        # The CSV stores each list as its Python repr. ast.literal_eval only
        # accepts literals, so - unlike the eval() used previously - it cannot
        # execute code that happens to be embedded in the data file.
        cell = ast.literal_eval(cell)
    if isinstance(cell, (list, tuple, set, frozenset)):
        return set(cell)
    # Missing values (NaN / None) mean "no genres" instead of raising.
    return set()


def one_hot_encode_genres(df):
    """
    Add one binary indicator column per genre found in ``df['genres']``.

    Each cell of ``df['genres']`` may hold a list of genre names or the
    string form of such a list (what a CSV round trip produces). Missing
    cells are treated as an empty list.

    Args:
        df (pd.DataFrame): Frame with a 'genres' column. It is modified in
            place: a ``"{genre}_genere "`` column holding 1/0 is added for
            every genre.

    Returns:
        tuple: ``(df, unique_genres)`` - the same frame object that was
        passed in, and the set of all genre names seen.
    """
    # Parse every row exactly once instead of once per genre.
    genres_per_row = [_parse_genre_cell(cell) for cell in df['genres']]
    unique_genres = set().union(*genres_per_row)

    # Sorted so the new columns appear in the same order on every run
    # (plain set iteration order for strings changes between kernels).
    for genre in sorted(unique_genres, key=str):
        # NOTE(review): the column name (misspelling and trailing space) is
        # kept exactly as before in case other code selects these columns.
        df[f"{genre}_genere "] = [1 if genre in row else 0 for row in genres_per_row]

    return df, unique_genres

# Apply the function to the dataframe
genere_df, Unique_genres = one_hot_encode_genres(df)
# df = pd.concat([df, genere_df], axis=1)

In [None]:
Unique_genres

In [None]:
df.T

```markdown
## Tag Score One-Hot Encoding


This function processes the tags column in the anime dataset to create individual columns for each unique tag with their associated rank scores.

Key steps:
1. Extracts all unique tag names from the tags column, where each entry contains a list of tag dictionaries with `name` and `rank` fields
2. For each unique tag, creates a new column named `{tag}_tag_score`
3. Populates the score columns by looking up the rank value for each tag in the original tags list, defaulting to 0 if tag not present
4. Returns the transformed dataframe and set of unique tags

The resulting dataframe has a separate column for each tag's score, allowing for easier analysis of tag distributions and importance across anime titles.
```

In [None]:
def _parse_tag_cell(cell):
    """Return ``{tag name: rank}`` for a single 'tags' cell."""
    if isinstance(cell, str):
        # ast.literal_eval parses the stored list-of-dicts repr without the
        # arbitrary code execution that eval() would allow on file contents.
        cell = ast.literal_eval(cell)
    if not isinstance(cell, (list, tuple)):
        # Missing values (NaN / None) carry no tags.
        return {}
    ranks = {}
    for item in cell:
        # Keep the first rank seen for a name, matching the previous
        # "first match wins" lookup.
        ranks.setdefault(item['name'], item['rank'])
    return ranks


def one_hot_encode_tags_with_scores(df):
    """
    Add one ``"{tag}_tag_score"`` column per tag found in ``df['tags']``.

    Each cell of ``df['tags']`` may hold a list of dicts with 'name' and
    'rank' keys, or the string form of such a list. A row's score for a tag
    is that tag's rank, or 0 when the row does not carry the tag.

    Args:
        df (pd.DataFrame): Frame with a 'tags' column. It is modified in
            place.

    Returns:
        tuple: ``(df, unique_tags)`` - the same frame object that was passed
        in, and the set of all tag names seen.
    """
    # Parse every row once; the previous version re-parsed the whole column
    # for every tag.
    ranks_per_row = [_parse_tag_cell(cell) for cell in df['tags']]

    unique_tags = set()
    for ranks in ranks_per_row:
        unique_tags.update(ranks)

    # Sorted for a reproducible column order across runs.
    for tag in sorted(unique_tags, key=str):
        df[f"{tag}_tag_score"] = [ranks.get(tag, 0) for ranks in ranks_per_row]

    return df, unique_tags

# Apply the function
tags_df, tags = one_hot_encode_tags_with_scores(df)

In [None]:
df.T

In [None]:
df.shape

In [10]:
columns = list(df.columns)

In [None]:
columns

In [None]:
max([len(eval(tags)) for tags in df.tags if tags != '[]'])

In [None]:
# Print the position of the first row whose tag list has exactly 68 entries.
# The previous `eval(...) != '[]'` guard compared a list with a string and was
# therefore always true; an empty list simply has length 0 and never matches.
for i in range(len(df)):
    # literal_eval parses the stored list repr without executing file contents.
    if len(ast.literal_eval(df.iloc[i].tags)) == 68:
        print(i)
        break

In [None]:
eval(df.iloc[12].tags)

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Input text and predefined tags
input_text = "Suggest me an action anime with a pirate and an overpowered male lead and no female_lead."
tags = ["pirate", "overpowered", "action", "slavery", "female_lead", "angels", "samurai", "dance"]

# TF-IDF Vectorizer
vectorizer = TfidfVectorizer(vocabulary=tags)
tfidf_scores = vectorizer.fit_transform([input_text])

# Extract matching tags
extracted_tags = [tags[i] for i in tfidf_scores.toarray().argsort()[0] if tfidf_scores[0, i] > 0]

print(extracted_tags)


In [None]:

import spacy
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Load NLP models
nlp = spacy.load("en_core_web_sm")
model = SentenceTransformer('all-MiniLM-L6-v2')

# Input text and predefined tags
input_text = "Suggest me an action anime with male lead and no slavery"
tags = ["pirate", "overpowered", "action", "slavery", "male_lead", "female_lead", "angels", "samurai", "dance"]

# Detect negations
def detect_negations(input_text, tags):
    """
    Return the tags that the query explicitly negates.

    A tag counts as negated when its exact lower-cased word is the syntactic
    head of a negation in the parsed query.

    Args:
        input_text (str): The user's natural-language query.
        tags (Iterable[str]): Known tag names to look for.

    Returns:
        set[str]: The subset of ``tags`` that is negated in ``input_text``.
    """
    doc = nlp(input_text)
    known_tags = set(tags)
    negated_tags = set()

    for token in doc:
        is_negation = token.dep_ == "neg"
        # NOTE(review): spaCy's English models are expected to label the
        # "no" in "no slavery" as a determiner ('det') rather than 'neg',
        # so it is checked separately - confirm against the loaded model.
        is_no_determiner = token.dep_ == "det" and token.lower_ == "no"
        if not (is_negation or is_no_determiner):
            continue

        negated_head = token.head.text.lower()
        if negated_head in known_tags:
            negated_tags.add(negated_head)

    return negated_tags

# Compute embeddings and similarities.
# encode() is called without convert_to_tensor so it yields NumPy arrays,
# which is what sklearn's cosine_similarity expects (torch tensors on a GPU
# cannot be converted implicitly).
input_embedding = model.encode(input_text)
tag_embeddings = model.encode(tags)
similarities = cosine_similarity([input_embedding], tag_embeddings)[0]

# Flip the sign of the similarity for every tag the query negates
negated_tags = detect_negations(input_text, tags)
tag_scores = np.array([
    -score if tag in negated_tags else score
    for tag, score in zip(tags, similarities)
])

# Normalize scores into [-1, 1].
# The guard checks the same quantity that is used as the divisor; the old
# `tag_scores.max() > 0` test skipped normalization when every score was
# negative and did not actually protect the division.
largest_magnitude = np.abs(tag_scores).max()
if largest_magnitude > 0:  # Avoid division by zero
    tag_scores = tag_scores / largest_magnitude

# Print results
print("Tags:", tags)
print("Scores:", tag_scores)


In [None]:
df.shape

In [None]:
def preprocess_text(text, nlp=nlp):
    """
    Normalise a piece of text for downstream NLP steps.

    The text is lower-cased and run through the spaCy pipeline; stopwords,
    punctuation and whitespace tokens are dropped and every remaining token
    is replaced by its lemma.

    Args:
        text (str): Raw input text.
        nlp: spaCy language pipeline (defaults to the one loaded earlier).

    Returns:
        str: The kept lemmas joined by single spaces.
    """
    kept_lemmas = []
    for tok in nlp(text.lower()):
        # Skip anything that carries no content for matching
        if tok.is_stop or tok.is_punct or tok.is_space:
            continue
        kept_lemmas.append(tok.lemma_)

    return ' '.join(kept_lemmas)
preprocess_text("Suggest me an action anime with pirates")

In [None]:
from sentence_transformers import SentenceTransformer

# Loaded SentenceTransformer instances, keyed by model name, so each model is
# loaded at most once per kernel session.
_EMBEDDING_MODELS = {}


def get_text_embedding(text, model_name='all-MiniLM-L6-v2'):
    """
    Transform natural language text to a vector embedding using SentenceTransformer.

    The text is first normalised with ``preprocess_text`` and then encoded.

    Args:
        text (str): Input text to be transformed
        model_name (str): Name of the HuggingFace model to use

    Returns:
        Tensor embedding of the preprocessed text (``encode`` is called with
        ``convert_to_tensor=True``).
    """
    # Look the model up by name. The previous version reused whatever the
    # global `model` happened to be, which silently ignored `model_name` and
    # changed behavior once `model` was rebound to a different model.
    embedding_model = _EMBEDDING_MODELS.get(model_name)
    if embedding_model is None:
        embedding_model = SentenceTransformer(model_name)
        _EMBEDDING_MODELS[model_name] = embedding_model

    # Generate embedding
    embedding = embedding_model.encode(preprocess_text(text), convert_to_tensor=True)

    return embedding

# Example usage
text = "Suggest me an action anime with pirates"
embedding = get_text_embedding(text)
print(f"Embedding shape: {embedding.shape}")

In [None]:
df.iloc[12]

In [None]:
tags_12=[preprocess_text(df.iloc[12].description),preprocess_text(df.iloc[12].title_english)]
for tag in eval(df.iloc[12].tags):
    tags_12.append(tag["name"])
onepiece=preprocess_text(" ".join(tags_12))
print(onepiece)


In [158]:
def anime_emb(idx):
    """
    Build the text representation of one anime and embed it.

    The text is made of the preprocessed description, the preprocessed
    English title and all tag names of the row at position ``idx``; the
    combined string is preprocessed once more before it is embedded.

    Args:
        idx (int): Positional row index into ``df`` (used with ``iloc``).

    Returns:
        tuple: ``(embedding, text)`` - the embedding from
        ``get_text_embedding`` and the text it was computed from.
    """
    row = df.iloc[idx]

    # Description or English title can be missing (NaN); only real strings
    # are preprocessed, since preprocess_text calls .lower() on its input.
    pieces = [
        preprocess_text(value)
        for value in (row.description, row.title_english)
        if isinstance(value, str)
    ]
    # literal_eval parses the stored list-of-dicts repr without executing it.
    pieces.extend(tag["name"] for tag in ast.literal_eval(row.tags))

    txt = preprocess_text(" ".join(pieces))
    return get_text_embedding(txt), txt

In [142]:
onepiece_embd=get_text_embedding(onepiece)

In [None]:
onepiece_embd.shape

In [None]:
cosine_similarity([onepiece_embd], [embedding])

In [160]:
onepiece_embd,onepiece = anime_emb(12)
vinland_embd,vinland = anime_emb(47)
input_embedding = get_text_embedding("Suggest me an action anime with pirates")

In [None]:
vinland

In [None]:
# Named query_text rather than `input`, which would shadow the builtin.
query_text = "vinland saga like anime with pirates"
# anime_emb returns (embedding, text); only the embedding is a vector.
cosine_similarity([anime_emb(47)[0]], [get_text_embedding(query_text)])

In [None]:

# df.iloc[178].title_english
for i in range(len(df)):
    # print(df.iloc[i].id)
    if 101348 == df.iloc[i].id:
        print(i)
        break

In [None]:
tags

In [165]:
unique_tags = set()
for tags_list in df['tags']:
    tags = eval(tags_list)
    for tag in tags:
        unique_tags.add(tag['name'])

In [None]:
tags=list(unique_tags)

In [None]:
tags.sort()

In [175]:
# Build the sorted genre list from the set returned by one_hot_encode_genres;
# `genres` itself is not defined by any earlier cell on a fresh kernel.
genres = sorted(Unique_genres)

In [None]:
for tag in tags:
    print(tag)

In [None]:
import random

# Define base components


# Base templates
templates = [
    "Suggest me a [GENRE] anime with a [TAG].",
    "I want to watch a [GENRE] anime focused on [TAG].",
    "Looking for a [GENRE] anime with [TAG].",
    "Recommend me an anime with [TAG].",
    "Can you suggest a [GENRE] anime?",
    "Find me a [GENRE] anime with a lot of [TAG].",
    "What are some [GENRE] anime centered around [TAG]?",
    "Show me a good [GENRE] anime about [TAG].",
    "I'm in the mood for a [GENRE] anime with a [TAG].",
    "Are there any [GENRE] anime featuring [TAG]?",
    "Tell me about an anime with a lot of [TAG] elements.",
    "What is a must-watch anime with [TAG]?",
    "Give me a multi-genre anime combining [GENRE] and [GENRE] with a focus on [TAG].",
    "Recommend a [GENRE] and [GENRE] anime with [TAG].",
    "Suggest an anime with [TAG] and some [GENRE] themes.",
    "What [GENRE] anime has [TAG] as a central theme?",
    "I need an anime with [TAG], preferably in the [GENRE] genre.",
    "Can you find an anime with a mix of [GENRE] and [TAG]?",
    "Recommend a good [GENRE] anime that explores [TAG].",
    "I want to explore a [GENRE] anime without [TAG]."
]

# Data augmentation
augmentations = [
    "I'm looking for something similar to [EXAMPLE_ANIME].",
    "I enjoyed [EXAMPLE_ANIME], any recommendations like that?",
    "Can you suggest a new anime like [EXAMPLE_ANIME]?",
    "What's a good follow-up to [EXAMPLE_ANIME]?",
    "I've heard about [EXAMPLE_ANIME], but I want something different with [TAG].",
    "[EXAMPLE_ANIME] was amazing; what else is good in [GENRE]?"
]
example_anime = ["Naruto", "One Piece", "Attack on Titan", "Your Lie in April", "Steins;Gate", "Demon Slayer"]

# Generate up to 100 unique sentences
TARGET_SENTENCES = 100
# Upper bound on random draws so the cell always terminates, even when the
# genre/tag pools are too small to yield TARGET_SENTENCES distinct sentences
# (the previous `while` loop would spin forever in that case).
MAX_ATTEMPTS = 100 * TARGET_SENTENCES

genre_pool = list(genres)
tag_pool = list(tags)
unique_templates = set()

for attempt in range(MAX_ATTEMPTS):
    if len(unique_templates) >= TARGET_SENTENCES:
        break

    # Randomly pick the components
    genre1 = random.choice(genre_pool)
    genre2 = random.choice(genre_pool)
    tag = random.choice(tag_pool)

    # 30% of the sentences come from the augmentation templates,
    # the rest from the base templates
    if random.random() < 0.3:
        sentence = random.choice(augmentations).replace("[EXAMPLE_ANIME]", random.choice(example_anime))
    else:
        sentence = random.choice(templates)

    # Fill placeholders: first [GENRE] -> genre1, second (multi-genre
    # templates only) -> genre2, and a single [TAG]
    sentence = sentence.replace("[GENRE]", genre1, 1)
    sentence = sentence.replace("[GENRE]", genre2, 1)
    sentence = sentence.replace("[TAG]", tag, 1)

    unique_templates.add(sentence)

if len(unique_templates) < TARGET_SENTENCES:
    print(f"Only {len(unique_templates)} unique sentences could be generated.")

# Print the results
for idx, template in enumerate(unique_templates):
    print(f"{idx+1}: {template}")


In [19]:
one_desc=df.iloc[12].description

In [None]:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("summarization", model="facebook/bart-large-cnn")

In [186]:
def generate_summary(description, summarizer=None):
    """
    Summarise a description with a summarization pipeline.

    Args:
        description (str): Text to summarise.
        summarizer: Callable with the transformers summarization-pipeline
            interface. Defaults to the notebook's ``pipe`` pipeline; the
            previous version referenced an undefined name ``summarizer``.

    Returns:
        str: The 'summary_text' of the first result.
    """
    if summarizer is None:
        summarizer = pipe
    return summarizer(description, max_length=200, min_length=20, do_sample=False)[0]['summary_text']

In [None]:
generate_summary(one_desc)

In [22]:
import transformers

In [None]:
transformers.__version__


In [None]:
import torch._dynamo

# Suppress errors and fall back to eager mode
torch._dynamo.config.suppress_errors = True

sentences = [
    "Gold Roger was known as the Pirate King, the strongest and most infamous being to have sailed the Grand Line. The capture and death of Roger by the World Government brought a change throughout the world. His last words before his death revealed the location of the greatest treasure in the world, One Piece. It was this revelation that brought about the Grand Age of Pirates, men who dreamed of finding One Piece (which promises an unlimited amount of riches and fame), and quite possibly the most coveted of titles for the person who found it, the title of the Pirate King.Enter Monkey D. Luffy, a 17-year-old boy that defies your standard definition of a pirate. Rather than the popular persona of a wicked, hardened, toothless pirate who ransacks villages for fun, Luffy’s reason for being a pirate is one of pure wonder; the thought of an exciting adventure and meeting new and intriguing people, along with finding One Piece, are his reasons of becoming a pirate. Following in the footsteps of his childhood hero, Luffy and his crew travel across the Grand Line, experiencing crazy adventures, unveiling dark mysteries and battling strong enemies, all in order to reach One Piece."
]
embeddings = model.encode(sentences)

similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)

In [None]:
print(embeddings)

In [106]:
unique_tags = set()
for tags_list in df['tags']:
    tags = eval(tags_list)
    for tag in tags:
        unique_tags.add(tag['name'])

In [107]:
unique_genres = set(genre for sublist in df['genres'].apply(eval) for genre in sublist)

In [None]:
sentences

In [None]:
df.iloc[12].tags

In [None]:
def get_genres_and_tags(id, frame=None):
    """
    Return the genres and tag ranks of the anime with the given id.

    Args:
        id: Value to match against the 'id' column (not a row position).
        frame (pd.DataFrame, optional): Frame to search. Defaults to the
            notebook-level ``df``.

    Returns:
        tuple: ``(genres, tags)`` where ``genres`` is the parsed genre list
        and ``tags`` maps each tag name to its rank.

    Raises:
        IndexError: If no row has the requested id.
    """
    source = df if frame is None else frame

    # One lookup for both columns instead of filtering the frame twice
    matches = source.loc[source['id'] == id, ['genres', 'tags']]
    if matches.empty:
        raise IndexError(f"no anime with id {id!r}")
    genres_cell, tags_cell = matches.iloc[0]

    # The cells hold Python reprs; literal_eval parses them without the
    # arbitrary code execution eval() would allow on file contents.
    genres = ast.literal_eval(genres_cell)
    tags = {tag['name']: tag["rank"] for tag in ast.literal_eval(tags_cell)}
    return genres, tags

# Example usage
genres, tags = get_genres_and_tags(21)
print(f"Genres: {genres}")
print(f"Tags: {tags}")

In [115]:
meta_data_1=[]
for genre in genres:
    gen=f" One piece anime is a {genre} anime."
    meta_data_1.append(gen)
    for tag in tags:
        meta_data_1.append(f" One piece anime is a  {genre} and {tag} anime.")

In [None]:
meta_data_1

In [317]:
sentences = [
    "Gold Roger was known as the Pirate King, the strongest and most infamous being to have sailed the Grand Line. The capture and death of Roger by the World Government brought a change throughout the world. His last words before his death revealed the location of the greatest treasure in the world, One Piece. It was this revelation that brought about the Grand Age of Pirates, men who dreamed of finding One Piece (which promises an unlimited amount of riches and fame), and quite possibly the most coveted of titles for the person who found it, the title of the Pirate King.Enter Monkey D. Luffy, a 17-year-old boy that defies your standard definition of a pirate. Rather than the popular persona of a wicked, hardened, toothless pirate who ransacks villages for fun, Luffy’s reason for being a pirate is one of pure wonder; the thought of an exciting adventure and meeting new and intriguing people, along with finding One Piece, are his reasons of becoming a pirate. Following in the footsteps of his childhood hero, Luffy and his crew travel across the Grand Line, experiencing crazy adventures, unveiling dark mysteries and battling strong enemies, all in order to reach One Piece."
]

In [None]:
len("".join(sentences).split(" "))

In [None]:
# unique_tags is a set and cannot be sliced; sort it to preview ten names.
sorted(unique_tags)[:10]

In [5]:
df.dropna(subset=['description'], inplace=True)

In [None]:
df.shape

In [None]:
df.head()

In [9]:
# Store a cleaned copy of every description in a new column.
# NOTE(review): `clean_text` is not defined in any cell visible in this
# notebook - confirm it is defined or imported before this cell runs.
df["cleaned_description"] = df["description"].apply(clean_text)

In [369]:
newdata=[]

In [370]:
# Build (description, label, score) training rows from the first 100 anime.
for i in range(min(100, len(df))):
    anime_row = df.iloc[i]
    # get_genres_and_tags matches on the 'id' column, so it must receive the
    # row's anime id - not the loop position used for iloc.
    anime_genre, anime_tags = get_genres_and_tags(anime_row.id)

    desc = anime_row.cleaned_description

    # Genres are certain labels, so they get the full score
    for gen in anime_genre:
        newdata.append({"description": desc, "genre": gen, "score": 1.0})

    # Tags share the "genre" column; their rank is divided by 100
    for tag_name, tag_rank in anime_tags.items():
        newdata.append({"description": desc, "genre": tag_name, "score": tag_rank / 100})
    
    

    
    

In [371]:
thedata=pd.DataFrame(newdata)

In [None]:
thedata.head()

In [125]:
sentences.append(" ".join(tags))
sentences.append(" ".join(genres))

In [None]:
sentences

In [188]:
data_onepiece={}

In [None]:
import random

# Define base components


# Base templates
templates = [
    "Suggest me a [GENRE] anime with a [TAG].",
    "I want to watch a [GENRE] anime focused on [TAG].",
    "Looking for a [GENRE] anime with [TAG].",
    "Recommend me an anime with [TAG].",
    "Can you suggest a [GENRE] anime?",
    "Find me a [GENRE] anime with a lot of [TAG].",
    "What are some [GENRE] anime centered around [TAG]?",
    "Show me a good [GENRE] anime about [TAG].",
    "I'm in the mood for a [GENRE] anime with a [TAG].",
    "Are there any [GENRE] anime featuring [TAG]?",
    "Tell me about an anime with a lot of [TAG] elements.",
    "What is a must-watch anime with [TAG]?",
    "Give me a multi-genre anime combining [GENRE] and [GENRE] with a focus on [TAG].",
    "Recommend a [GENRE] and [GENRE] anime with [TAG].",
    "Suggest an anime with [TAG] and some [GENRE] themes.",
    "What [GENRE] anime has [TAG] as a central theme?",
    "I need an anime with [TAG], preferably in the [GENRE] genre.",
    "Can you find an anime with a mix of [GENRE] and [TAG]?",
    "Recommend a good [GENRE] anime that explores [TAG].",
    "I want to explore a [GENRE] anime without [TAG]."
]
# Data augmentation
augmentations = [
    "I'm looking for something similar to [EXAMPLE_ANIME].",
    "I enjoyed [EXAMPLE_ANIME], any recommendations like that?",
    "Can you suggest a new anime like [EXAMPLE_ANIME]?",
    "What's a good follow-up to [EXAMPLE_ANIME]?",
    "I've heard about [EXAMPLE_ANIME], but I want something different with [TAG].",
    "[EXAMPLE_ANIME] was amazing; what else is good in [GENRE]?"
]
example_anime = ["Naruto", "One Piece", "Attack on Titan", "Your Lie in April", "Steins;Gate", "Demon Slayer"]

# Generate up to 100 unique sentences
TARGET_SENTENCES = 100
# Upper bound on random draws so the cell always terminates, even when the
# genre/tag pools are too small to yield TARGET_SENTENCES distinct sentences.
MAX_ATTEMPTS = 100 * TARGET_SENTENCES

# `tags` may be a dict of {name: rank} at this point in the notebook;
# random.choice needs a sequence, so work on lists of the names.
genre_pool = list(genres)
tag_pool = list(tags)
unique_templates = set()

for attempt in range(MAX_ATTEMPTS):
    if len(unique_templates) >= TARGET_SENTENCES:
        break

    # Randomly pick the components
    genre1 = random.choice(genre_pool)
    genre2 = random.choice(genre_pool)
    tag = random.choice(tag_pool)

    # 30% of the sentences come from the augmentation templates,
    # the rest from the base templates
    if random.random() < 0.3:
        sentence = random.choice(augmentations).replace("[EXAMPLE_ANIME]", random.choice(example_anime))
    else:
        sentence = random.choice(templates)

    # Fill placeholders: first [GENRE] -> genre1, second (multi-genre
    # templates only) -> genre2, and a single [TAG]
    sentence = sentence.replace("[GENRE]", genre1, 1)
    sentence = sentence.replace("[GENRE]", genre2, 1)
    sentence = sentence.replace("[TAG]", tag, 1)

    unique_templates.add(sentence)

if len(unique_templates) < TARGET_SENTENCES:
    print(f"Only {len(unique_templates)} unique sentences could be generated.")

# Print the results
for idx, template in enumerate(unique_templates):
    print(f"{idx+1}: {template}")


In [None]:
custom_data = {}

for i in range(len(df)):
    anchor = random.choice(templates)
    # TODO: the original cell contained an unfinished `if anchor` statement
    # here (a syntax error); the intended condition still has to be written.
    # NOTE(review): as written, every iteration overwrites the same key.
    custom_data["anchor"] = anchor

In [None]:
def max_words_in_description(df):
    """
    Returns the maximum number of words present in the 'description' column of the dataframe.

    Each description is passed through ``clean_text`` and split on whitespace
    before counting. Missing descriptions count as empty text. The input
    frame is left unmodified.

    Args:
        df (pd.DataFrame): The input dataframe containing a 'description' column.

    Returns:
        int: The maximum number of words in any description (0 for an empty frame).
    """
    # fillna on a derived Series: the previous version assigned the filled
    # column back onto the caller's frame as a hidden side effect.
    descriptions = df['description'].fillna('')
    if descriptions.empty:
        return 0
    word_counts = descriptions.apply(lambda x: len(clean_text(x).split()))
    return int(word_counts.max())

# Example usage
max_words = max_words_in_description(df)
print(f"Maximum number of words in description: {max_words}")

In [311]:
yi=df.description.iloc[12]

In [None]:
thedata

In [373]:
thedata.to_csv("thedata100.csv",index=False)

In [None]:
df.head()

In [None]:
ge ,tg=get_genres_and_tags(12)
tag_=""
for k,v in tg.items():
    tag_+=k+", "
tag_+=", ".join(ge)
tag_

In [27]:
def get_name_genres_tags_and_description(id):
    """
    Build the one-line text profile of an anime that is used for embedding.

    Args:
        id (int): Value of the 'id' column identifying the anime.

    Returns:
        str: A formatted string containing the name(s), genres, tags, and
        cleaned description.
    """
    genre, tags = get_genres_and_tags(id)

    # Fetch the three text fields with a single row lookup
    record = df.loc[df['id'] == id, ['title_english', 'title_romaji', 'cleaned_description']].iloc[0]
    desc = record['cleaned_description']

    # A missing title is NaN; keep only real strings so the text does not
    # contain the literal word "nan".
    names = [
        name
        for name in (record['title_english'], record['title_romaji'])
        if isinstance(name, str) and name
    ]
    name_text = ' and '.join(names) or 'unknown'

    base = f"Name of the anime is {name_text}. Genres are {', '.join(genre)} tags are {', '.join(tags.keys())} and description is {desc}"
    return base


In [66]:
one_peice=get_name_genres_tags_and_description(20923)
vinland_saga=get_name_genres_tags_and_description(105333)
toradora=get_name_genres_tags_and_description(113425)
new_1=get_name_genres_tags_and_description(101348)

In [None]:
one_peice

In [None]:
vinland_saga

In [None]:
toradora

In [None]:
new_1

In [None]:
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Prashasst/anime-recommendation-model")



# [4, 4]

In [60]:
query="anime released in 2022"

In [None]:
embeddings = model.encode([query,one_peice,vinland_saga,toradora,new_1])

similarities = model.similarity(embeddings, embeddings)
print(similarities)

In [None]:
!pip install faiss-cpu

In [None]:
embeddings_id=[]
embeddings_lst=[]


In [162]:
embeddings_id2=[]
embeddings_lst2=[]

In [None]:
# Continue after the rows that already have an embedding. On a fresh kernel
# both lists are empty, so every row is embedded; the previous hard-coded
# start of 9993 only worked on top of leftover kernel state.
start_row = len(embeddings_lst) + len(embeddings_lst2)

for i in range(start_row, len(df)):
    if i % 500 == 0:
        print(i)  # coarse progress indicator
    anime_id = df.iloc[i].id
    text = get_name_genres_tags_and_description(anime_id)
    emb = model.encode(text)
    embeddings_lst2.append(emb)
    embeddings_id2.append(anime_id)

In [None]:
len(embeddings_lst2)

In [165]:
embeddings_id.extend(embeddings_id2)
embeddings_lst.extend(embeddings_lst2)

In [168]:
np.save("embeddings_id.npy",embeddings_id)
np.save("embeddings.npy",embeddings_lst)

In [None]:
embeddings_lst[0].shape

In [None]:
len(embeddings_lst)

In [132]:
prashasst_favourits=[
    "Prashasst's favourite anime is One Piece",
    "Prashasst likes Vinland Saga",
    "Love, Chunibyo & Other Delusions is also one of Prashasst's favourite anime",
]

In [79]:
import faiss
import numpy as np

# FAISS works on float32 matrices, so the dtype is set explicitly.
anime_embeddings = np.asarray(embeddings_lst, dtype="float32")


# Build FAISS index
# The dimension is read from the embeddings instead of being hard-coded, so
# the cell keeps working if the embedding model changes.
index = faiss.IndexFlatL2(anime_embeddings.shape[1])  # L2 distance
index.add(anime_embeddings)  # Row order matches embeddings_id


In [None]:
index

In [169]:
faiss.write_index(index, "anime_faiss.index")

In [89]:
query="Suggest a pirate anime like onepiece"
query_embedding = model.encode(query).reshape(1,-1)

In [None]:
query_embedding.shape

In [91]:
# Example query
# query_embedding = model.encode(query).reshape(1, -1)  # Reshape to 2D array
distances, indices = index.search(query_embedding, k=5)  # Search for top 5 matches

# indices is a 2D array: [[index1, index2, index3, ...]]


In [None]:
indices

In [None]:
for i in indices[0]:    
    id= embeddings_id[i]
    anime_name=df.loc[df['id'] == id, 'title_english'].values[0]
    print(anime_name)
    # print(get_name_genres_tags_and_description(embeddings_id[i]))

In [188]:
def recommend_anime(query, k=5, *, encoder=None, faiss_index=None, id_lookup=None):
    """
    Recommends anime for a query using a FAISS index and an embedding model.

    Args:
        query (str): The input query to find similar anime.
        k (int): The number of recommendations to return. Floats (for example
            from a UI slider) are truncated to int.
        encoder: Object with an ``encode(text)`` method. Defaults to the
            notebook-level ``model``.
        faiss_index: Object with a ``search(embeddings, k=...)`` method.
            Defaults to the notebook-level ``index``.
        id_lookup: Sequence mapping an index position to an anime id.
            Defaults to the notebook-level ``embeddings_id``.

    Returns:
        dict: ``{"ids": [...]}`` with the ids of the nearest anime, closest
        first. Fewer than ``k`` ids are returned when the index holds fewer
        than ``k`` vectors.
    """
    encoder = model if encoder is None else encoder
    faiss_index = index if faiss_index is None else faiss_index
    id_lookup = embeddings_id if id_lookup is None else id_lookup

    # The search call needs an integer neighbour count
    k = int(k)

    # Encode the query as a single-row float32 matrix
    query_embedding = np.asarray(encoder.encode(query), dtype="float32").reshape(1, -1)

    # Search for similar anime
    distances, indices = faiss_index.search(query_embedding, k=k)

    recommended_anime = []
    for position in indices[0]:
        # FAISS pads missing neighbours with -1, which would otherwise be
        # read as "last element" by Python indexing.
        if position < 0:
            continue
        anime_id = id_lookup[position]
        # Convert NumPy scalars to plain Python values so the result can be
        # serialised (e.g. by a JSON output component).
        if hasattr(anime_id, "item"):
            anime_id = anime_id.item()
        recommended_anime.append(anime_id)

    return {"ids": recommended_anime}

In [None]:
query="indian moviee"
recommend_anime(query,k=15)

In [None]:
!pip install gradio

In [None]:
import gradio as gr


# Create the Gradio app
with gr.Blocks() as app:
    gr.Markdown("## Anime Recommendation System")
    
    with gr.Row():
        query = gr.Textbox(label="Enter your anime preferences or query:")
        top_k = gr.Slider(1, 10, value=5, label="Number of Recommendations")

    with gr.Row():
        recommend_button = gr.Button("Get Recommendations")
        output = gr.JSON(label="Recommended Anime")

    recommend_button.click(recommend_anime, inputs=[query, top_k], outputs=output)

# Launch the app
app.launch(share=True)


In [185]:
# Create the Gradio app
with gr.Blocks() as app:
    gr.Markdown("## Anime Recommendation System")
    
    with gr.Row():
        query = gr.Textbox(label="Enter your anime preferences or query:")
        top_k = gr.Slider(1, 10, value=5, label="Number of Recommendations")

    with gr.Row():
        recommend_button = gr.Button("Get Recommendations")
        output = gr.Textbox(label="Recommended Anime", lines=10)

    recommend_button.click(recommend_anime, inputs=[query, top_k], outputs=output)

# Launch the app
# app.launch(share=True)

In [190]:
emb=np.load("embeddings_id.npy")