In [1]:
pip install faiss-cpu numpy pandas sentence-transformers


Collecting faiss-cpu
  Downloading faiss_cpu-1.9.0.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.9.0.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.5/27.5 MB[0m [31m20.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.9.0.post1


In [2]:
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss

# Read the K-drama catalogue from the Colab workspace
KDRAMA_CSV_PATH = '/content/kdrama.csv'
kdrama = pd.read_csv(KDRAMA_CSV_PATH)


def _describe_drama(row):
    """Flatten one catalogue row into a single labelled text blob for embedding."""
    return (
        f"Title: {row['Name']}. "
        f"Genres: {row['Genre']} "
        f"Cast: {row['Cast']} "
        f"Overview: {row['Synopsis']} "
        f"Tags: {row['Tags']} "
        f"Year: {row['Year of release']} "
        f"Network: {row['Original Network']}"
    )


# Merge the descriptive columns so each drama is embedded with its full context
kdrama['combined_info'] = kdrama.apply(_describe_drama, axis=1)


In [7]:
# Peek at the combined text built for the row labelled 0
kdrama.loc[0, 'combined_info']

"Title: Move to Heaven. Genres: Life,  Drama,  Family  Cast: Lee Je Hoon, Tang Jun Sang, Hong Seung Hee, Jung Suk Yong, Jung Young Joo, Lee Moon Shik Overview: Geu Roo is a young autistic man. He works for his father’s business “Move To Heaven.” Their job is to arrange items left by deceased people. One day, Geu Roo's own father dies. Geu Roo is left alone, but his uncle Sang Koo suddenly appears in front of him. Sang Koo is a cold man. He was a martial artist who fought in underground matches. He went to prison because of what happened at his fight. Sang Koo now becomes Geu Roo’s guardian. They run “Move To Heaven” together. Tags: Autism, Uncle-Nephew Relationship, Death, Savant Syndrome, Mourning, Tearjerker, Father-Son Relationship, Life Lesson, Ex-convict, Cleaning And Organizing Year: 2021 Network: Netflix"

In [3]:
# Sentence-embedding model; the same instance is reused later to encode queries
model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode every drama's combined description (progress bar shows batch progress)
corpus_texts = kdrama['combined_info'].tolist()
embeddings = model.encode(corpus_texts, show_progress_bar=True)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

In [4]:
# FAISS works on float32 vectors, so cast the embeddings before indexing
embeddings = np.array(embeddings, dtype='float32')

# Flat (exhaustive) L2-distance index sized to the embedding dimension
embedding_dim = embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)
index.add(embeddings)  # store every drama vector in the index


In [10]:
def search_kdrama(query, k=5):
    """Return the dramas whose combined description is closest to ``query``.

    The query is embedded with the module-level ``model`` and matched against
    the module-level FAISS ``index``; hits are looked up positionally in the
    ``kdrama`` DataFrame.

    Args:
        query: Free-text description of what the user wants to watch.
        k: Maximum number of results to return (must be >= 1).

    Returns:
        A DataFrame with the columns ``Name``, ``Genre``, ``Original Network``
        and ``Synopsis``, ordered from nearest to farthest match. It contains
        at most ``k`` rows, and fewer when the index holds fewer than ``k``
        vectors.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    result_columns = ['Name', 'Genre', 'Original Network', 'Synopsis']

    # FAISS pads missing neighbours with label -1 when k exceeds the number of
    # indexed vectors; iloc[-1] would then silently return the LAST row as a
    # bogus hit, so never ask for more neighbours than the index holds.
    k = min(k, index.ntotal)
    if k == 0:
        return kdrama.iloc[[]][result_columns]

    # Encode the query into the same embedding space as a (1, dim) float32 matrix
    query_matrix = np.ascontiguousarray(model.encode([query]), dtype='float32')

    # Perform the search; distances are not needed for the returned table
    _, indices = index.search(query_matrix, k)

    # Defensive: discard any remaining "no neighbour" placeholders
    hit_positions = indices[0]
    hit_positions = hit_positions[hit_positions >= 0]

    return kdrama.iloc[hit_positions][result_columns]


In [11]:
# Smoke-test the retriever: ask for a romance and show the three closest dramas
query = "I'm looking for a romantic kdrama"
results = search_kdrama(query=query, k=3)
print(results)


                 Name                                   Genre  \
51     Dr. Romantic 2  Romance,  Drama,  Medical,  Melodrama    
211            Run On            Romance, Life, Drama, Sports   
182  One Spring Night         Romance, Life, Drama, Melodrama   

    Original Network                                           Synopsis  
51               SBS  A “real doctor” story set in a small, humble h...  
211    jTBC, Netflix  Ki Sun Gyeom is a sprinter on the national tea...  
182     MBC, Netflix  eeling trapped in a stale four-year relationsh...  
