In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Toy corpus: (description, category) pairs, one per sample
_SAMPLE_ROWS = [
    ("A high school love story where a shy girl falls for the popular guy.", "romance"),
    ("An epic battle between warriors to save the fantasy kingdom.", "fantasy"),
    ("A superhero gains powers and fights against a criminal organization.", "action"),
    ("Two souls meet through fate and uncover their past lives together.", "romance"),
    ("A detective uncovers secrets in a haunted town full of mystery.", "fantasy"),
    ("A martial artist seeks revenge against those who betrayed him.", "action"),
    ("A heartwarming story of childhood friends becoming lovers.", "romance"),
    ("Magic and creatures clash in a world where humans fight for survival.", "fantasy"),
    ("A mysterious figure emerges, taking down crime syndicates one by one.", "action"),
    ("Two rivals compete in a prestigious art school and discover hidden feelings.", "romance"),
]

# Same samples as a list of records keyed by column name
data = [{"description": text, "category": label} for text, label in _SAMPLE_ROWS]

# Tabular view of the corpus (columns: description, category)
df = pd.DataFrame(data)

# Raw text is the model input; the category column is the target
X, y = df['description'], df['category']

# Split the raw text BEFORE vectorizing so the held-out descriptions never
# influence the TF-IDF vocabulary or IDF weights (fitting on the full corpus
# leaks test information into the features).
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Convert text to numerical data using TF-IDF, learned from the training split only
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(X_train_text)
X_test = vectorizer.transform(X_test_text)

# Full-corpus matrix kept under its original name; it is not used for fitting
X_tfidf = vectorizer.transform(X)

# Train the Decision Tree Classifier
classifier = DecisionTreeClassifier(random_state=42)
classifier.fit(X_train, y_train)

# Make predictions and evaluate
# NOTE: with test_size=0.2 on the 10-row sample dataset only 2 descriptions are
# held out, so this accuracy is a very coarse estimate (0%, 50% or 100%).
y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model accuracy: {accuracy * 100:.2f}%")

# Unseen descriptions to run through the fitted vectorizer and classifier
new_descriptions = [
    "A young couple navigates the challenges of high school romance.",
    "A warrior rises to save his land from dark forces.",
    "A vigilante targets the city's corrupt officials.",
]

# Reuse the already-fitted TF-IDF vocabulary (transform only, no refit)
new_descriptions_tfidf = vectorizer.transform(new_descriptions)
predictions = classifier.predict(new_descriptions_tfidf)

# Report each description next to its predicted category
for idx, desc in enumerate(new_descriptions):
    category = predictions[idx]
    print(f"Description: {desc}\nPredicted Category: {category}\n")


Model accuracy: 0.00%
Description: A young couple navigates the challenges of high school romance.
Predicted Category: romance

Description: A warrior rises to save his land from dark forces.
Predicted Category: romance

Description: A vigilante targets the city's corrupt officials.
Predicted Category: romance

