In [12]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Sample dataset, written as (description, category) pairs and expanded into
# one record per story below.
_samples = [
    ("A high school girl finds herself in love triangles, facing social pressure and romance.", "romance"),
    ("A group of heroes sets off to defeat the dragon terrorizing the kingdom.", "fantasy"),
    ("A boy gains powers to fight evil forces threatening the city.", "action"),
    ("Two childhood friends reconnect and their feelings for each other grow into love.", "romance"),
    ("An ancient artifact gives its owner immense powers, but it comes with a curse.", "fantasy"),
    ("A detective is on the hunt for a criminal mastermind who is planning a heist.", "action"),
    ("A girl discovers a hidden world of magic and mythical creatures.", "fantasy"),
    ("A young couple faces hardships but their love grows stronger through it all.", "romance"),
    ("Warriors must band together to defeat an enemy force that threatens the world.", "action"),
    ("A prince must choose between love and duty while saving his kingdom.", "romance"),
]
data = [{"description": text, "category": label} for text, label in _samples]

# One row per story, with a 'description' column and a 'category' column
df = pd.DataFrame(data)

# Model input is the free-text description; the target is its category label
X, y = df["description"], df["category"]

# Split the raw descriptions BEFORE vectorizing. Fitting TF-IDF on the whole
# dataset and splitting afterwards lets the held-out rows influence the
# vocabulary and IDF weights (data leakage), which biases the evaluation.
# Same test_size and random_state as before, applied to the same number of rows.
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Learn the vocabulary and IDF weights from the training descriptions only,
# then project the held-out descriptions into that same feature space.
tfidf_vectorizer = TfidfVectorizer()
X_train = tfidf_vectorizer.fit_transform(X_train_text)
X_test = tfidf_vectorizer.transform(X_test_text)

# Every description encoded with the train-fitted vectorizer; kept so that any
# code still referring to X_tfidf continues to work.
X_tfidf = tfidf_vectorizer.transform(X)

# Build a depth-capped decision tree (max_depth=5, fixed random_state) and fit
# it on the training split in a single expression; fit() returns the estimator.
dt_classifier = DecisionTreeClassifier(max_depth=5, random_state=42).fit(X_train, y_train)

# Label the held-out rows, then report the fraction labelled correctly
y_pred = dt_classifier.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Model accuracy: {accuracy * 100:.2f}%")

# Two unseen descriptions to run through the fitted pipeline
sample_descriptions = [
    "A girl battles evil forces while falling in love.",
    "A hero must save his world from the dark lord.",
]

# Encode with the already-fitted vectorizer (transform only, no refit) so the
# columns line up with the features the tree was trained on.
sample_tfidf = tfidf_vectorizer.transform(sample_descriptions)
predictions = dt_classifier.predict(sample_tfidf)

# Print each description next to the category predicted for it
for idx, desc in enumerate(sample_descriptions):
    pred = predictions[idx]
    print(f"Description: {desc}\nPredicted Category: {pred}\n")


Model accuracy: 33.33%
Description: A girl battles evil forces while falling in love.
Predicted Category: romance

Description: A hero must save his world from the dark lord.
Predicted Category: fantasy

