In [1]:
!pip install transformers datasets torch scikit-learn





In [8]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# 1. Sample Hindi dataset
# Twelve short Hindi sentences; "text" and "label" are paired by position
# (5 Positive, 5 Negative, 2 Neutral).
# The literals were previously stored as mojibake (UTF-8 bytes decoded as
# Mac Roman); they are restored to real Devanagari here.
data = {
    "text": [
        "यह फिल्म बहुत अच्छी है",
        "मुझे यह मोबाइल पसंद नहीं आया",
        "सेवा बहुत खराब थी",
        "आज मौसम अच्छा है",
        "यह ठीक है",
        "बहुत बेकार अनुभव",
        "मुझे यह पसंद आया",
        "यह शानदार है",
        "यह उत्पाद खराब है",
        "मैं बहुत खुश हूँ",
        "यह औसत है",
        "मुझे यह पसंद नहीं है"
    ],
    "label": [
        "Positive",
        "Negative",
        "Negative",
        "Positive",
        "Neutral",
        "Negative",
        "Positive",
        "Positive",
        "Negative",
        "Positive",
        "Neutral",
        "Negative"
    ]
}


df = pd.DataFrame(data)

# 2. Train-test split
# 30% of the rows are held out. Stratifying on the label keeps every class
# represented in both splits, which matters with only two Neutral rows.
X_train, X_test, y_train, y_test = train_test_split(
    df["text"],
    df["label"],
    test_size=0.3,
    random_state=42,
    stratify=df["label"]
)

# 3. TF-IDF vectorization
# The default token_pattern r"(?u)\b\w\w+\b" is unsuitable for Devanagari:
# Python's \w does not match combining marks (vowel signs, virama, anusvara),
# so words are cut at those marks and single-character fragments are dropped.
# Match whole Devanagari runs instead (danda punctuation U+0964/U+0965 is
# excluded) and fall back to the default rule for any other script.
HINDI_TOKEN_PATTERN = r"[\u0900-\u0963\u0966-\u097F]+|\w\w+"
vectorizer = TfidfVectorizer(
    ngram_range=(1, 2),      # unigrams + bigrams
    min_df=1,
    token_pattern=HINDI_TOKEN_PATTERN
)
# Fit the vocabulary on the training split only; the test split is merely
# transformed so no information from it leaks into the features.
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# 4. Logistic regression model
model = LogisticRegression(max_iter=1000)
model.fit(X_train_tfidf, y_train)

# 5. Predictions on the held-out split (computed but not scored in this cell)
y_pred = model.predict(X_test_tfidf)

# 6. Test a custom sentence ("This mobile is very good")
new_text = ["यह मोबाइल बहुत अच्छा है"]
new_text_tfidf = vectorizer.transform(new_text)
prediction = model.predict(new_text_tfidf)

print("\nSentiment Prediction:", prediction[0])



Sentiment Prediction: Positive


In [9]:
# Extra Hindi sentences used to spot-check the fitted classifier.
# The literals were previously stored as mojibake (UTF-8 bytes decoded as
# Mac Roman); they are restored to real Devanagari here.
test_sentences = [
    "यह मोबाइल बहुत शानदार है",   # "This mobile is really great"
    "यह बहुत खराब है",            # "This is very bad"
    "यह औसत उत्पाद है"            # "This is an average product"
]

# Only transform (never refit) with the existing vectorizer so the feature
# columns line up with the ones the model was trained on.
test_tfidf = vectorizer.transform(test_sentences)
predictions = model.predict(test_tfidf)

# Print each sentence next to its predicted label.
for text, pred in zip(test_sentences, predictions):
    print(f"Text: {text}")
    print(f"Predicted Sentiment: {pred}")
    print("------")


Text: यह मोबाइल बहुत शानदार है
Predicted Sentiment: Positive
------
Text: यह बहुत खराब है
Predicted Sentiment: Negative
------
Text: यह औसत उत्पाद है
Predicted Sentiment: Positive
------
