In [None]:
# ============================================
# AG NEWS TEXT CLASSIFICATION - COMPLETE CODE
# ============================================

# 1️⃣ IMPORT LIBRARIES
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report

# 2️⃣ LOAD DATASET (ADJUST PATH IF NEEDED)
# Read the AG News train and test splits from the Kaggle input directory.
train_df, test_df = (
    pd.read_csv('/kaggle/input/ag-news-classification-dataset/train.csv'),
    pd.read_csv('/kaggle/input/ag-news-classification-dataset/test.csv'),
)

print("Columns in dataset:", train_df.columns)

# 3️⃣ MAP CORRECT COLUMNS
# Features are the 'Title' column; labels come from 'Class Index', which is
# shifted down by one so the 1-4 class ids become 0-3.
X_train, y_train = train_df['Title'], train_df['Class Index'].sub(1)
X_test, y_test = test_df['Title'], test_df['Class Index'].sub(1)

# CATEGORY NAME MAPPING (0-based label id -> human-readable category)
label_names = dict(enumerate(['World', 'Sports', 'Business', 'Sci/Tech']))

# 4️⃣ BUILD MODEL (TF-IDF + LOGISTIC REGRESSION)
# Step 1 turns raw text into TF-IDF features (English stop words removed,
# vocabulary capped at 60000 terms); step 2 is the classifier on top of them.
tfidf_step = TfidfVectorizer(stop_words='english', max_features=60000)
clf_step = LogisticRegression(max_iter=2000)

model = Pipeline(steps=[('tfidf', tfidf_step), ('clf', clf_step)])

# 5️⃣ TRAIN MODEL
# Fit the whole pipeline (vectorizer + classifier) on the training headlines.
print("\nTraining the model...")
model.fit(X_train, y_train)
print("Model training completed! 🎉")

# 6️⃣ EVALUATE MODEL
# Predict on the held-out test headlines and report accuracy plus
# per-class precision/recall/F1.
y_pred = model.predict(X_test)

print("\n=========== MODEL PERFORMANCE ===========")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:")
# Pass the label ids explicitly so every display name is tied to its own id
# rather than to whichever classes happen to appear in y_test / y_pred, and
# hand over plain lists instead of dict views.
print(classification_report(
    y_test,
    y_pred,
    labels=list(label_names),
    target_names=list(label_names.values()),
))

# 7️⃣ MANUAL TESTING (ENTER YOUR OWN NEWS)
# Interactive loop: read a headline, classify it with the fitted pipeline and
# print the category name. Ends on 'exit', Ctrl+C, or end of input.
print("\n========== MANUAL NEWS CLASSIFICATION ==========\n")
print("Type any news headline and press ENTER.")
print("Type 'exit' to stop.\n")

while True:
    try:
        # Strip so that " exit " or a trailing newline/space still matches.
        txt = input("Enter news headline: ").strip()
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the user interrupted: stop cleanly instead of
        # ending the session with a traceback.
        print("\nPrediction session ended.")
        break

    if txt.lower() == "exit":
        print("Prediction session ended.")
        break

    if not txt:
        # Nothing to classify; a prediction on empty text would be arbitrary.
        print("Please enter a non-empty headline.")
        print("-" * 50)
        continue

    # If none of the words survive vectorization (all stop words or unseen
    # terms) the feature vector is all zeros and the classifier's answer
    # comes from its intercepts alone, so warn the user.
    if model.named_steps['tfidf'].transform([txt]).nnz == 0:
        print("Note: no known words in this headline; "
              "the prediction below is not meaningful.")

    pred = model.predict([txt])[0]
    # int() turns the NumPy integer returned by predict into a plain dict key.
    print("Predicted Category:", label_names[int(pred)])
    print("-" * 50)


Columns in dataset: Index(['Class Index', 'Title', 'Description'], dtype='object')

Training the model...
Model training completed! 🎉

Accuracy: 0.8714473684210526

Classification Report:
              precision    recall  f1-score   support

       World       0.89      0.87      0.88      1900
      Sports       0.89      0.94      0.91      1900
    Business       0.85      0.83      0.84      1900
    Sci/Tech       0.86      0.84      0.85      1900

    accuracy                           0.87      7600
   macro avg       0.87      0.87      0.87      7600
weighted avg       0.87      0.87      0.87      7600



Type any news headline and press ENTER.
Type 'exit' to stop.



Enter news headline:  india win the cricket match


Predicted Category: Sports
--------------------------------------------------


Enter news headline:  i love snakes


Predicted Category: Sci/Tech
--------------------------------------------------


Enter news headline:   Elon Musk announces new Starship test launch next month


Predicted Category: Sci/Tech
--------------------------------------------------


Enter news headline:  stock market


Predicted Category: Business
--------------------------------------------------


Enter news headline:  sujeet is very high 


Predicted Category: Business
--------------------------------------------------


Enter news headline:  kushal is very friendly 


Predicted Category: Sports
--------------------------------------------------


Enter news headline:  Oil and Economy Cloud Stocks' Outlook


Predicted Category: Business
--------------------------------------------------
