In [1]:
pip install pandas scikit-learn nltk


Note: you may need to restart the kernel to use updated packages.


In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report


In [23]:
import pandas as pd
df= pd.read_csv("Fake.csv")
print(df.head())
print(df.columns)

                                               title  \
0   Donald Trump Sends Out Embarrassing New Year’...   
1   Drunk Bragging Trump Staffer Started Russian ...   
2   Sheriff David Clarke Becomes An Internet Joke...   
3   Trump Is So Obsessed He Even Has Obama’s Name...   
4   Pope Francis Just Called Out Donald Trump Dur...   

                                                text subject  \
0  Donald Trump just couldn t wish all Americans ...    News   
1  House Intelligence Committee Chairman Devin Nu...    News   
2  On Friday, it was revealed that former Milwauk...    News   
3  On Christmas day, Donald Trump announced that ...    News   
4  Pope Francis used his annual Christmas Day mes...    News   

                date  
0  December 31, 2017  
1  December 31, 2017  
2  December 30, 2017  
3  December 29, 2017  
4  December 25, 2017  
Index(['title', 'text', 'subject', 'date'], dtype='object')


In [25]:
print(df.columns)

Index(['title', 'text', 'subject', 'date'], dtype='object')


In [26]:
X_train, X_test, y_train, y_test = train_test_split(
    df['text'],      # the news article content
    df['subject'],   # the category/label
    test_size=0.2,
    random_state=42,
    stratify=df['subject']
)

print(len(X_train), len(X_test))


18784 4697


In [27]:
vectorizer = TfidfVectorizer(max_features=5000, stop_words='english')
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)


In [28]:
model = LogisticRegression(max_iter=1000)
model.fit(X_train_tfidf, y_train)

y_pred = model.predict(X_test_tfidf)
print(classification_report(y_test, y_pred))


                 precision    recall  f1-score   support

Government News       0.31      0.08      0.13       314
    Middle-east       0.08      0.07      0.08       156
           News       0.95      0.98      0.97      1810
        US_News       0.13      0.13      0.13       157
      left-news       0.21      0.13      0.16       892
       politics       0.48      0.67      0.56      1368

       accuracy                           0.61      4697
      macro avg       0.36      0.34      0.34      4697
   weighted avg       0.57      0.61      0.58      4697



In [30]:
!pip install imbalanced-learn


Collecting imbalanced-learn
  Downloading imbalanced_learn-0.14.0-py3-none-any.whl.metadata (8.8 kB)
Downloading imbalanced_learn-0.14.0-py3-none-any.whl (239 kB)
Installing collected packages: imbalanced-learn
Successfully installed imbalanced-learn-0.14.0


In [31]:
from imblearn.over_sampling import RandomOverSampler

ros = RandomOverSampler(random_state=42)
X_resampled, y_resampled = ros.fit_resample(X_train_tfidf, y_train)

print("Before:", X_train_tfidf.shape, "After:", X_resampled.shape)


Before: (18784, 5000) After: (43440, 5000)


In [32]:
from sklearn.svm import LinearSVC
model = LinearSVC()
model.fit(X_resampled, y_resampled)


0,1,2
,penalty,'l2'
,loss,'squared_hinge'
,dual,'auto'
,tol,0.0001
,C,1.0
,multi_class,'ovr'
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,verbose,0


In [33]:
df['content'] = df['title'] + " " + df['text']


In [34]:
from sklearn.svm import LinearSVC

svm_model = LinearSVC(C=1.0, class_weight="balanced", max_iter=5000, random_state=42)
svm_model.fit(X_resampled, y_resampled)


0,1,2
,penalty,'l2'
,loss,'squared_hinge'
,dual,'auto'
,tol,0.0001
,C,1.0
,multi_class,'ovr'
,fit_intercept,True
,intercept_scaling,1
,class_weight,'balanced'
,verbose,0


In [35]:
y_pred_svm = svm_model.predict(X_test_tfidf)


In [36]:
from sklearn.metrics import classification_report, accuracy_score

print("Accuracy:", accuracy_score(y_test, y_pred_svm))
print(classification_report(y_test, y_pred_svm))


Accuracy: 0.5439642324888226
                 precision    recall  f1-score   support

Government News       0.16      0.25      0.20       314
    Middle-east       0.08      0.08      0.08       156
           News       0.98      0.98      0.98      1810
        US_News       0.11      0.11      0.11       157
      left-news       0.21      0.21      0.21       892
       politics       0.41      0.35      0.38      1368

       accuracy                           0.54      4697
      macro avg       0.32      0.33      0.33      4697
   weighted avg       0.55      0.54      0.55      4697



In [37]:
from sklearn.model_selection import GridSearchCV

param_grid = {'C': [0.1, 0.5, 1, 5, 10]}
grid = GridSearchCV(LinearSVC(class_weight="balanced", max_iter=5000, random_state=42),
                    param_grid,
                    cv=3,
                    scoring='f1_macro',
                    verbose=1)

grid.fit(X_resampled, y_resampled)

print("Best C:", grid.best_params_)


Fitting 3 folds for each of 5 candidates, totalling 15 fits
Best C: {'C': 10}


In [38]:
def predict_news_category(text):
    # Convert the input text into the same TF-IDF features
    text_tfidf = vectorizer.transform([text])
    
    # Predict using the trained SVM model
    prediction = svm_model.predict(text_tfidf)[0]
    
    return prediction


In [39]:
sample_text = "The government passed a new policy for economic reforms."
print("Predicted Category:", predict_news_category(sample_text))


Predicted Category: politics


In [40]:
examples = [
    "The stock market crashed due to global inflation.",
    "The Middle East peace talks resumed this week.",
    "The US president addressed the nation today."
]

for ex in examples:
    print(f"Text: {ex}\nPredicted: {predict_news_category(ex)}\n")


Text: The stock market crashed due to global inflation.
Predicted: politics

Text: The Middle East peace talks resumed this week.
Predicted: left-news

Text: The US president addressed the nation today.
Predicted: politics



In [41]:
import joblib

# Save the trained model
joblib.dump(svm_model, "news_classifier_model.pkl")

# Save the TF-IDF vectorizer
joblib.dump(vectorizer, "tfidf_vectorizer.pkl")


['tfidf_vectorizer.pkl']

In [42]:
# Load model and vectorizer when needed
loaded_model = joblib.load("news_classifier_model.pkl")
loaded_vectorizer = joblib.load("tfidf_vectorizer.pkl")

def predict_with_loaded(text):
    text_tfidf = loaded_vectorizer.transform([text])
    prediction = loaded_model.predict(text_tfidf)[0]
    return prediction


In [43]:
print(predict_with_loaded("The prime minister discussed new reforms in parliament."))



politics


In [44]:
import os

print(os.listdir())


['.android', '.bash_history', '.cache', '.codegpt', '.gradle', '.idlerc', '.ipynb_checkpoints', '.ipython', '.jupyter', '.node_repl_history', '.nuget', '.packettracer', '.VirtualBox', '.vscode', 'ANITHA - Shortcut.lnk', 'AppData', 'Application Data', 'Cisco Packet Tracer 8.2.2', 'Contacts', 'Cookies', 'Documents', 'Downloads', 'Fake.csv', 'Fake.csv.zip', 'FakeNewsDetection.ipynb', 'Favorites', 'Links', 'Local Settings', 'Music', 'My Documents', 'my_flutter_app', 'n.json', 'NetHood', 'news_classifier_model.pkl', 'NTUSER.DAT', 'ntuser.dat.LOG1', 'ntuser.dat.LOG2', 'NTUSER.DAT{a2332f18-cdbf-11ec-8680-002248483d79}.TM.blf', 'NTUSER.DAT{a2332f18-cdbf-11ec-8680-002248483d79}.TMContainer00000000000000000001.regtrans-ms', 'NTUSER.DAT{a2332f18-cdbf-11ec-8680-002248483d79}.TMContainer00000000000000000002.regtrans-ms', 'ntuser.ini', 'OneDrive', 'PrintHood', 'Recent', 'Saved Games', 'Searches', 'SendTo', 'source', 'spam.csv', 'Start Menu', 'Templates', 'tfidf.ipynb', 'tfidf_vectorizer.pkl', 'tls.i

In [45]:
joblib.dump(svm_model, "news_classifier_model.pkl")
print("✅ Model saved as news_classifier_model.pkl")

joblib.dump(vectorizer, "tfidf_vectorizer.pkl")
print("✅ Vectorizer saved as tfidf_vectorizer.pkl")


✅ Model saved as news_classifier_model.pkl
✅ Vectorizer saved as tfidf_vectorizer.pkl


In [46]:
loaded_model = joblib.load("news_classifier_model.pkl")
loaded_vectorizer = joblib.load("tfidf_vectorizer.pkl")

sample = "The president met with world leaders to discuss climate change."
print("Predicted Category:", predict_with_loaded(sample))


Predicted Category: Government News


In [48]:
!pip install gradio



Collecting gradio
  Downloading gradio-5.45.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting audioop-lts<1.0 (from gradio)
  Downloading audioop_lts-0.2.2-cp313-abi3-win_amd64.whl.metadata (2.0 kB)
Collecting brotli>=1.1.0 (from gradio)
  Downloading Brotli-1.1.0-cp313-cp313-win_amd64.whl.metadata (5.6 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.116.1-py3-none-any.whl.metadata (28 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.6.1-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.13.0 (from gradio)
  Downloading gradio_client-1.13.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.11.3-cp313-cp313-win_amd64.whl.metadata (43 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1

In [49]:
import gradio as gr

def predict_news(text):
    text_tfidf = loaded_vectorizer.transform([text])
    prediction = loaded_model.predict(text_tfidf)[0]
    return prediction

iface = gr.Interface(
    fn=predict_news,
    inputs="text",
    outputs="label",
    title="📰 News Category Classifier",
    description="Type or paste a news article, and the model will predict its category."
)

iface.launch()


  from .autonotebook import tqdm as notebook_tqdm


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.




Created dataset file at: .gradio\flagged\dataset1.csv


In [50]:
def predict_multiple(news_texts):
    # Split input into separate lines
    articles = [t.strip() for t in news_texts.split("\n") if t.strip()]
    
    # Convert each article to TF-IDF and predict
    text_tfidf = loaded_vectorizer.transform(articles)
    predictions = loaded_model.predict(text_tfidf)
    
    # Combine article + prediction
    results = { "Article": articles, "Predicted Category": predictions }
    return pd.DataFrame(results)


In [51]:
import gradio as gr

iface = gr.Interface(
    fn=predict_multiple,
    inputs=gr.Textbox(lines=10, placeholder="Paste one or more news articles (one per line)"),
    outputs="dataframe",
    title="📰 News Category Classifier",
    description="Paste one or multiple news articles (one per line), and the model will predict their categories."
)

iface.launch()


* Running on local URL:  http://127.0.0.1:7861
* To create a public link, set `share=True` in `launch()`.




Created dataset file at: .gradio\flagged\dataset2.csv
