In [1]:
# 1.Import Required Libraries
import pandas as pd
import numpy as np
import pickle

In [2]:
# 2. Load Model & Vectorizer
# Restores the two pickled artifacts this notebook analyses: the classifier
# (bound to `model`) and the TF-IDF vectorizer (bound to `tfidf`).
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only load artifacts that come from a trusted source.
with open("../models/fake_news_model.pkl", "rb") as f:
    model = pickle.load(f)
with open("../models/tfidf_vectorizer.pkl", "rb") as f:
    tfidf = pickle.load(f)

In [3]:
# 3. Get Feature Names
# Vocabulary terms of the loaded vectorizer. Later cells index this array and
# the coefficient vector with the same position to pair each term with its weight.
feature_names = tfidf.get_feature_names_out()

In [5]:
# 4. Extract Model Coefficients
# Logistic Regression learns one weight per vocabulary term. The sections
# below read positive weights as REAL indicators and negative weights as FAKE.
# Fail fast if the artifacts do not match that assumption: taking row 0 of a
# multi-row coefficient matrix, or pairing weights with a vocabulary of a
# different length, would silently produce misleading word/weight tables.
if model.coef_.shape[0] != 1:
    raise ValueError(
        "Expected a binary model whose coef_ has shape (1, n_features); "
        f"got shape {model.coef_.shape}."
    )
coefficients = model.coef_[0]
if len(coefficients) != len(feature_names):
    raise ValueError(
        f"Model has {len(coefficients)} coefficients but the vectorizer has "
        f"{len(feature_names)} features; the two artifacts do not belong together."
    )

In [6]:
# 5. Top Words Indicating REAL News
# np.argsort orders ascending, so the last 20 positions are the 20 largest
# coefficients, listed from the smallest to the largest of that group.
top_real_indices = np.argsort(coefficients)[-20:]
top_real_words = list(
    zip(feature_names[top_real_indices], coefficients[top_real_indices])
)
top_real_words

[('representative', np.float64(3.0005040113652375)),
 ('year', np.float64(3.035453006189322)),
 ('said statement', np.float64(3.094571218634692)),
 ('thats', np.float64(3.409563237127188)),
 ('nov', np.float64(3.553110558887762)),
 ('monday', np.float64(3.5559777398745114)),
 ('obamas', np.float64(3.6155580473138036)),
 ('edt', np.float64(3.798861631628342)),
 ('im', np.float64(3.902225874060528)),
 ('friday', np.float64(3.9441003674296624)),
 ('thursday', np.float64(3.9663835758493815)),
 ('reuters president', np.float64(4.1957780917790215)),
 ('dont', np.float64(4.492691729119591)),
 ('tuesday', np.float64(4.499491247510516)),
 ('washington', np.float64(4.637799300147129)),
 ('wednesday', np.float64(4.752222107949722)),
 ('president donald', np.float64(5.296747756801945)),
 ('washington reuters', np.float64(9.347548419658658)),
 ('said', np.float64(15.973097390917838)),
 ('reuters', np.float64(23.47687229963327))]

In [7]:
# 6. Top Words Indicating FAKE News
# np.argsort orders ascending, so the first 20 positions are the 20 most
# negative coefficients, listed from the most negative upward.
top_fake_indices = np.argsort(coefficients)[:20]
top_fake_words = list(
    zip(feature_names[top_fake_indices], coefficients[top_fake_indices])
)
top_fake_words

[('via', np.float64(-8.332770745911894)),
 ('image', np.float64(-6.762728965090447)),
 ('president trump', np.float64(-5.220149073914345)),
 ('obama', np.float64(-5.118184711452289)),
 ('gop', np.float64(-4.769254539767933)),
 ('hillary', np.float64(-4.624613328031649)),
 ('mr', np.float64(-4.48214352122727)),
 ('image via', np.float64(-4.211800909829309)),
 ('even', np.float64(-4.080263545741541)),
 ('american', np.float64(-3.7663283834551624)),
 ('rep', np.float64(-3.4341096163022233)),
 ('america', np.float64(-3.425055794737971)),
 ('sen', np.float64(-3.4048387934785995)),
 ('wire', np.float64(-3.397929047542443)),
 ('like', np.float64(-3.3949507902790175)),
 ('know', np.float64(-3.2636111031927943)),
 ('president obama', np.float64(-3.1368273508312874)),
 ('read', np.float64(-2.921519342172259)),
 ('daily', np.float64(-2.9214066004939636)),
 ('watch', np.float64(-2.9211452723554943))]

In [8]:
# 7. Convert to DataFrame (For Visualization)
# Both (word, weight) lists get the same two-column layout, so build the two
# tables in a single pass and unpack them in order.
real_df, fake_df = (
    pd.DataFrame(pairs, columns=["word", "weight"])
    for pairs in (top_real_words, top_fake_words)
)
real_df

Unnamed: 0,word,weight
0,representative,3.000504
1,year,3.035453
2,said statement,3.094571
3,thats,3.409563
4,nov,3.553111
5,monday,3.555978
6,obamas,3.615558
7,edt,3.798862
8,im,3.902226
9,friday,3.9441


In [9]:
# Display the table of FAKE-indicating words built in the previous cell.
fake_df

Unnamed: 0,word,weight
0,via,-8.332771
1,image,-6.762729
2,president trump,-5.220149
3,obama,-5.118185
4,gop,-4.769255
5,hillary,-4.624613
6,mr,-4.482144
7,image via,-4.211801
8,even,-4.080264
9,american,-3.766328


In [10]:
# 8. Explain Prediction for a Single Article
# Lists the vocabulary terms found in one article alongside their learned weights.
def explain_prediction(text, top_n=10, *, vectorizer=None, names=None, weights=None):
    """Return the most heavily weighted vocabulary terms present in ``text``.

    The text is vectorized, every term with a non-zero entry is paired with its
    model coefficient, and the pairs are ranked by absolute coefficient.
    Ranking uses the coefficient alone; the TF-IDF value of the term in this
    particular text is not factored in.

    Parameters
    ----------
    text : str
        The article text. It is passed to the vectorizer as given, so apply any
        required preprocessing before calling.
    top_n : int or None, default 10
        Maximum number of (term, weight) pairs to return. ``None`` returns all
        matching terms.
    vectorizer : object, optional
        Object with a ``transform`` method. Defaults to the notebook-level
        ``tfidf``.
    names : sequence, optional
        Term for each feature index. Defaults to the notebook-level
        ``feature_names``.
    weights : sequence, optional
        Coefficient for each feature index. Defaults to the notebook-level
        ``coefficients``.

    Returns
    -------
    list of tuple
        ``(term, weight)`` pairs, largest absolute weight first. Empty when no
        vocabulary term occurs in ``text``.

    Raises
    ------
    ValueError
        If ``top_n`` is negative.
    """
    # A negative slice bound would silently drop items from the tail instead
    # of limiting the result, so reject it explicitly.
    if top_n is not None and top_n < 0:
        raise ValueError(f"top_n must be non-negative or None, got {top_n}")

    # Fall back to the objects created earlier in the notebook when the caller
    # does not supply replacements.
    if vectorizer is None:
        vectorizer = tfidf
    if names is None:
        names = feature_names
    if weights is None:
        weights = coefficients

    vector = vectorizer.transform([text])
    # Column indices of the non-zero entries, i.e. the terms found in the text.
    present_columns = vector.nonzero()[1]

    word_weights = [(names[idx], weights[idx]) for idx in present_columns]

    # Sort by absolute importance
    word_weights.sort(key=lambda pair: abs(pair[1]), reverse=True)

    return word_weights[:top_n]

In [11]:
# 9. Test Explainability
# A short made-up headline used to exercise explain_prediction.
sample_text = """
Breaking news! This shocking revelation will change everything.
"""

# Only lowercasing is applied before vectorizing.
# NOTE(review): vocabulary terms such as 'thats', 'dont' and 'im' suggest the
# training text also had punctuation stripped — confirm that the same cleaning
# used at training time is applied here.
cleaned_text = sample_text.lower()
explain_prediction(cleaned_text)

[('news', np.float64(-2.2087362827334114)),
 ('everything', np.float64(-1.0185856542998326)),
 ('shocking', np.float64(-1.0163250779792425)),
 ('change', np.float64(0.5968414573473306)),
 ('revelation', np.float64(-0.5915339432691714)),
 ('breaking', np.float64(-0.5099542064236586))]

#### 📌 The output shows which words influenced the decision.

#### 🧠 How to Explain This in Interviews

“Instead of treating the model as a black box,
I analyzed the logistic regression coefficients to identify which words
push predictions toward fake or real news.
This improves trust and interpretability.”

- 💯 That answer stands out.

### ✅ STEP 5 OUTPUT (What You Achieved)

- ✔ Interpretable ML model
- ✔ Feature-level explanations
- ✔ Trustworthy predictions
- ✔ Advanced ML understanding