# TruthLens Modelling

In [1]:
!pip install lime



In [2]:
#imports
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from scipy.sparse import hstack, csr_matrix
import time
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
import csv
import random
pd.set_option('display.max_colwidth', None)
from lime.lime_text import LimeTextExplainer

## Phase 1: Binary Classification

### Feature Extraction Using TF-IDF, n-grams and readability metrics

In [3]:
#load data
df = pd.read_csv('Data/phase1_final_clean.csv')
df = df.reset_index(drop=True)
print(df.head(3))

                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        

In [5]:
#TF-IDF feature extraction with n-grams
start_time = time.time()
#replace NaN values with an empty string to resolve NaN ValueError
df['content_lemma_nostop'] = df['content_lemma_nostop'].fillna('')
vectorizer = TfidfVectorizer(max_features=5000, ngram_range=(1, 3))
X_tfidf = vectorizer.fit_transform(df['content_lemma_nostop'])
#get pre-calculated readability features
readability_features = df[['word_count', 'sentence_count', 'flesch_reading_ease']].values
#standardise readability features
scaler = StandardScaler()
readability_scaled = scaler.fit_transform(readability_features)
#convert to a sparse matrix
readability_sparse = csr_matrix(readability_scaled)
#combine TF-IDF features with the readability metrics
X_combined = hstack([X_tfidf, readability_sparse])

y = df['label']
print("Feature extraction: {:.4f} seconds".format(time.time() - start_time))

Feature extraction: 371.5113 seconds


### Split dataset

In [6]:
#retain the indices as we need these for looking up explanations later
train_indices, test_indices = train_test_split(df.index, test_size=0.2, random_state=999)
#split X and y using the train/test indices
X_train = X_combined[train_indices]
X_test = X_combined[test_indices]
y_train = y.iloc[train_indices]
y_test = y.iloc[test_indices]

### Logistic Regression

In [8]:
start_time = time.time()
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print("Fit Logistic Regression model: {:.4f} seconds".format(time.time() - start_time))
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Fit Logistic Regression model: 16.6642 seconds
Accuracy: 0.9419773508079908
Classification Report:
               precision    recall  f1-score   support

           0       0.94      0.93      0.93      6931
           1       0.94      0.95      0.95      8787

    accuracy                           0.94     15718
   macro avg       0.94      0.94      0.94     15718
weighted avg       0.94      0.94      0.94     15718



### Random Forest

In [9]:
start_time = time.time()
rf_model = RandomForestClassifier()
rf_model.fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)
print("Fit Random Forest model: {:.4f} seconds".format(time.time() - start_time))
print("Random Forest Accuracy:", accuracy_score(y_test, rf_pred))
print("Random Forest Classification Report:\n", classification_report(y_test, rf_pred))

Fit Random Forest model: 618.2777 seconds
Random Forest Accuracy: 0.9505026084743606
Random Forest Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.94      0.94      6931
           1       0.95      0.96      0.96      8787

    accuracy                           0.95     15718
   macro avg       0.95      0.95      0.95     15718
weighted avg       0.95      0.95      0.95     15718



### Support Vector Machine (SVM)

In [34]:
start_time = time.time()
svm_model = SVC()
svm_model.fit(X_train, y_train)
svm_pred = svm_model.predict(X_test)
print("Fit SVM model: {:.4f} seconds".format(time.time() - start_time))
print("SVM Accuracy:", accuracy_score(y_test, svm_pred))
print("SVM Classification Report:\n", classification_report(y_test, svm_pred))

Fit SVM model: 4321.4944 seconds
SVM Accuracy: 0.9504389871484922
SVM Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.93      0.94      6931
           1       0.95      0.96      0.96      8787

    accuracy                           0.95     15718
   macro avg       0.95      0.95      0.95     15718
weighted avg       0.95      0.95      0.95     15718



### Tweak model

In [10]:
start_time = time.time()
from sklearn.model_selection import GridSearchCV

param_grid = {'C': [0.1, 1, 10]}
grid = GridSearchCV(LogisticRegression(max_iter=1000), param_grid, cv=5)
grid.fit(X_train, y_train)
print("Best parameters:", grid.best_params_)
print("Grid Search: {:.4f} seconds".format(time.time() - start_time))

Best parameters: {'C': 10}
Grid Search: 309.6480 seconds


In [11]:
# Instantiate the model with the best hyperparameter
final_model = LogisticRegression(C=10, max_iter=1000)

# Fit the final model on the training data
final_model.fit(X_train, y_train)

# Evaluate on the test set
final_predictions = final_model.predict(X_test)
print("Final Model Accuracy:", accuracy_score(y_test, final_predictions))
print("Final Model Classification Report:\n", classification_report(y_test, final_predictions))

Final Model Accuracy: 0.9487212113500445
Final Model Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.94      0.94      6931
           1       0.95      0.96      0.95      8787

    accuracy                           0.95     15718
   macro avg       0.95      0.95      0.95     15718
weighted avg       0.95      0.95      0.95     15718



### Explain prediction

In [30]:
def explain_prediction(row, model, vectorizer, scaler, top_n=5):
    """
        Explains the prediction of the model by showing the most influential features (both TF-IDF and readability metrics) for the prediction.

        Parameters:
        ----------
        row : pandas.Series
            A row of the dataframe containing:
              - 'text': the preprocessed text (e.g. from 'content_lemma_nostop')
              - 'word_count': the document's word count
              - 'sentence_count': the number of sentences
              - 'flasch_reading_ease': the readability score
        model : object
            The trained machine learning model.
        vectorizer : object
            The TF-IDF vectorizer used to transform the text.
        scaler : object
            The scaler used for the readability metrics during training.
        top_n : int
            The number of features to display based on their contribution.

        Returns:
        -------
        dict
            A dictionary with the prediction ('label') and the top contributing features ('features').
    """
    #make sure ther eis text in the row
    row['text'] = row['text'] if pd.notnull(row['text']) else ''
    #transform the text using the vectorizer (this gets the first 5000 features)
    tfidf_text = vectorizer.transform([row['text']])
    #get the readability features from the row
    readability_values = np.array([[row['word_count'], row['sentence_count'], row['flesch_reading_ease']]])
    #scale the readability metrics using the same scaler as during training.
    readability_scaled = scaler.transform(readability_values)
    #convert the scaled features into a sparse matrix
    readability_sparse = csr_matrix(readability_scaled)
    #combine TF-IDF features and readability features
    full_features = hstack([tfidf_text, readability_sparse])
    # Predict the label.
    prediction = model.predict(full_features)[0]
    
    #get contribution for each feature
    contributions = full_features.toarray()[0]* model.coef_[0]
    #get indices of features with highest absolute contributions
    top_indices = np.argsort(np.abs(contributions))[-top_n:]
    #get feature names
    tfidf_feature_names = vectorizer.get_feature_names_out()
    readability_feature_names = ['word_count', 'sentence_count', 'flesch_reading_ease']
    all_feature_names = list(tfidf_feature_names) + readability_feature_names
    top_features = [all_feature_names[i] for i in top_indices]

    return {
        "label": prediction,
        "features": top_features
    }

# Create a DataFrame for test data
test_df = pd.DataFrame({
    'text': df.loc[test_indices, 'content_lemma_nostop'].reset_index(drop=True),
    'word_count': df.loc[test_indices, 'word_count'].reset_index(drop=True),
    'sentence_count': df.loc[test_indices, 'sentence_count'].reset_index(drop=True),
    'flesch_reading_ease': df.loc[test_indices, 'flesch_reading_ease'].reset_index(drop=True),
    'true_label': y_test.reset_index(drop=True),
    'predicted_label': y_pred
})

# Row predicted as Real (0)
real_example = test_df[test_df['predicted_label'] == 0].iloc[0]
#print(real_example)

# Row predicted as Fake (1)
fake_example = test_df[test_df['predicted_label'] == 1].iloc[0]

In [32]:
real_explanation = explain_prediction(real_example, final_model, vectorizer, scaler)
fake_explanation = explain_prediction(fake_example, final_model, vectorizer, scaler)

print("Real Example Prediction:")
print("Text:", real_example['text'])
print("Predicted Label:", real_explanation['label'])
print("Top Features:", real_explanation['features'])

print("\nFake Example Prediction:")
print("Text:", fake_example['text'])
print("Predicted Label:", fake_explanation['label'])
print("Top Features:", fake_explanation['features'])


Real Example Prediction:
Text: among senseless beating inflict reality presidential debate monday night discussion new york city tactic donald j trump attribute nonexistent increase murder action never happen namely end practice variously judge judge current mayor multilayered fiction murder decline judge end neither current mayor fact police department begin drastically curtail use 2012 administration mayor michael r bloomberg independent well document lightly notice july 9 2012 editorial new york post warn reduction use would lead blood street way blood run street less blood murder 32 percent since 2011 last year old era drop 352 homicide 2015 515 2011 period stop 97 percent say j peter donald spokesman department debate hillary clinton democratic nominee allude number mr trump republican opponent start shout current mayor crime continue drop include murder say — — wrong mr trump interject wrong mrs clinton say murders right mr trump say check right mr trump say let check google sear

In [None]:
def predict_proba(texts):
    """
    This function takes a list of raw text strings, vectorizes them using the
    pre-fitted TF-IDF vectorizer, and returns the probability predictions
    from the trained model.
    """
    return model.predict_proba(vectorizer.transform(texts))

# Create a LIME text explainer
explainer = LimeTextExplainer(class_names=['true', 'fake'])

# Choose an instance from the test set by its index
idx = 0  # you can adjust this index as needed
raw_text = df.loc[test_indices].iloc[idx]['content']

# Use the custom predict_proba function in LIME
exp = explainer.explain_instance(raw_text, predict_proba, num_features=10)
exp.show_in_notebook(text=raw_text)

### Validate model
Extra data from https://www.kaggle.com/datasets/stevenpeutz/misinformation-fake-news-text-dataset-79k

In [33]:
#load in validation dataset
external_df = pd.read_csv('Data/extra_data_final_clean.csv')
external_df = external_df.dropna(subset=['content_lemma_nostop']).reset_index(drop=True)
external_text = external_df['content_lemma_nostop']
external_labels = external_df['label']

# Transform the external data using the already fitted vectorizer (or pipeline)
X_external = vectorizer.transform(external_text)

# Predict using the trained model
external_predictions_lr = final_model.predict(X_external)

# Evaluate the performance on the external dataset
print("External Dataset Classification Report:\n", classification_report(external_labels, external_predictions_lr))


ValueError: X has 5000 features, but LogisticRegression is expecting 5003 features as input.

## Phase 2 

In [23]:
df2 = pd.read_csv('Data/custom_cleaned.csv')

import pandas as pd
import numpy as np
import gensim
from gensim import corpora
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')
from textblob import TextBlob
import spacy
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Load a spaCy model for NER
nlp = spacy.load('en_core_web_sm')

# Assume df2 is already loaded and cleaned:
# df2 = pd.read_csv('your_cleaned_df2.csv')

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_multiclass, y_multiclass, test_size=0.2, random_state=42)
# Train a classifier on the training data
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

# Predict on the held-out test set
predictions = clf.predict(X_test)
print("Classification Report on Test Set:\n", classification_report(y_test, predictions))

###########################
# 1. Topic Modeling (LDA)
###########################
# Preprocess text for LDA: tokenize and remove stopwords
stop_words = set(stopwords.words('english'))
def preprocess(text):
    tokens = [token for token in gensim.utils.simple_preprocess(text) if token not in stop_words]
    return tokens

df2['tokens'] = df2['content'].apply(preprocess)

# Create a dictionary and corpus for LDA
dictionary = corpora.Dictionary(df2['tokens'])
corpus = [dictionary.doc2bow(text) for text in df2['tokens']]

# Train LDA model (e.g., 5 topics)
lda_model = gensim.models.LdaModel(corpus, num_topics=5, id2word=dictionary, passes=10)

# Function to extract topic distribution for a document
def get_topic_distribution(text):
    bow = dictionary.doc2bow(preprocess(text))
    # Get topic probabilities for all topics (ensure all topics are returned)
    topic_dist = lda_model.get_document_topics(bow, minimum_probability=0.0)
    # Return a list of probabilities ordered by topic index
    return [prob for topic, prob in sorted(topic_dist, key=lambda x: x[0])]

df2['topic_dist'] = df2['content'].apply(get_topic_distribution)

###############################
# 2. Sentiment Analysis
###############################
def get_sentiment(text):
    blob = TextBlob(text)
    return blob.sentiment.polarity, blob.sentiment.subjectivity

df2[['polarity', 'subjectivity']] = df2['content'].apply(lambda x: pd.Series(get_sentiment(x)))

#######################################
# 3. Named Entity Recognition (NER)
#######################################
def get_entity_counts(text):
    doc = nlp(text)
    counts = {'PERSON': 0, 'ORG': 0, 'GPE': 0}
    for ent in doc.ents:
        if ent.label_ in counts:
            counts[ent.label_] += 1
    return counts

df2['entity_counts'] = df2['content'].apply(get_entity_counts)
df2['person_count'] = df2['entity_counts'].apply(lambda x: x['PERSON'])
df2['org_count'] = df2['entity_counts'].apply(lambda x: x['ORG'])
df2['gpe_count'] = df2['entity_counts'].apply(lambda x: x['GPE'])

########################################
# 4. Domain-Specific Keyword Counts
########################################
# Define a dictionary for domain-specific keywords
domain_keywords = {
    'politics': ['election', 'government', 'senate', 'congress'],
    'health': ['vaccine', 'covid', 'pandemic', 'healthcare'],
    'finance': ['stock', 'market', 'economy', 'trade']
}

def count_domain_keywords(text):
    counts = {}
    text_lower = text.lower()
    for category, keywords in domain_keywords.items():
        count = sum(text_lower.count(keyword) for keyword in keywords)
        counts[category] = count
    return counts

df2['domain_counts'] = df2['content'].apply(count_domain_keywords)
# Expand domain keyword counts into separate columns
domain_df = df2['domain_counts'].apply(pd.Series)
df2 = pd.concat([df2, domain_df], axis=1)

###############################################
# 5. Combine Features into a Single Feature Vector
###############################################
def combine_features(row):
    features = []
    # Add topic distribution (list of probabilities, e.g., 5 topics)
    features.extend(row['topic_dist'])
    # Add sentiment scores (polarity and subjectivity)
    features.append(row['polarity'])
    features.append(row['subjectivity'])
    # Add NER counts (for PERSON, ORG, GPE)
    features.append(row['person_count'])
    features.append(row['org_count'])
    features.append(row['gpe_count'])
    # Add domain-specific keyword counts (order by sorted key names)
    for key in sorted(domain_keywords.keys()):
        features.append(row.get(key, 0))
    return features

df2['feature_vector'] = df2.apply(combine_features, axis=1)

# Create a final feature matrix (each row is a document's feature vector)
X_multiclass = np.vstack(df2['feature_vector'].values)
y_multiclass = df2['label']

###############################################
# Example: Training a Multi-Class Classifier
###############################################
from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier(random_state=42)
clf.fit(X_multiclass, y_multiclass)

# Evaluate performance (if you have a separate validation set, use that)
predictions = clf.predict(X_multiclass)
print("Classification Report for Multi-Class Classifier:\n", 
      pd.Series(predictions).value_counts(), "\n")

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\hazel\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Classification Report on Test Set:
               precision    recall  f1-score   support

           0       0.73      0.71      0.72        79
           1       0.60      0.67      0.63        78
           2       0.86      0.80      0.82        83
           3       1.00      0.99      0.99        80

    accuracy                           0.79       320
   macro avg       0.80      0.79      0.79       320
weighted avg       0.80      0.79      0.79       320

Classification Report for Multi-Class Classifier:
 2    400
1    400
3    400
0    400
dtype: int64 



In [21]:
df2

Unnamed: 0,content,label,tokens,topic_dist,polarity,subjectivity,entity_counts,person_count,org_count,gpe_count,domain_counts,politics,health,finance,feature_vector
0,perdue announces initiative even playing field giving chickens guns salisbury md emphasizing integral part company mission raise humanely sourced meat poultry processing giant perdue farms announce new initiative tuesday even playing field give gun chicken perdue always strive ensure animal treat dignity respect today expand upon commitment hand bird loaded pistol allow fair chance escape say perdue spokesperson jamie walton describe new policy chicken would issue weapon single round chamber give opportunity kill captor wellplaced bullet could take slaughter get one clean shot good sportsmanship every chicken perdue facility free easy access firearm may lose staff gunshot wound everyone involve human fowl opportunity defend right press time source confirm shift balance power several perdue processing plant result thousand employee slaughter assemblyline efficiency,2,"[perdue, announces, initiative, even, playing, field, giving, chickens, guns, salisbury, md, emphasizing, integral, part, company, mission, raise, humanely, sourced, meat, poultry, processing, giant, perdue, farms, announce, new, initiative, tuesday, even, playing, field, give, gun, chicken, perdue, always, strive, ensure, animal, treat, dignity, respect, today, expand, upon, commitment, hand, bird, loaded, pistol, allow, fair, chance, escape, say, perdue, spokesperson, jamie, walton, describe, new, policy, chicken, would, issue, weapon, single, round, chamber, give, opportunity, kill, captor, wellplaced, bullet, could, take, slaughter, get, one, clean, shot, good, sportsmanship, every, chicken, perdue, facility, free, easy, access, firearm, may, lose, staff, gunshot, wound, everyone, involve, ...]","[0.0016746444, 0.0016832993, 0.0016764668, 0.9932913, 0.0016742742]",0.067938,0.615152,"{'PERSON': 1, 'ORG': 0, 'GPE': 0}",1,0,0,"{'politics': 0, 'health': 0, 'finance': 0}",0,0,0,"[0.0016746444, 0.0016832993, 0.0016764668, 0.9932913, 0.0016742742, 0.06793831168831169, 0.6151515151515152, 1, 0, 0, 0, 0, 0]"
1,met police blocked propalestine protest march outside bbc today palestine coalition inform met police intend go back previous agreement impose condition prevent u march bbc hq portland place saturday 18 january already announce intention assemble outside bbc protest proisrael bias coverage something recently highlight detailed report journalist owen jones corporation far responded utterly condemn attempt use repressive power prevent planned protest bbc route march confirm police nearly two month ago agree publicly announce 30 november route begin bbc use twice last 15 month demonstration since february 2024 week go metropolitan police renege agreement state intention prevent protest go ahead plan bbc major institution publiclyfunded state broadcaster rightly accountable public unacceptable police misuse public order power shield bbc democratic scrutiny excuse offer police march could cause disruption nearby synagogue follow representation proisrael group activist publicly call action take curtail right protest israel ongoing genocide include chief rabbi ephraim mirvis openly celebrate horrific criminal action israeli military gaza describe outstanding possible thing decent responsible country fact close synagogue bbc even route march moreover met police acknowledge single incident threat synagogue attach march suggestion march somehow hostile jewish people ignore fact every march join thousand jewish people many organised jewish bloc address jewish speaker demonstration platform representatives jewish bloc write police seek meeting express concern police choose listen solely proisrael jewish voice response firmly reject attempt suppress right campaign end israel genocidal violence decade long violation right palestinian people past week israel intensify indiscriminate attack include hospital civilian shelter socalled humanitarian safe zone ongoing complicity british government crime continue bring people onto street huge number march represent diverse cross section public include palestinian community many relative kill israel remain dialogue metropolitan police call immediately abandon intention prevent protest bbc call rightly outrage israel ongoing genocide uphold democratic right protest join u march london saturday 18 january national march palestine coalition,1,"[met, police, blocked, propalestine, protest, march, outside, bbc, today, palestine, coalition, inform, met, police, intend, go, back, previous, agreement, impose, condition, prevent, march, bbc, hq, portland, place, saturday, january, already, announce, intention, assemble, outside, bbc, protest, proisrael, bias, coverage, something, recently, highlight, detailed, report, journalist, owen, jones, corporation, far, responded, utterly, condemn, attempt, use, repressive, power, prevent, planned, protest, bbc, route, march, confirm, police, nearly, two, month, ago, agree, publicly, announce, november, route, begin, bbc, use, twice, last, month, demonstration, since, february, week, go, metropolitan, police, renege, agreement, state, intention, prevent, protest, go, ahead, plan, bbc, major, institution, publiclyfunded, state, ...]","[0.00068162556, 0.000681104, 0.16676784, 0.0006772241, 0.83119226]",0.061107,0.338738,"{'PERSON': 1, 'ORG': 9, 'GPE': 9}",1,9,9,"{'politics': 1, 'health': 0, 'finance': 0}",1,0,0,"[0.00068162556, 0.000681104, 0.16676784, 0.0006772241, 0.83119226, 0.06110714285714286, 0.33873809523809517, 1, 9, 9, 0, 0, 1]"
2,moment mark zuckerberg give away game like rest tech billionaire class meta ceo mark zuckerberg decide give pretense ethic order cozy presidentelect donald trump zuckerbergkilledmeta fact check andendedall company diversity equity inclusion program order damage control audition trump administration favor zuckerberg go joe rogan podcast become forum billionaire act aggrieve theytake control country rogan ask zuckerberg elaborate become victim politicized government zuckerberg ramble organization look u like really involve social medium like think like cfpb like financial nt even know stand financial organization elizabeth warren set basically like bank say consumer financial protection bureau start thedoddfrank wall street reform consumer protection act beeninvestigating metaover allegation misuse private data financial gain idea zuckerberg nt know cfpb nt exclusively involve bank incredibly disingenuous either outrageously dumb shareholder must force leave business lie rogan show reason kind find theory want investigate like okay clearly try really hard right like like find find theory like nt know kind like throughout party government sort nt know nt know stuff work mean never government zuckerberg tell rogan ceo meta talk cfpb investigation maybe talk theupcoming antitrust trialbetween meta federal trade commission since trump first term meta fight keep antitrust law break control facebook instagram outgoing ftc chair lina khantold cnbcthat nt surprised company like meta try get type sweetheart deal incoming trump administration rogan podcast zuckerberg cravenly appeal insecure aggrieve male audience tell roganthat corporate world pretty culturally neuter argue masculine energy lack workforce today one thing say want kind like welcome make good environment everyone think another basically say masculinity bad say according tothe new york times evermasculine zuckerberg follow day later throw former coo sheryl sandberg bus meta previous dei policy zuckerberg secure hisspecial vip seatat trump inauguration along fellow oligarch elon musk jeff bezos need pesky consumer protection watchdog go away,1,"[moment, mark, zuckerberg, give, away, game, like, rest, tech, billionaire, class, meta, ceo, mark, zuckerberg, decide, give, pretense, ethic, order, cozy, presidentelect, donald, trump, fact, check, andendedall, company, diversity, equity, inclusion, program, order, damage, control, audition, trump, administration, favor, zuckerberg, go, joe, rogan, podcast, become, forum, billionaire, act, aggrieve, theytake, control, country, rogan, ask, zuckerberg, elaborate, become, victim, politicized, government, zuckerberg, ramble, organization, look, like, really, involve, social, medium, like, think, like, cfpb, like, financial, nt, even, know, stand, financial, organization, elizabeth, warren, set, basically, like, bank, say, consumer, financial, protection, bureau, start, thedoddfrank, wall, street, reform, consumer, protection, act, ...]","[0.0006961865, 0.00069489435, 0.4201118, 0.0006938906, 0.5778032]",0.111745,0.448273,"{'PERSON': 14, 'ORG': 2, 'GPE': 1}",14,2,1,"{'politics': 3, 'health': 0, 'finance': 1}",3,0,1,"[0.0006961865, 0.00069489435, 0.4201118, 0.0006938906, 0.5778032, 0.11174534924534926, 0.44827278577278584, 14, 2, 1, 1, 0, 3]"
3,old obama birth certificate issue resurfaces oh dear poor old barry look like ca nt get much break day renew call reinvestigate birth certificate may finally prove actually bear kenya united states two overt term office one covert term biden presidency obama already process governance frankly seem little late mess issue fraud find regard obama birth certificate would repercussion url open photoshop illustrator affinity photo see various layer editable document,2,"[old, obama, birth, certificate, issue, resurfaces, oh, dear, poor, old, barry, look, like, ca, nt, get, much, break, day, renew, call, reinvestigate, birth, certificate, may, finally, prove, actually, bear, kenya, united, states, two, overt, term, office, one, covert, term, biden, presidency, obama, already, process, governance, frankly, seem, little, late, mess, issue, fraud, find, regard, obama, birth, certificate, would, repercussion, url, open, photoshop, illustrator, affinity, photo, see, various, layer, editable, document]","[0.002897019, 0.9883866, 0.0028992577, 0.0029147349, 0.0029023804]",-0.060227,0.415909,"{'PERSON': 1, 'ORG': 0, 'GPE': 1}",1,0,1,"{'politics': 0, 'health': 0, 'finance': 0}",0,0,0,"[0.002897019, 0.9883866, 0.0028992577, 0.0029147349, 0.0029023804, -0.06022727272727273, 0.41590909090909095, 1, 0, 1, 0, 0, 0]"
4,accuses ed fitzgerald paytoplay corruption politics,3,"[accuses, ed, fitzgerald, paytoplay, corruption, politics]","[0.8818095, 0.029604368, 0.029622557, 0.029655054, 0.029308535]",0.000000,0.000000,"{'PERSON': 1, 'ORG': 0, 'GPE': 0}",1,0,0,"{'politics': 0, 'health': 0, 'finance': 0}",0,0,0,"[0.8818095, 0.029604368, 0.029622557, 0.029655054, 0.029308535, 0.0, 0.0, 1, 0, 0, 0, 0, 0]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1595,trump actions unprecedented seven historians say president donald j trump swear office one month ago many americans week felt like year executive order target diversity program marginalize group purge federal government highlevel resignation extremist cabinet nominee elon musk forcible takeover administrative state mass deportation bizarre threat annex greenland canada renaming gulf mexico string deadly airline disaster clash white house judiciary feud press rise tension russia war ukraine hard keep trump daily blitz authoritarian action propaganda let alone get sense big picture one word come medium legal expert longtime government official try describe trump retake power launch campaign revenge enemy unprecedented seem little ferocious rule brazen corruption compare least past two half century american republic united states still young country long global view yield fascinate parallel moment date back antiquity seven historian tell rolling stone monarch elites dictator past anticipate someone like trump result often brash shortsighted decision kind legacy ultimately leave behind story nt road map future exactly academic point history nt really repeat say rhyme echo help u make sense chaos roel konijnendijk classic professor darby fellow ancient history university oxford warn careful try draw connection trump era previous incarnation brute power regime particular combination farright bigotry illegal asset strip seem unique product country character history say current white house plenty cruel strongman ancient history total disregard responsibility office never ideological context imagine thing see still point ancient greece commentary thucydides athenian historian live 5th century bc writing peloponnesian war sparta athens dominance hellenic world thucydides remark cause uprising city corcyra proponent democracy overthrow wealthy ruling class put many death civil strife corcyra thucydides note sort happen forever continue happen long human nature remain konijnendijk put thucydides go long list familiar item partisan rhetoric distortion truth corruption judgment suspicion paranoia glorification violence faithless backstabbing cruel vengeance polarization society point moderate citizen destroyed appeal decency laughed sight end konijnendijk say democratic party annihilate oligarch sevenday orgy violence true material condition vastly different greece almost 2500 year ago postcapitol riot us popular arm revolt superrich nt sound quite farfetched decade ago yiijan lin professor yale divinity school write apocalyptic narrative bible book revelation influence american idea immigration say one common historical comparison day mark caligula appoint horse consul say laugh add tale 1st century roman emperor purported insanity apocryphal instead lin find possible precursor trump julian apostate emperor rome 361 363 ce playing nostalgia early pax romana hadrian marcus aurelius julian reject rise christianity julian rescind privilege give christians say lin want basically purge kingdom install traditional paganism attempt turn back clock include purging christian text school conversion christian church pagan temple religious persecution practice century past also movement increase efficiency government go sound really familiar lin add allude musk socalled department government efficiency trump empower gut us federal agency julian thousand different servant dismiss lin say also tribunal set highranking official execute trump reign unlikely end way julian emperor launch counterinvasion persia 363 wound die retreat lin say bring two year upheaval end successor course christian lin see julian failure potential lesson wouldbe tyrant kind aggressive chaotic power also inflict damage jeremy swist classic professor grand valley state university specialize late antiquity roman empire effectively split two western half collapse 5th century ce eastern half often call byzantine empire survive evolve maintain impossible extrapolate period predict 21st century leading put shoe everyday people live similar condition imagine might felt force beyond control attempt radically change society exacerbate already exist social division aggravate environmental public health crisis end take example justinian emperor eastern roman empire 527 565 ce justinian swist say assume power time mark fierce tribalism religious social life early reign brutally suppress insurrection capital city constantinople kill citizen without regard sovereignty still guarantee law despite century autocratic rule inversion julian apostate antichristian policy justinian determine stamp paganism say swist impose interpretation christian theology throughout empire outlawing religious pluralism manifest attack would call institution high education publicly visible pagan time highly educate professor philosophy rhetoric law medicine explain intellectual deem threat christian morality famous school platonic academy athens shut faculty force flee empire emperor aim moral crusade also term sexual deviancy inflict cruel punishment charge homosexual activity likewise crack sex worker justinian attempt homogenize society enact order new systematized law pair outsized ambition vanity justinian frequently name rename thing several city justinianopolis swist say large building project expensive war abroad include campaign reconquer territory western roman empire drain imperial treasury eventually leave empire vulnerable invasion waning power hasten climate change namely drop temperature northern hemisphere four degree fahrenheit persist number year negatively affect agriculture swist say pandemic form early form bubonic plague kill hundred thousand swist explain may well empathize people try live confront uncertainty turbulent political atmosphere today one regard may bad justinian empire lack technological mean monitor citizen note yet also soon tell effect trump harsh edict may far mostly see lay agenda may experience lot shock awe time tell much stick longterm consequence say swist eleanor janega medieval historian cohost podcast gone medieval write extensively contemporary society imprint epoch look risk trump pose international economy geopolitical stability remind king john england unpopular ruler occupy throne 1199 death 1216 janega say massive misunderstanding believe everything great rebel baron force john seal magna carta famous charter legal protection first issue 1215 instead point rich landowner shield asset right commoner john indeed bad king noble revolt blow kingdom money war france magna carta draw response janega say continue john decide go ignore fact moreover ask receive direct papal support pope innocent iii write rome support john state magna carta shameful demeaning illegal unjust excommunicate baron wag civil war john magna carta would reissue 1225 john son henry iii back janega say rest world capitulate nation time us order preserve relative status nobody say want say anything america nt want establish precedent wherein power government question king john excessive spending threaten comfortable status quo baron fight magna carta today billionaire seek guide trump agenda keep raking profit even case devastate blow market oligarch musk particular pose champion individual liberty trump trash civil right americans would one thing threaten world economy another say janega see people internal math align order protect interest wealthy individual always go wealthy powerful people side destructive force suit end add also always go powerful people overlook rule law get way want way unprecedented happen wealthy powerful nt strictly control always seek thing taxation limitation power important besides julian apostate yiijan lin mention spanish monarch king ferdinand ii aragon queen isabella castile 1492 year royal send christopher columbus first expedition socalled new world issue alhambra decree exile jewish population territory mass deportation say lin incredibly boneheaded move incredibly racist terrible also think different part economy society jewish people part spain cut nose spite face even rich begin flow back americas spain economy effectively collapse attempt uproot jews get rid integral part workforce culture society without think consequence lin put displacement also mean important art heritage wealth move part europe lin say shortage example disastrous deportation like trump want see mean trump plenty american history round people say bring different population chinese exclusion law japanese agricultural worker asian exclusion bring mexican migrant worker continue continue shift hate onto want real story need worker nt look far find instance people liken trump adolf hitler vice president jd vance back 2016 new health secretary robert f kennedy year analogy typically dismiss maga voter hysterical exaggeration usefulness liberal leftist critique trump seem minimal yet impossible overlook similarity trump administration action nazis consolidate power germany 1930s ben miller doctoral fellow graduate school global intellectual history freie universitat berlin cohost podcast bad gays look evil complicated queer history observe quickly take step erase demonize transgender people concerted immediate antitrans push newly elect government newly instal government since ascension nazis power germany say say trump administration literally new nazis say trump administration national socialist dictatorship immediate interest target trans people case miller say regime react noticeable change sexgender system concept coin sociologist gayle rubin mean social structure transform biological sexuality way exist world importantly though miller clarifies attack trans people front fly wedge really go bodily autonomy absolutely everybody cis woman include nt mean happen trans people nt important distraction real fight real fight right real people real life hurt need fight say u stake fight u interest control capacity body decision make body restriction reproductive right one obvious area overlap come policing gender identity sexuality miller say nazis develop detailed elaborate pseudoscience race justify idea turn right kind woman birth machine right kind people note philosophy behind trump administration diffuse less specific think definite echo jd vance say need baby mean need right kind baby oppose immigrant miller acknowledges group like gays trump member seek distinguish separate lgbtq individual existence men 1920s 1930s germany think normal gay guy much thought nothing fear nazism one gay member original nazi paramilitary sturmabteilung fact write anonymous memoir draw link sexuality miller describe homoerotic band brotherhood fascist militia course nt go well miller continue nazis come power lot thing trump administration right send police shut gay bar immediately start initiate mass arrest sodomy exist antisodomy statute outcome stigmatized people cast lot autocratic force misguided conviction fear misguided idea somehow make safe punch others face historically bad say track record solidarity much well alex von tunzelmann historian focus 20th century international relation political iconography recent book fallen idols toppling controversial historical monument say trump may yet meet definition dictator style recall many 20th 21st century absolute ruler declare law attempt govern relentless executive order unusual absolutist lean informal yet highly visible lieutenant case elon musk von tunzelmann say powerful precarious position hold lieutenant either tend become extremely unpopular draw fire sometimes literally away authoritarian become popular threaten authoritarian adolf hitler man iron heart reinhard heydrich assassinate vladimir putin wagner group chief yevgeny prigozhin briefly lead rebellion soon afterwards kill suspicious plane crash trump erratic behavior add destabilize everyone around establish fixed point version mad king act deliberately frame power unpredictable unboundaried von tunzelmann also cite attempted renamings geographical feature gulf mexico dictatorlike move among many renamings rafael trujillo rename capital dominican republic ciudad trujillo country high mountain pico trujillo say strongly remind learn bill west virginia house delegates rename state high point spruce knob trump mountain know authoritarian really arrive nt even order stuff people suck von tunzelmann meanwhile strike musk foregrounding young son x ae axii child new wrinkle maga iconography joseph stalin one display well time imprison torture murder million people stalin love depict surround smile child instance statue stalin happy soviet child massproduced put school hospital park across soviet union say contrast benign fatherly image horrific reality intentional course stalin mostly terrible sometimes nonexistent relationship child lot different woman musk consistently claim human face population collapse baby often carry son x shoulder public appearance frustration x mother musician grimes although least 12 child three woman musk criticize absent father estranged trans daughter pointedly ignore communication family need grimes rightwing influencer recently claim give birth another son accord two mother lastly latin america may shed light trump ally control government luis herran avila historian cold war region particular interest anticommunist extreme rightwing movement professor university new mexico say hostile republican approach us educational system hallmark past crackdown school scholarship southern hemisphere military regime chile brazil argentina uruguay name classic example much invest idea education take marxists level especially university herran avila say chile military plainly defunded public university cause drastic drop enrollment closing avenue upward social mobility ideologically speak outlook drastically different us right saw campus site indoctrination perversion despise youth counterculture fear woman sexual liberation feminism practically equated thing like birth control sex marriage attemtps destroy moral foundation nation notion herran avila observe may original retain certain potency late 2023 javier milei economist turn pundit politician take office president argentina inflict harsh austerity measure fire thousand government worker musk praise policy apparently find inspiration milei chainsaw attack organize doge washington trump behalf key understand historical connection herran avila say use concentrated authoritarian power demolish public sector name crusade corruption impose deregulation name efficiency promote power corporate elite name innovation promise kind rebirth redemption mistake sin predecessor add important component link present military regime pursue 50plus year ago antecedent rhetoric herran avila say find argentine military seizure power 1976 subsequent regime claim conduct process national reorganization almost akin revolution radical reconfiguration state society right junta carry bloody campaign state terrorism kill disappear ten thousand perceived dissident quell political opposition atrocity address nightmare thankfully come pass united states top bottom correlation leap mind historian consider trump stated priority second term hardly encouraging whirlwind first month promise reckless vicious politics come one hope predict truly unprecedented turn national narrative truth certain meaningful way people,0,"[trump, actions, unprecedented, seven, historians, say, president, donald, trump, swear, office, one, month, ago, many, americans, week, felt, like, year, executive, order, target, diversity, program, marginalize, group, purge, federal, government, highlevel, resignation, extremist, cabinet, nominee, elon, musk, forcible, takeover, administrative, state, mass, deportation, bizarre, threat, annex, greenland, canada, renaming, gulf, mexico, string, deadly, airline, disaster, clash, white, house, judiciary, feud, press, rise, tension, russia, war, ukraine, hard, keep, trump, daily, blitz, authoritarian, action, propaganda, let, alone, get, sense, big, picture, one, word, come, medium, legal, expert, longtime, government, official, try, describe, trump, retake, power, launch, campaign, revenge, enemy, unprecedented, seem, ...]","[0.06755159, 0.932152, 9.8833116e-05, 9.894359e-05, 9.8727025e-05]",0.056878,0.474823,"{'PERSON': 46, 'ORG': 21, 'GPE': 20}",46,21,20,"{'politics': 9, 'health': 1, 'finance': 5}",9,1,5,"[0.06755159, 0.932152, 9.8833116e-05, 9.894359e-05, 9.8727025e-05, 0.05687813986425098, 0.4748226310726312, 46, 21, 20, 5, 1, 9]"
1596,goal common core instill federally determine attitude mindset student include political religious belief,3,"[goal, common, core, instill, federally, determine, attitude, mindset, student, include, political, religious, belief]","[0.941323, 0.014685489, 0.014792335, 0.014641904, 0.0145573225]",-0.100000,0.283333,"{'PERSON': 0, 'ORG': 0, 'GPE': 0}",0,0,0,"{'politics': 0, 'health': 0, 'finance': 0}",0,0,0,"[0.941323, 0.014685489, 0.014792335, 0.014641904, 0.0145573225, -0.09999999999999999, 0.2833333333333333, 0, 0, 0, 0, 0, 0]"
1597,youngkin orders flags flown fullstaff trump inauguration virginia gov glenn youngkin order flag state fly fullstaff presidentelect donald trump inauguration monday statement issue saturday youngkin order american flag flag commonwealth virginia fly fullstaff trump inauguration monday youngkin add flag would lower halfstaff next day honor former president james earl jimmy carter jr pass away december age 100 youngkin say statement accordance authority vest governor accordance federal law 4 usc § 6 hereby order flag united states america commonwealth virginia fly fullstaff state local building ground commonwealth recognition inauguration 47th president united states flag lower back halfstaff following day continue honor former president james earl carter jr remain halfstaff january 28 2025 youngkin add hereby order flag shall raise 11 monday january 20 2025 lower sunrise tuesday january 21 2024 youngkin join iowa gov kim reynolds texas gov greg abbott florida gov ron desantis make announcement flag state would raise fullstaff trump inauguration december president joe biden order flag fly halfstaff period 30 day honor carter therefore joseph r biden jr president united states america authority vest constitution law united states honor tribute memory president james earl carter jr expression public sorrow hereby direct flag united states display halfstaff white house public building ground military post naval station naval vessel federal government district columbia throughout united states territories possession period 30 day day death also direct length time representative united states foreign country shall make similar arrangement display flag halfstaff embassy legation consular office facility abroad include military facility naval vessel station governor washington gov bob ferguson missouri gov mike kehoe also order flag state raise fullstaff trump inauguration,1,"[youngkin, orders, flags, flown, fullstaff, trump, inauguration, virginia, gov, glenn, youngkin, order, flag, state, fly, fullstaff, presidentelect, donald, trump, inauguration, monday, statement, issue, saturday, youngkin, order, american, flag, flag, commonwealth, virginia, fly, fullstaff, trump, inauguration, monday, youngkin, add, flag, would, lower, halfstaff, next, day, honor, former, president, james, earl, jimmy, carter, jr, pass, away, december, age, youngkin, say, statement, accordance, authority, vest, governor, accordance, federal, law, usc, hereby, order, flag, united, states, america, commonwealth, virginia, fly, fullstaff, state, local, building, ground, commonwealth, recognition, inauguration, th, president, united, states, flag, lower, back, halfstaff, following, day, continue, honor, former, president, james, earl, ...]","[0.000795701, 0.00080637407, 0.9967941, 0.00080101145, 0.00080279517]",0.153750,0.267917,"{'PERSON': 12, 'ORG': 3, 'GPE': 8}",12,3,8,"{'politics': 1, 'health': 0, 'finance': 0}",1,0,0,"[0.000795701, 0.00080637407, 0.9967941, 0.00080101145, 0.00080279517, 0.15375, 0.2679166666666667, 12, 3, 8, 0, 0, 1]"
1598,connecticut think 12 year enough murder cannibalism lax attitude toward crime blue state nt fade even country shift away crimetolerant attitude 2020 progressive mean blue state still put resident risk dangerous people connecticut release man kill someone hatchet ate part victim brain eyeball man tyree smith find guilty reason insanity due schizophrenia alcohol drug disorder 2013 order confine 60 year state maximumsecurity psychiatric hospital seem reasonable enough connecticut psychiatric security review board grant smith supervised release hospital mean smith able return society 12 year sentence hack man death take body part graveyard picnic review board already let smith spend last nine month community facility according 2023 report deny hear voice hallucinate homicidal thought according report smith deny experiencing craving state arise would reach hospital community support provider evidently choice trust schizophrenic cannibal ax murderer claim nt crave human flesh anymore reach craving come back arlington mother warns loophole allowed sex offender women locker room 2023 connecticut legislator consider bill would remove review board criminal court process extend stay patient word connecticut concern psychiatric security review board tough come evaluate patient meanwhile murderous cannibal deem rehabilitated 12 year fact accord one psychiatrist joy really calming presence patient smith likely wo nt calming presence connecticut resident state grant freedom people life worth 12 year psychiatric hospital sentence 60 year first place connecticut democratic mean softoncrime mindset democratic party allow blossom unchecked matter dangerous resident state,0,"[connecticut, think, year, enough, murder, cannibalism, lax, attitude, toward, crime, blue, state, nt, fade, even, country, shift, away, crimetolerant, attitude, progressive, mean, blue, state, still, put, resident, risk, dangerous, people, connecticut, release, man, kill, someone, hatchet, ate, part, victim, brain, eyeball, man, tyree, smith, find, guilty, reason, insanity, due, schizophrenia, alcohol, drug, disorder, order, confine, year, state, maximumsecurity, psychiatric, hospital, seem, reasonable, enough, connecticut, psychiatric, security, review, board, grant, smith, supervised, release, hospital, mean, smith, able, return, society, year, sentence, hack, man, death, take, body, part, graveyard, picnic, review, board, already, let, smith, spend, last, nine, month, community, facility, according, ...]","[0.0009210386, 0.0009187103, 0.9963326, 0.0009129775, 0.0009146708]",-0.045056,0.453833,"{'PERSON': 1, 'ORG': 1, 'GPE': 8}",1,1,8,"{'politics': 0, 'health': 0, 'finance': 0}",0,0,0,"[0.0009210386, 0.0009187103, 0.9963326, 0.0009129775, 0.0009146708, -0.04505555555555554, 0.45383333333333326, 1, 1, 8, 0, 0, 0]"


In [22]:
from sklearn.metrics import classification_report
print("Full Classification Report:\n", classification_report(y_multiclass, predictions))

Full Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       400
           1       1.00      1.00      1.00       400
           2       1.00      1.00      1.00       400
           3       1.00      1.00      1.00       400

    accuracy                           1.00      1600
   macro avg       1.00      1.00      1.00      1600
weighted avg       1.00      1.00      1.00      1600

