In [49]:
import pandas as pd
import numpy as np
import pickle

# importing a vectorizer -> this is how we get to the topic model
from sklearn.feature_extraction.text import CountVectorizer

In [40]:
# reading in the data
df = pd.read_json("ext_act_lang_filtered_pre_proc.ndjson", lines=True)

In [42]:
# loading the pickled model
lda_model = pickle.load(open('ext_act_lda_k15.pk', 'rb'))

In [44]:
# function to override the pre-proc that occurs within the vectorizer
# just returns the original string -> because I already had it clean
def dummy_func(x):   
    return x

In [45]:
def cust_tokenizer(x):
    return x.split()

In [52]:
# initializing a TFIDF vectorizer with unigram representation
# it's cheaper and it will allow for relative pruning (those terms appearing within fewer than .05% of docs or in more than 99%)
# feeding in our own functions for splitting and cleaning because otherwise it will mess up our plan
tf_vectorizer = CountVectorizer(
    min_df =.005, max_df = .99,  preprocessor=dummy_func, tokenizer=cust_tokenizer)

In [54]:
# tf vectorized fiting
tf_vectorizer.fit_transform(df["bo"])

<1266682x339 sparse matrix of type '<class 'numpy.int64'>'
	with 4742393 stored elements in Compressed Sparse Row format>

In [55]:
# vectorized docs
vectorized = tf_vectorizer.transform(df["bo"]) 

In [56]:
# makign the labels
lda_labels =  np.matrix(lda_model.fit_transform(vectorized))

In [57]:
# normalizing the topic distribution
doc_topic_dist = lda_labels/lda_labels.sum(axis=1)

In [58]:
# code to pull the indices for top topics
df["doc_labels"] = doc_topic_dist.argmax(axis=1)

In [59]:
# code to pull the proability for each top topic
df["label_probs"] = np.amax(doc_topic_dist, axis=1)

In [63]:
# getting the value counts here. 
df["doc_labels"].value_counts(normalize=True)*100

0     15.982780
3      9.155258
1      7.901115
5      7.617460
8      6.525474
7      6.262503
13     6.025664
14     5.972770
4      5.496802
6      5.392119
11     5.072228
9      4.929098
10     4.925546
12     4.428973
2      4.312211
Name: doc_labels, dtype: float64

In [78]:
# setting the display limit
pd.set_option('display.max_rows', 200) 

In [80]:
# exploring the 0th (1st) topic
pd.concat([df[(df["doc_labels"]== 0)].head(100), df[(df["doc_labels"]== 0)].tail(100)],axis=0)

Unnamed: 0,t,id,cd,c,u,un,dmn,bo,bo_lang,conf,doc_labels,label_probs
17,p,997c7e2c5e5b4d29b8986d74850455b2,20201023,1603491480000,75d997177c0944cba9d1e8c701ef5a4b,ViolentVixen29,theblaze.com,store choice,English,[0.9553281069],0,0.066667
19,p,8896272b37ce42dba6a2b684b716e7f6,20201205,1607196103000,d917baeb3bef4a0390e22b7a2d870f8c,1DRACARYS,image-cdn.parler.com,,English,[0.4049592614],0,0.066667
43,p,1cb9aafbdbeb4060969845722e02352c,20201113,1605269608000,e11890c2ffc348df9cb703651a3cc9c0,LibertyElaine,parler.com,echo echo,English,[0.4252257645],0,0.066667
46,p,71c871b5ea9742f19bdf76b779903db5,20200326,1585222448000,82bfccbefc8c4806827b538a59c09f19,Reddog1776,image-cdn.parler.com,flotus,English,[0.8152545094],0,0.066667
52,p,f96aec23e86240b2abac1e4414a10adb,20201118,1605742074000,75d997177c0944cba9d1e8c701ef5a4b,ViolentVixen29,theblaze.com,dude lost fucking mind,English,[0.9986974001000001],0,0.066667
66,p,23b0c236137841228e4cccd88d751ab2,20201220,1608489229000,75d997177c0944cba9d1e8c701ef5a4b,ViolentVixen29,infowars.com,glad worry sorry,English,[0.9978201985],0,0.066667
74,p,3225a704b28c48099cfb7989457b4663,20201220,1608503552000,75d997177c0944cba9d1e8c701ef5a4b,ViolentVixen29,justthenews.com,surprised,English,[0.9867810607],0,0.066667
88,p,95fca46e54354aa488dc871b43f02295,20201201,1606864223000,e11890c2ffc348df9cb703651a3cc9c0,LibertyElaine,parler.com,tomfitton,English,[0.6096220016],0,0.066667
96,p,ec9d9c98c10f4a68b682735acb533bf6,20200414,1586898500000,8b67993183a14587a001010058d089d2,chucknellis,image-cdn.parler.com,che guevara picture living room probably move ...,English,[0.9228671193],0,0.066667
105,p,a6d287ea76a643c587e472962186dd12,20201101,1604196294000,376432f536af4ce8ba808ad8640c7f61,JimLin,washingtonexaminer.com,fun idea,English,[0.6785570979000001],0,0.066667


In [81]:
# exploring the 1st (2nd) topic
pd.concat([df[(df["doc_labels"]== 1)].head(100), df[(df["doc_labels"]== 1)].tail(100)],axis=0)

Unnamed: 0,t,id,cd,c,u,un,dmn,bo,bo_lang,conf,doc_labels,label_probs
3,p,a0124e3f9afa4fe29d404e41e3ac9a5f,20201009,1602262587000,6e6d4f8e7479446f8f06d5d5c0fae9a3,AppleJax,image-cdn.parler.com,guilty charged election dirty democrat may pul...,English,[0.9819905758],1,0.302336
28,p,b34d46e1c49444fa8d39613be3627f0d,20201114,1605359643000,75d997177c0944cba9d1e8c701ef5a4b,ViolentVixen29,thefederalist.com,target understand #getwokegobroke work continu...,English,[0.9580666423],1,0.533333
64,p,5013029abbce4adaaac696ceca248867,20201105,1604542129000,eff3c4bd3a114cd69b05786f5dce1dfe,FordCynthia01,thegatewaypundit.com,call bullhit joe biden could get 10 people cam...,English,[0.8464639187],1,0.384288
67,p,d5fdb36c07f64d7c98d187c0e312b43c,20201102,1604358710000,08743daa2c954428baf4d6f89d62785e,CounterGlobalist,counterglobalist.news,might missed authority found 27 missing child ...,English,[0.789926827],1,0.844444
80,p,f3623b398c624b33b3051397ce0e222f,20201018,1603056268000,eff3c4bd3a114cd69b05786f5dce1dfe,FordCynthia01,youtube.com,msnbc reporter know live tip clinton aide ask ...,English,[0.8003210425],1,0.466986
117,p,70b9fd58983b4a28a7fd66039a8c961c,20201101,1604240033000,87be30dacdc74cd2936bcbe4ece6010d,Bullscricker,image-cdn.parler.com,look could talk,English,[0.6213076115],1,0.688889
131,p,023dfb753c5b471fa606945e0c44db2a,20200627,1593218036000,6e6d4f8e7479446f8f06d5d5c0fae9a3,AppleJax,image-cdn.parler.com,wish could unite instead divide american shade...,English,[0.7387899756],1,0.31475
158,p,4e6d6937b60c4202be831ce692730685,20191122,1574445430000,21031f424913456591d9a9aed4ff26c7,Cobrarick98,image-cdn.parler.com,damn right done generation cannot end fight kid,English,[0.7990193963000001],1,0.630487
159,p,75d3236e22ec445c8e727cd653f6f4b2,20200829,1598664604000,071af09d39374d65882822ebda8db29c,Pascaldibello,image-cdn.parler.com,hey fellow patriot people fucking militia let ...,English,[0.3987943828],1,0.844444
210,p,c8402b456f7b44f1ba4faa9d9c9bdc10,20190721,1563724213000,5f2d3b1cbea6451aa6f9c9f3cab36af9,GaryADePietro,i.imgur.com,hope make good use meme,English,[0.8651368022],1,0.813333


In [82]:
# exploring the 2nd (3rd) topic
pd.concat([df[(df["doc_labels"]== 2)].head(100), df[(df["doc_labels"]== 2)].tail(100)],axis=0)

Unnamed: 0,t,id,cd,c,u,un,dmn,bo,bo_lang,conf,doc_labels,label_probs
26,p,10879ab055e44139a877eb7a26718434,20201128,1606581270000,21031f424913456591d9a9aed4ff26c7,Cobrarick98,video.parler.com,trust anything bill gate say part new world order,English,[0.9181049466000001],2,0.510026
42,p,d7904cd142f645e48899acaf2037839e,20200812,1597234156000,390e312a055dee172249e628bc2ddc22,Lloyd4man,breitbart.com,vp pick kamala harris voiced support defunding...,English,[0.942620337],2,0.766666
97,p,41871adb647144f187a63043c0f090ec,20200909,1599684955000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,lawandcrime.com,trump dangles scotus list bob woodward recordi...,English,[0.7050586939],2,0.493793
102,p,466e59edb1f04d14b415c3f88870a986,20201111,1605066018000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,cnbc.com,fauci put thousand yous restaurant bankruptcy ...,English,[0.8563774228000001],2,0.274444
104,p,ee48aa3a81fb44e18294b069bbbf5cb1,20200603,1591210914000,8b67993183a14587a001010058d089d2,chucknellis,,know democrat want charge gun manufacturer gun...,English,[0.9435888529],2,0.277672
170,p,1b131a70c3254b79affbec7339be61d3,20200824,1598240731000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,streamable.com,blm intimidate diner charlotte nc,English,[0.6631486416],2,0.533333
284,p,10345d793968410b8ff163677fce6606,20200822,1598055137000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,media.thedonald.win,best protest sign ever,English,[0.7783225179000001],2,0.376362
300,p,d3061424bb294346b9e85e0bcde1e44e,20200815,1597512405000,99526563c1ad4c11a6378fca0d1552df,InfoWars,infowars.com,germany migrant rape 11yearold get released 12...,English,[0.3314151168],2,0.688888
352,p,f6d22702350d443a92f4e8401288fc27,20201010,1602346559000,21031f424913456591d9a9aed4ff26c7,Cobrarick98,image-cdn.parler.com,shame kamala,English,[0.7223418951],2,0.533333
411,p,4f1122587f89470195bd6b9f695646d7,20201125,1606340012000,f380a75fcc024f789582d428e310a218,CnsrvativeHusky,thegatewaypundit.com,huge pa lawmaker #communist governor wolf mand...,English,[0.9789121151000001],2,0.413505


In [83]:
# exploring the 3rd (4th) topic
pd.concat([df[(df["doc_labels"]== 3)].head(100), df[(df["doc_labels"]== 3)].tail(100)],axis=0)

Unnamed: 0,t,id,cd,c,u,un,dmn,bo,bo_lang,conf,doc_labels,label_probs
10,p,c155379bd6dd4f8f86c70d968d87a910,20200731,1596171514000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,newsweek.com,bill clinton went jeffrey epstein island 2 you...,English,[0.7237291932000001],3,0.317488
21,p,afd0aed6fe91494a8cffbd683be01031,20190614,1560545386000,8b67993183a14587a001010058d089d2,chucknellis,,radical left far conservative right decide ele...,English,[0.9360142946000001],3,0.599675
22,p,8d733e41dab747e3b67a8eca3e587aae,20200908,1599525500000,e11890c2ffc348df9cb703651a3cc9c0,LibertyElaine,image-cdn.parler.com,favorite state,English,[0.6382056475],3,0.533333
23,p,802cf7d32b034e7aa31f4e7cb4a62d47,20210101,1609477900000,d917baeb3bef4a0390e22b7a2d870f8c,1DRACARYS,image-cdn.parler.com,cheer patriot happy new year,English,[0.24262946840000002],3,0.301574
32,p,bd1df16eb5c64cc6bc907e5eba7266e7,20201127,1606442445000,08743daa2c954428baf4d6f89d62785e,CounterGlobalist,counterglobalist.news,might missed watch obama blame trump border cage,English,[0.5847870111],3,0.48625
34,p,66fff4ac7ec74635a2860b90b5f66924,20201010,1602335796000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,breitbart.com,nunes everybody within obama orbit knew clinto...,English,[0.5717926025],3,0.688889
44,p,fc595f2b7f86402ebf7c92787aef9327,20200730,1596151979000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,newsmax.com,trump hail herman cain tweet newsmaxcom,English,[0.5606334805000001],3,0.425452
58,p,4ebe94dc790348e8995a507fef219ef9,20201017,1602964739000,8b67993183a14587a001010058d089d2,chucknellis,,love america vote donald trump hate america vo...,English,[0.8512661457],3,0.387302
61,p,7b60c1f0836d44a0a325f834d7210101,20201120,1605890579000,eff3c4bd3a114cd69b05786f5dce1dfe,FordCynthia01,thegatewaypundit.com,breaking dominion voting system back testifyin...,English,[0.8628184199000001],3,0.47703
65,p,2e6b213682b14961816de23e57238701,20200822,1598057747000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,bizpacreview.com,president trump democrat want indoctrinate chi...,English,[0.822252512],3,0.598671


In [84]:
# exploring the 4th (5th) topic
pd.concat([df[(df["doc_labels"]== 4)].head(100), df[(df["doc_labels"]== 4)].tail(100)],axis=0)

Unnamed: 0,t,id,cd,c,u,un,dmn,bo,bo_lang,conf,doc_labels,label_probs
40,p,b891dd3345bd44b7ad526ad0d19358a4,20201204,1607088646000,08743daa2c954428baf4d6f89d62785e,CounterGlobalist,counterglobalist.news,must see james okeefe join sean hannity 3rd ni...,English,[0.604088068],4,0.509779
51,p,3ea721923bca4354898c2c2558c35a11,20200404,1586007170000,f26c742bddd2446c88ddef83965c1411,DancrDave,thegatewaypundit.com,reminder fired icig atkinson connected schiffs...,English,[0.8346447945000001],4,0.614532
89,p,f00eac21397f446a8d722c6c3c20660f,20200804,1596572536000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,reddit.com,lake george floyd video showing massive backla...,English,[0.9548317194],4,0.844444
94,p,57cf8e495a7041cabc7a4d64773943b8,20201201,1606824405000,08743daa2c954428baf4d6f89d62785e,CounterGlobalist,counterglobalist.news,must see good start ron paul praise flynn pard...,English,[0.6259053946],4,0.548471
109,p,05425504eef84d7cb8e90f2499ec75f5,20200618,1592521418000,d7ec99ad413856a1ebf3efc34d667c83,LauraCovfefe,thegatewaypundit.com,far left defense official sentenced 30 month p...,English,[0.8325077295000001],4,0.370439
121,p,156763da16684fdb8efa6f0b8d6fefde,20201019,1603124140000,8b67993183a14587a001010058d089d2,chucknellis,breitbart.com,love ar15s joe biden coming joe bidens gun pol...,English,[0.8035354018],4,0.466923
147,p,c324f8abc0e4442d9643b0bc7110d565,20200827,1598560032000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,media.thedonald.win,see,English,[0.8979489803],4,0.533333
153,p,86e1b6d76d1a49f2bd9f16ca8b8af763,20200909,1599618171000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,image-cdn.parler.com,concern everyone unelected tech billionaire ta...,English,[0.9329111576],4,0.688888
186,p,b4f48712fb344e1cad1128cda38f1dbc,20200713,1594632255000,4831fcbb074444058224fb81a86dcbfc,Notter,image-cdn.parler.com,see sharing,English,[0.7827708125],4,0.533333
224,p,98494b189bcd431cb59b96929cff4a2f,20201125,1606292745000,08743daa2c954428baf4d6f89d62785e,CounterGlobalist,counterglobalist.news,#2 story cgn u uk intel agency declare cyber w...,English,[0.525444746],4,0.589412


In [85]:
# exploring the 5th (6th) topic
pd.concat([df[(df["doc_labels"]== 5)].head(100), df[(df["doc_labels"]== 5)].tail(100)],axis=0)

Unnamed: 0,t,id,cd,c,u,un,dmn,bo,bo_lang,conf,doc_labels,label_probs
15,p,0246914d046849a48d2cbc542331b875,20190822,1566434292000,21031f424913456591d9a9aed4ff26c7,Cobrarick98,m.arcamax.com,mainstream conservative like redflagged google...,English,[0.7681446075],5,0.461259
16,p,86b972bdb3844f67aedb263fabfc4ea2,20200915,1600204045000,1ada3334ad9449cf97c7a077eb9296de,Jayman2124,hotair.com,denver police officer file ethic complaint ant...,English,[0.9297281504],5,0.766667
20,p,690ae06819254fc499a60b4d8fdf0313,20201214,1607915743000,08743daa2c954428baf4d6f89d62785e,CounterGlobalist,counterglobalist.news,#3 cgn right fck antifa proud boy brawl antifa...,English,[0.6234436035],5,0.66536
36,p,13a1962acb59423f9cb91e6f1b915bea,20200821,1597990162000,4831fcbb074444058224fb81a86dcbfc,Notter,image-cdn.parler.com,brutally hacked man machete police officer tas...,English,[0.9890937209],5,0.535083
37,p,423b9f9d251a442495a2f587c4b2a407,20200415,1586992298000,36513c29986442e69cececcb41ba6091,SamRiddle,twitchy.com,let eat ice cream pelosiathome congratulates d...,English,[0.883534193],5,0.291903
39,p,727ff44e719c4deabfa3bd68331541dc,20201130,1606725643000,08743daa2c954428baf4d6f89d62785e,CounterGlobalist,counterglobalist.news,breaking news update joe biden hairline fractu...,English,[0.8827851415],5,0.38485
47,p,84ddeb80c02d44cc821d1c84bdfa5777,20201017,1602960657000,75d997177c0944cba9d1e8c701ef5a4b,ViolentVixen29,lawenforcementtoday.com,california patriot let u find puppy throwing b...,English,[0.6665690541],5,0.445145
48,p,e6e51387df5742eb901001d1d360ef8b,20200723,1595465394000,626d0f6025014c50800fa79997b9545d,TTexasRepublic,washingtonexaminer.com,state police number suggest spike virginia gun...,English,[0.912219584],5,0.314217
49,p,4390cf56f8fe4528a47d1104187f456c,20201115,1605472026000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,image-cdn.parler.com,guy easy catcher mask hack protect 98 attempte...,English,[0.8891665339],5,0.28307
56,p,15b2a7a401ce4f5890e04525a5e14ccc,20200205,1580919980000,21031f424913456591d9a9aed4ff26c7,Cobrarick98,image-cdn.parler.com,right girl,English,[0.3616946042],5,0.533333


In [86]:
# exploring the 6th (7th) topic
pd.concat([df[(df["doc_labels"]== 6)].head(100), df[(df["doc_labels"]== 6)].tail(100)],axis=0)

Unnamed: 0,t,id,cd,c,u,un,dmn,bo,bo_lang,conf,doc_labels,label_probs
2,p,d755c2ce09e74c9b9b34953aae839554,20190829,1567070618000,21031f424913456591d9a9aed4ff26c7,Cobrarick98,i.imgur.com,antifa touch trump supporter coming take infec...,English,[0.8768866658000001],6,0.65898
11,p,9b67d7d4a3744c58b5cda9469fcfbc03,20200901,1599001576000,626d0f6025014c50800fa79997b9545d,TTexasRepublic,facebook.com,speaker pelosi refusing address need coronavir...,English,[0.9465208650000001],6,0.431976
18,p,f175d77fe9474836aad7269bd99cb4cf,20201010,1602292087000,e11890c2ffc348df9cb703651a3cc9c0,LibertyElaine,facebook.com,real life terrifying experience anti patient c...,English,[0.7917537093],6,0.516461
27,p,3d5f44c0e1df41d5ac892da05171c169,20201130,1606703084000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,thegatewaypundit.com,proof native american arizona given tv tablet ...,English,[0.8132593036],6,0.688888
30,p,ab1256b6682247109ca38306a86a91bf,20201129,1606692003000,e11890c2ffc348df9cb703651a3cc9c0,LibertyElaine,parler.com,watermark real,English,[0.5155178905],6,0.533333
35,p,ca7f6d82c1204f5aa7ff0696d89e3cf1,20190905,1567707592000,9ba302eb09584878ae72c873d550eb11,doutingthomas1,www.newsmax.com,conservative group sue info comeys alleged fbi...,English,[0.8822106123000001],6,0.57494
50,p,b7419b41b2154bbb9743800de2d3d933,20201003,1601728989000,75d997177c0944cba9d1e8c701ef5a4b,ViolentVixen29,reddit.com,tom arnold turn patriot know,English,[0.8726580143],6,0.437617
54,p,32ac0f96525d436c87545904079a978f,20200606,1591468132000,ac05f68fc0b442a2bf380a435a0286c6,TexasTony,image-cdn.parler.com,really mean woke head far as cannot sleep hurt...,English,[0.9014547467],6,0.475616
60,p,d89a2257154c4d5f99d1dbc0026d80e6,20200909,1599659444000,46563f08c3a34bebbf4960877d77230f,Starblazer692003,americanthinker.com,pandemic history american thinker,English,[0.8663756251],6,0.688889
95,p,03359b8402cb4c48b7527ed8064f794d,20191009,1570627807000,21031f424913456591d9a9aed4ff26c7,Cobrarick98,i.imgur.com,american matter skin color,English,[0.5502846837],6,0.688889


In [87]:
# exploring the 7th (8th) topic
pd.concat([df[(df["doc_labels"]== 7)].head(100), df[(df["doc_labels"]== 7)].tail(100)],axis=0)

Unnamed: 0,t,id,cd,c,u,un,dmn,bo,bo_lang,conf,doc_labels,label_probs
0,p,3619348b91524430882f2b887838a3e4,20201018,1602989910000,8b67993183a14587a001010058d089d2,chucknellis,noqreport.com,facebooks public policy manager global electio...,English,[0.8107684255000001],7,0.612907
5,p,d4d680d708f64c96a9a6b942d229711c,20200909,1599668772000,8b67993183a14587a001010058d089d2,chucknellis,breitbart.com,cnn #fakenews caught covering joe biden cnn us...,English,[0.6953911185],7,0.516861
7,p,eb003b137bb3479a895f680d136c58ad,20201019,1603084454000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,image-cdn.parler.com,biden owned china biden center university penn...,English,[0.9033064842],7,0.518865
57,p,a6b6c032cf2c4f35b13e025af7358e41,20201116,1605554208000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,breitbart.com,rand paul vow prevent biden lockdown cannot go...,English,[0.43300822380000004],7,0.553072
63,p,b2a18ab79c544286a826844b4124b5fe,20200910,1599706175000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,thegatewaypundit.com,biden claim 6000 member u military died corona...,English,[0.7763851881],7,0.402554
68,p,314956cd329e4aa9b5da95546839647f,20200912,1599884058000,8cb0c50fce61469c95869bfe5527edc2,ShannonPatriot,justthenews.com,riding dragon documentary alleges biden family...,English,[0.9055879116000001],7,0.616547
72,p,9c960a919a944c13a8d4fbf326807543,20201123,1606128047000,e3ad4aa0906f440d97f5fd252b3c5635,Batistamariam,youtube.com,stole minnesota biden used 2 strategy work,English,[0.9398302436],7,0.372864
116,p,067f6017a88c421780c630169e14bcbd,20200603,1591190717000,390e312a055dee172249e628bc2ddc22,Lloyd4man,thegatewaypundit.com,13yearold girl autism say attacked twice white...,English,[0.8448603749],7,0.506807
154,p,57938748d65441d8a33e1c16d671031b,20201015,1602765052000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,justthenews.com,barack le three week go obama missing biden ca...,English,[0.9649620056],7,0.514191
172,p,394a6ff449d6460fbcd0bfeaaf93ad05,20200621,1592770880000,69ed2f765addaf481a79a741e536e2f8,WorldTvlr,townhall.com,unfortunately symone lie even though joe biden...,English,[0.9250502586],7,0.791719


In [88]:
# exploring the 8th (9th) topic
pd.concat([df[(df["doc_labels"]== 8)].head(100), df[(df["doc_labels"]== 8)].tail(100)],axis=0)

Unnamed: 0,t,id,cd,c,u,un,dmn,bo,bo_lang,conf,doc_labels,label_probs
9,p,72039196b2f941288871b9394e478318,20201228,1609121023000,08743daa2c954428baf4d6f89d62785e,CounterGlobalist,counterglobalist.news,#2 viewed cgn stacey abrams secures 1 million ...,English,[0.8201742768],8,0.585648
12,p,bec317413cd44169bbce9a4069039e33,20200904,1599183819000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,image-cdn.parler.com,breaking clinesmith turning wiseman potus able...,English,[0.4829572439],8,0.533333
14,p,b6eef36d982742719116e48a8dab447c,20201126,1606416946000,08743daa2c954428baf4d6f89d62785e,CounterGlobalist,counterglobalist.news,#1 cgn right whistleblower biden implicated do...,English,[0.6473478079],8,0.6697
29,p,0bddb5eedff74ea39adbd082850a1a5f,20201110,1605041397000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,justthenews.com,texas assisted living employee charged 134 cou...,English,[0.9428226948],8,0.526322
55,p,04e0f5c52a3b42b6b7213b7a89553cc1,20201203,1607028903000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,thehill.com,trump hit barr voter fraud remark looked thehill,English,[0.7803825736000001],8,0.481341
71,p,a7cc8892a1ac465f89639ac69bf73d0a,20201112,1605207832000,08743daa2c954428baf4d6f89d62785e,CounterGlobalist,counterglobalist.news,#3 viewed cgn fraud film female poll worker fi...,English,[0.7091240883000001],8,0.813333
87,p,602be8da996c447488768977f9ed5778,20200903,1599162223000,8b67993183a14587a001010058d089d2,chucknellis,image-cdn.parler.com,uninformed average voter,English,[0.9225598574],8,0.533333
91,p,430787e8b3a84be5acaf6ac9eb8f6f02,20200220,1582234106000,dda1191f538b4e46ad8380ee31c81f6a,Gayle7753,thegatewaypundit.com,letter 2000 antitrump exdoj lawyer outrage 27 ...,English,[0.8402741551],8,0.533333
123,p,3c395e8c1e2d4b9f83669e27e464699a,20201213,1607828009000,08743daa2c954428baf4d6f89d62785e,CounterGlobalist,counterglobalist.news,#6 trending cgn kracken sidney powell new emer...,English,[0.677646637],8,0.746217
128,p,107c23c1242f4a3b9b546a2909ccd1fc,20201122,1606006638000,eff3c4bd3a114cd69b05786f5dce1dfe,FordCynthia01,revolver.news,explosive new data rigorous statistical analys...,English,[0.7657043934000001],8,0.685423


In [89]:
# exploring the 9th (10th) topic
pd.concat([df[(df["doc_labels"]== 9)].head(100), df[(df["doc_labels"]== 9)].tail(100)],axis=0)

Unnamed: 0,t,id,cd,c,u,un,dmn,bo,bo_lang,conf,doc_labels,label_probs
8,p,d1e9d86e830448c88b688eee73e1e011,20200925,1601055261000,8b67993183a14587a001010058d089d2,chucknellis,,god bless ronpaul praying god bless randpaul s...,English,[0.8366956711],9,0.688889
24,p,e4930bfc6c024eb7ab46755b3b315ce1,20200826,1598475954000,4831fcbb074444058224fb81a86dcbfc,Notter,image-cdn.parler.com,thank family,English,[0.9812156558],9,0.378688
25,p,914399af4bd44b5eb661cd190bdfc75a,20191012,1570885520000,21031f424913456591d9a9aed4ff26c7,Cobrarick98,i.imgur.com,going stand radical leftist democrat america g...,English,[0.9471607208],9,0.422408
59,p,66ece1e23f2a4bde84258a40a0376a0a,20201202,1606874858000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,thepalmierireport.com,project veritas drop cnn tape,English,[0.4568171501],9,0.533333
69,p,9f63a860ecee4a3283873954d56283da,20201117,1605582531000,8cb0c50fce61469c95869bfe5527edc2,ShannonPatriot,townhall.com,israel demand apology cnn host comparing trump...,English,[0.6816611290000001],9,0.688888
76,p,cd281ec6d6ed4cceb4e1f739d38b8622,20200612,1591934408000,376432f536af4ce8ba808ad8640c7f61,JimLin,lifenews.com,one giant step baby federal court upholds miss...,English,[0.7096105218000001],9,0.307537
79,p,9ab7e9dd97d243738f35b482f80beee1,20201002,1601675191000,99526563c1ad4c11a6378fca0d1552df,InfoWars,infowars.com,covid19 sabotage,English,[0.4126416147],9,0.533333
84,p,d31d1816210d421f8b215bbebf086bdb,20201109,1604914095000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,thedonald.win,109421 353 pa mailin ballot returned mailed do...,English,[0.9519491196000001],9,0.390092
144,p,f1a4b6849f0a4f64af7a50ef4cf487af,20200824,1598232427000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,media.thedonald.win,god bless,English,[0.2163947225],9,0.533333
151,p,51ee0aa6f8234fb89f43bf8c973d308f,20201027,1603836767000,75d997177c0944cba9d1e8c701ef5a4b,ViolentVixen29,theblaze.com,newly installed justice begin scotus career di...,English,[0.9778986573],9,0.766666


In [90]:
# exploring the 10th (11th) topic
pd.concat([df[(df["doc_labels"]== 10)].head(100), df[(df["doc_labels"]== 10)].tail(100)],axis=0)

Unnamed: 0,t,id,cd,c,u,un,dmn,bo,bo_lang,conf,doc_labels,label_probs
62,p,191b78708cfa4dde96859ada36760ff4,20191014,1571047520000,69ed2f765addaf481a79a741e536e2f8,WorldTvlr,i.imgur.com,wishing parler friend happy,English,[0.2501042783],10,0.533333
90,p,25113e7c4d604d8fb78f8abb65cad7fb,20201129,1606623780000,8b67993183a14587a001010058d089d2,chucknellis,,seeing inside hour big event figured shot fire...,English,[0.9608045220000001],10,0.271926
92,p,530ecf9dd83141c2a6a1940b8f41bea9,20200915,1600142505000,8b67993183a14587a001010058d089d2,chucknellis,image-cdn.parler.com,god u come hell high water democrat father art...,English,[0.9084038138],10,0.880494
93,p,1686bdcd0c2d432b82b3393c2f6b4c81,20191024,1571876326000,21031f424913456591d9a9aed4ff26c7,Cobrarick98,i.imgur.com,exactly true,English,[0.6073842049],10,0.357975
115,p,d14e5486258e462cad0770e5969d03ea,20200821,1597975581000,376432f536af4ce8ba808ad8640c7f61,JimLin,westernjournal.com,warned learned president keep word hidden agen...,English,[0.9995512366],10,0.611832
162,p,ebf988eb0e5646cab7d4b55d8786d30c,20200709,1594268198000,8b67993183a14587a001010058d089d2,chucknellis,image-cdn.parler.com,father art heaven hallowed thy name thy kingdo...,English,[0.9226944447000001],10,0.941667
168,p,a9647d441de0440d890992b030440b30,20200907,1599451095000,071af09d39374d65882822ebda8db29c,Pascaldibello,youtu.be,keep echoing,English,[0.8019369841],10,0.533333
176,p,50dd72323b9b46eca81f50482912bf8c,20200831,1598878716000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,firststateupdate.com,cdc 6 u covid19 death list covid19 death first...,English,[0.6332049966000001],10,0.494179
220,p,97632636d97a4dfbb68e80b6cd309acb,20200814,1597420683000,8b67993183a14587a001010058d089d2,chucknellis,fxn.ws,first go let u hope beginning durham probe exf...,English,[0.9473585486],10,0.370138
221,p,63154f3d0c8e477d9439509430029a85,20201207,1607379130000,626d0f6025014c50800fa79997b9545d,TTexasRepublic,youtu.be,time choosing,English,[0.5264617801],10,0.533333


In [91]:
# exploring the 11th (12th) topic
pd.concat([df[(df["doc_labels"]== 11)].head(100), df[(df["doc_labels"]== 11)].tail(100)],axis=0)

Unnamed: 0,t,id,cd,c,u,un,dmn,bo,bo_lang,conf,doc_labels,label_probs
13,p,287b40ec07f143cd9292affc3c4f78a0,20200904,1599196744000,1ada3334ad9449cf97c7a077eb9296de,Jayman2124,flip.it,trump make united nation splash appearing person,English,[0.9621492028],11,0.813333
45,p,1054061bfae84e3da1b4a141198bae47,20200809,1597007082000,1ada3334ad9449cf97c7a077eb9296de,Jayman2124,thegatewaypundit.com,italian explosive expert beirut blast massive ...,English,[0.7554392219],11,0.533333
101,p,fe4feb98e01540ef80511cc7fc8f608e,20201124,1606240559000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,thefederalist.com,like yous democrat chinese communist also comp...,English,[0.7175892591],11,0.332432
130,p,4908880d5d2d4ed880f6e41f03c146bf,20201113,1605300121000,eaed3c6667c44655bd2943f97146d81c,deniseramsey,,copied communist control act piece united stat...,English,[0.9419108033],11,0.570129
132,p,9e8f2e222c4c400b89dfbe3f4c3afcdc,20201002,1601617882000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,citizenfreepress.com,secret melania tape cnn release undercover rec...,English,[0.7438988686],11,0.460494
149,p,69fe0bbcf98544fa9399ac5b1a148776,20190626,1561569428000,d7ba5a8eabb24aaf8a47dd4e1806fb80,MAGAnificent7,illegalaliencrimereport.com,previously deported child molester rearrested ...,English,[0.8582258224],11,0.463354
211,p,1d3db889c5e44f72a20e9c3dd67a4cef,20201112,1605139335000,eff3c4bd3a114cd69b05786f5dce1dfe,FordCynthia01,nbcnews.com,chinese part hidden ownership growing scrutiny...,English,[0.8719624877000001],11,0.595249
230,p,f1ef61b17b094e41a56532ed5a556db4,20201216,1608158741000,eaed3c6667c44655bd2943f97146d81c,deniseramsey,,copied way step nothing change even trump get ...,English,[0.9029371738],11,0.487166
279,p,b97e9cdfc7ce4e21a76885813a9fc232,20201129,1606673248000,8b67993183a14587a001010058d089d2,chucknellis,,please follow like stuff goal hitting 60k frie...,English,[0.9711946845],11,0.579298
288,p,9a0eaeed71af423ebb83ac0280d3af9b,20200309,1583778079000,e9963ab2d6a240ddab6fabfc079950d9,Klonokid,jihadwatch.org,erdogan turkish military offensive religious w...,English,[0.9677872062],11,0.511554


In [92]:
# exploring the 12th (13th) topic
pd.concat([df[(df["doc_labels"]== 12)].head(100), df[(df["doc_labels"]== 12)].tail(100)],axis=0)

Unnamed: 0,t,id,cd,c,u,un,dmn,bo,bo_lang,conf,doc_labels,label_probs
4,p,857c1349a1cc4bfba84cb75a9a34ec93,20201026,1603746735000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,deadline.com,60 minute trump walkoff soar 17m viewer watche...,English,[0.7728871703],12,0.429591
33,p,f484549ff9d34849b41fd5999d39e7b4,20200724,1595623823000,071af09d39374d65882822ebda8db29c,Pascaldibello,image-cdn.parler.com,wtf sensitive content shit button,English,[0.8903380632000001],12,0.533333
53,p,ab47c3029a0a44e9b4a11b90755f8d5f,20200114,1578967776000,21031f424913456591d9a9aed4ff26c7,Cobrarick98,image-cdn.parler.com,ag barr get handcuff,English,[0.5636285543],12,0.688888
73,p,47838a043340416c920edb3203fe8556,20191227,1577411303000,626d0f6025014c50800fa79997b9545d,TTexasRepublic,thegatewaypundit.com,disgraceful ig report reveals john mccain mule...,English,[0.6546390057],12,0.766667
114,p,fad8203fe2d544c4bffb88b7742340b5,20201007,1602035958000,626d0f6025014c50800fa79997b9545d,TTexasRepublic,,time credit tweet document finally declassifie...,English,[0.9789891839],12,0.510405
127,p,be946d8bdbbf488eb43de562dfbfd220,20190918,1568827077000,99526563c1ad4c11a6378fca0d1552df,InfoWars,www.infowars.com,study found around 90 plastic waste polluting ...,English,[0.9620899558],12,0.688888
141,p,38a9bc24f3bb48c8990f27ada810cc8c,20201117,1605644908000,5f2d3b1cbea6451aa6f9c9f3cab36af9,GaryADePietro,youtu.be,need laugh time lighten thing bit,English,[0.8060134649],12,0.295918
223,p,81d43753f936406cbfd721cc1ffb4a06,20200904,1599179049000,8cb0c50fce61469c95869bfe5527edc2,ShannonPatriot,theblaze.com,bill barr owns wolf blitzer mailin voting theb...,English,[0.550550878],12,0.424062
236,p,da99c6375a0941a781b729023a8ae67a,20201031,1604169121000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,image-cdn.parler.com,revenge dish best served cold november 3 red m...,English,[0.2211147994],12,0.533333
271,p,859bc612028245b88a9b08ff2e85fc94,20201124,1606231452000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,,patriot post hunter drop happenedpatriots alre...,English,[0.9685590267],12,0.548688


In [93]:
# exploring the 13th (14th) topic
pd.concat([df[(df["doc_labels"]== 13)].head(100), df[(df["doc_labels"]== 13)].tail(100)],axis=0)

Unnamed: 0,t,id,cd,c,u,un,dmn,bo,bo_lang,conf,doc_labels,label_probs
38,p,242c70dae4e74435881a3e90bdf1e83c,20201201,1606843008000,99526563c1ad4c11a6378fca0d1552df,InfoWars,infowars.com,breaking hospital executive say 2nd wave hoax ...,English,[0.8337230682000001],13,0.278308
85,p,3c48de1508a7428ca28a97ba883e68cd,20201210,1607580802000,9675e836a8c8467e8177744044cb58b0,Sirboring,saraacarter.com,facebook sued 46 state ftc alleged antitrust v...,English,[0.9513979554],13,0.358137
111,p,4eaa8d2eee924d70b6e526d16cca177b,20201013,1602548555000,99526563c1ad4c11a6378fca0d1552df,InfoWars,infowars.com,nursing home resident protest lockdown rather ...,English,[0.6197634339],13,0.469945
112,p,eb8d8b2947134f7291cba13a5b0882bf,20191007,1570464458000,9ba302eb09584878ae72c873d550eb11,doutingthomas1,www.foxbusiness.com,rachel camposduffy must start young fearless t...,English,[0.7810814381000001],13,0.423011
120,p,e226d9b43de646e2930dfee815c81db3,20201108,1604830149000,e3ad4aa0906f440d97f5fd252b3c5635,Batistamariam,image-cdn.parler.com,state investigated fraud one reach one better ...,English,[0.9343211651000001],13,0.544364
140,p,23eadf5c2fac49f3b1bddbc67d2175b7,20200812,1597216058000,4831fcbb074444058224fb81a86dcbfc,Notter,image-cdn.parler.com,noticed post lot shit sex trafficing pedophili...,English,[0.934464395],13,0.401316
183,p,9526a113ea4545a3af523115e1bf45f2,20201206,1607222022000,08743daa2c954428baf4d6f89d62785e,CounterGlobalist,counterglobalist.news,#6 viewed cgn doctor raise alarm covid vaccine...,English,[0.6238253713],13,0.355557
243,p,e087b521c85e45798093b77d367abe3c,20190710,1562792004000,99526563c1ad4c11a6378fca0d1552df,InfoWars,infowars.com,researcher norway discovered radiation level e...,English,[0.9160011411000001],13,0.766666
246,p,394344b3a2f2460b847c9c8e8841e0a4,20201126,1606376595000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,,patriot post sidney powell absolute genius wai...,English,[0.9440826178],13,0.389091
261,p,8866dd3ada58400995d303e75b247e34,20200609,1591700459000,9675e836a8c8467e8177744044cb58b0,Sirboring,bizpacreview.com,jerry nadler becomes instant meme dem theatric...,English,[0.9367537498],13,0.533333


In [94]:
# exploring the 14th (15th) topic
pd.concat([df[(df["doc_labels"]== 14)].head(100), df[(df["doc_labels"]== 14)].tail(100)],axis=0)

Unnamed: 0,t,id,cd,c,u,un,dmn,bo,bo_lang,conf,doc_labels,label_probs
1,p,1365051bfe6243599e6af1055b71c4a2,20201202,1606897009000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,rumble.com,usps driver drop shocking claim 200000 ballot ...,English,[0.6809220314000001],14,0.516262
6,p,dd72fc970b7b4d8288e1c701c0627ff9,20201227,1609079264000,08743daa2c954428baf4d6f89d62785e,CounterGlobalist,counterglobalist.news,#3 right cgn president trump tweet china virus...,English,[0.7730042934],14,0.333612
31,p,51b911cc3aee426da245f60e880bfd58,20201104,1604478081000,8dcc2c1060964e32a772e7e06791d493,NickelNews,thegatewaypundit.com,cnn admits biden need 75 remaining pa vote ove...,English,[0.9092875123],14,0.317185
41,p,79f71ef823da4d01a8beeb59360f166e,20200720,1595241193000,4831fcbb074444058224fb81a86dcbfc,Notter,dailymail.co.uk,federal judge son 20 shot dead criminal defens...,English,[0.9883532524],14,0.333985
75,p,75d89345f58d49d9a45ca1d154cd12d6,20190724,1564012265000,ac05f68fc0b442a2bf380a435a0286c6,TexasTony,i.imgur.com,republican robert mueller gerry nadler today,English,[0.9768714309000001],14,0.371412
82,p,4ba3b1fbe3d04ae7b7d98aa175af23df,20200828,1598653698000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,breitbart.com,19 video leftwing protester harassing people r...,English,[0.753418386],14,0.47379
99,p,69c3d7b830274a7aac1cb2bf9cc9770b,20200707,1594154515000,82bfccbefc8c4806827b538a59c09f19,Reddog1776,youtu.be,911 urgent republican must watch douglas ducote,English,[0.3502854407],14,0.419581
103,p,b561504e8e5e448fb1d7b04f5026884d,20191117,1573950066000,626d0f6025014c50800fa79997b9545d,TTexasRepublic,www.dailymail.co.uk,bill barr say left sabotaging president waging...,English,[0.9283632636],14,0.302009
107,p,9fb3238a4798451698eef1f03c70f8f0,20201102,1604360575000,626d0f6025014c50800fa79997b9545d,TTexasRepublic,foxnews.com,president trump installs commission promote pa...,English,[0.864470005],14,0.345063
119,p,e79ebc3ed1d34d8bb70a5f1ec7b20af5,20201227,1609086572000,a2809039bf1bd1474de69f72165b6490,Alpha1957,lifenews.com,texas rep brian babin join republican challeng...,English,[0.7758851051000001],14,0.605176
