In [2]:
import pandas as pd
import numpy as np
import pickle

# importing a vectorizer -> this is how we get to the topic model
from sklearn.feature_extraction.text import CountVectorizer

In [3]:
%%time
# reading in the data
df = pd.read_json("min_act_lang_filtered_pre_proc.ndjson", lines=True)

CPU times: total: 3min 19s
Wall time: 3min 23s


In [4]:
%%time
# loading the pickled model
lda_model = pickle.load(open('min_act_lda_k10.pk', 'rb'))

CPU times: total: 78.1 ms
Wall time: 130 ms


In [5]:
# pass-through used in place of the vectorizer's built-in pre-processing,
# because the text has already been cleaned
def dummy_func(x):
    """Return ``x`` exactly as received (identity function)."""
    return x

In [6]:
def cust_tokenizer(x):
    """Split a document into tokens on runs of whitespace.

    Calls ``x.split()`` with no arguments, so leading/trailing whitespace
    is ignored and an empty string yields an empty list.
    """
    tokens = x.split()
    return tokens

In [7]:
# initializing a term-frequency (count) vectorizer with unigram representation
# (this is a CountVectorizer, i.e. raw counts, not TF-IDF weights)
# it's cheaper and it allows relative pruning:
#   min_df=.005 drops terms appearing in fewer than 0.5% of docs
#   max_df=.99  drops terms appearing in more than 99% of docs
# feeding in our own functions for splitting and cleaning because the text is
# already pre-processed and the built-in steps would alter it
# token_pattern=None: the default regex is never used once a tokenizer is
# supplied, and leaving it set makes scikit-learn emit a warning
tf_vectorizer = CountVectorizer(
    min_df=0.005,
    max_df=0.99,
    preprocessor=dummy_func,
    tokenizer=cust_tokenizer,
    token_pattern=None,
)

In [8]:
%%time
# tf vectorized fiting
tf_vectorizer.fit_transform(df["bo"])

CPU times: total: 2min 25s
Wall time: 2min 25s


<21506207x291 sparse matrix of type '<class 'numpy.int64'>'
	with 87534048 stored elements in Compressed Sparse Row format>

In [9]:
%%time
# vectorized docs
vectorized = tf_vectorizer.transform(df["bo"]) 

CPU times: total: 2min 11s
Wall time: 2min 11s


In [10]:
%%time
# makign the labels
lda_labels =  np.matrix(lda_model.fit_transform(vectorized))

CPU times: total: 15min 22s
Wall time: 1h 59min 37s


In [11]:
%%time
# normalizing the topic distribution
doc_topic_dist = lda_labels/lda_labels.sum(axis=1)

CPU times: total: 1.02 s
Wall time: 1.02 s


In [12]:
# code to pull the index of the top topic for each document
# np.asarray guarantees a 1-D result: on an np.matrix, argmax(axis=1) would
# return an (n, 1) matrix that pandas has to coerce into a column
df["doc_labels"] = np.asarray(doc_topic_dist).argmax(axis=1)

In [13]:
# code to pull the probability of the top topic for each document
# np.asarray guarantees a 1-D result: on an np.matrix, the row-wise max would
# come back as an (n, 1) matrix that pandas has to coerce into a column
df["label_probs"] = np.asarray(doc_topic_dist).max(axis=1)

In [14]:
# share of documents (in percent) whose top topic is each label
label_share = df["doc_labels"].value_counts(normalize=True)
label_share * 100

0    23.169943
3    10.838373
8    10.734608
6    10.462617
7     9.372792
5     9.287309
9     9.243490
4     6.850645
2     6.491247
1     3.548975
Name: doc_labels, dtype: float64

In [15]:
# let pandas render up to 200 rows before truncating the display
pd.options.display.max_rows = 200

In [16]:
# exploring the 0th (1st) topic: first 100 and last 100 documents labelled 0
# Find the positions of the matching rows once and pull only the rows that are
# displayed, instead of building the full filtered frame twice.
topic_rows = np.flatnonzero(df["doc_labels"].to_numpy() == 0)
df.iloc[np.concatenate([topic_rows[:100], topic_rows[-100:]])]

Unnamed: 0,t,id,cd,c,u,un,bo,dmn,bo_lang,conf,doc_labels,label_probs
1,p,4504564717f2431c87b010f1c64017c4,20201113,1605281433000,0f46ea54a2bb41a294401c0353282f6a,ConservativeMomNetwork,#defundthemedia #politicalwarfare,image-cdn.parler.com,English,[0.7601499557],0,0.1
4,p,a0e1bf323d8e4510ba84abf4264a04d5,20200728,1595910297000,4a3abd08571b4a4d992edafcdda4ef28,bikergrl6,seriously,image-cdn.parler.com,English,[0.9601342678],0,0.1
7,p,2c6969e71ab64f8cbdaa6e918322c4e5,20201120,1605907778000,96c5e73831c3434b9131bcf77b94f805,Consereporters,never quibbled four year helped undermine prev...,,English,[0.9809647799000001],0,0.659763
14,p,5c03cbe67caf4de5b321295c2867a32c,20200720,1595208933000,289d2fe4908b4eca92c35f0ef86bb4a5,GayleTrotter,mark patricia mccloskey defended strong show f...,gayletrotter.com,English,[0.9595603943000001],0,0.819937
15,p,f9ce4867944941838cbb96922976a914,20210106,1609952460000,c01ce35cd1044153839aaf506a7d2bdf,jockowillink,jocko podcast 263 prolefeed telling think toda...,image-cdn.parler.com,English,[0.9548010826000001],0,0.372062
24,p,68d16f4e12e24f15be23cbc97eda0938,20201126,1606373926000,d0cef2c5c5764d93b41d5a8264b64409,EON2007,five finger death punch get transpiring united...,youtu.be,English,[0.8171772361],0,0.652494
26,p,0a1c4fd64e444efba1ba3598065bd9ae,20190603,1559566916000,47c110ea09754a05a7c85e283bbf95cb,kycal,mouth shut cheek puffed air way like demonstra...,i.imgur.com,English,[0.9670886993000001],0,0.399177
28,p,a371771a2f614c8f867079c68630800d,20201125,1606346073000,45a84df3adfd45408ebeccf3182b3b72,MrFitzgibbons,true #americafirst #obamaforprison,image-cdn.parler.com,English,[0.1351074427],0,0.549989
34,p,0e83bcc55e034deb8fd3613cf17c079e,20200915,1600186183000,7ceeb598e0e4415489b73b859041dc67,M3thods,occam razor redpill78 m3 130pmeastern,youtu.be,English,[0.8201216459],0,0.1
41,p,49bee3514505422bbf5d5fe18d538059,20201018,1603050371000,2a2f06df424e408aa7cbefc81c68995d,YourfriendlyneighbourhoodQAnon,utterly terrifying,image-cdn.parler.com,English,[0.9062694907000001],0,0.1


In [17]:
# exploring the 1st (2nd) topic: first 100 and last 100 documents labelled 1
# Find the positions of the matching rows once and pull only the rows that are
# displayed, instead of building the full filtered frame twice.
topic_rows = np.flatnonzero(df["doc_labels"].to_numpy() == 1)
df.iloc[np.concatenate([topic_rows[:100], topic_rows[-100:]])]

Unnamed: 0,t,id,cd,c,u,un,bo,dmn,bo_lang,conf,doc_labels,label_probs
25,p,2603ddec7dd643da935a27c2100b9d96,20200721,1595312588000,84121222b5b043e79e154a7a0ffbdd9e,LordTaylor,yea caro new nickname iverson obviously answer,,English,[0.96303159],1,0.55
35,p,892074e84c314f91ae99b2a7b6671a71,20200721,1595310740000,3843950e79ec448e928506ad3eefb3c5,Dominicandubb,good baby,image-cdn.parler.com,English,[0.2827412188],1,0.54999
130,p,3eb20c0698e2444b841186a6acc8518f,20201116,1605554114000,7ab3906fdaa4445fa8fd2d36655fbdde,GovernorNoem,excellent conversation secretary brouillette m...,image-cdn.parler.com,English,[0.8920289874],1,0.428177
200,p,ba5a698e4cbe46dc896ec9076fcca080,20200627,1593287018000,41942671ec384f6eb2ef399edf8d491a,ahanksphoto,hello parler #newuser based #washingtondc take...,,English,[0.8738412261],1,0.420997
270,p,e6c3bfc872f54558999e5bd50d682094,20200703,1593811127000,0d073f3777664cdaaa4cff0285f819cd,Greenlee,shenaniganzzz found another great person follo...,media2.giphy.com,English,[0.7329064608],1,0.489359
282,p,b9717240834447b097d9a72e7b68cfb7,20201108,1604867592000,6b85bb8486824e619d82229c857e6c4d,Woodsjenn123,little break chaos today meet dahli smudge,image-cdn.parler.com,English,[0.9308102727],1,0.699919
299,p,270e2753379247ea947eb5cd35efe73e,20200729,1596034826000,24ce8aa2979b4b9b874b4a19049471d3,TheConfrank,good morning #parler,media2.giphy.com,English,[0.3674046993],1,0.54999
320,p,04e25f32b1b6487e8c9358033895a34e,20201111,1605119621000,a00b63c3bc9d4615a6571ba8b3b6b2df,WakeTheFallen,follow nasa image tell good cgi,,English,[0.9458702803000001],1,0.506367
401,p,e3343867db594298bf824a3ada000ac5,20210101,1609481297000,fbe4d444fa414cf4aedf1d968c84aeee,ShericeNichelle,compliance anymore admitting agree apparently ...,,English,[0.9615240693],1,0.405405
452,p,12bc10c1a7ba4024a741ad3db953c216,20201117,1605636651000,e7def11c72664f4bbc464e4a3cc731fc,harvanchik,fraudmistakes found georgia,zerohedge.com,English,[0.9382420182000001],1,0.55


In [18]:
# exploring the 2nd (3rd) topic: first 100 and last 100 documents labelled 2
# Find the positions of the matching rows once and pull only the rows that are
# displayed, instead of building the full filtered frame twice.
topic_rows = np.flatnonzero(df["doc_labels"].to_numpy() == 2)
df.iloc[np.concatenate([topic_rows[:100], topic_rows[-100:]])]

Unnamed: 0,t,id,cd,c,u,un,bo,dmn,bo_lang,conf,doc_labels,label_probs
33,p,815cc67579a442b897b6d0fb9349cede,20200720,1595229478000,2feddb758afb4f59a5b4702da1783517,EmperorCarve,even horny anymore jus lonely,,English,[0.6521830559],2,0.549983
43,p,34dc1374f4b44b5f9d97c0f34afd8bf5,20201110,1605004601000,de287e8b6026980ea0372c95b19ceb80,DocG,thank joining u depth investigation republic r...,image-cdn.parler.com,English,[0.8906103969],2,0.303507
48,p,451baf6248f14e7f9161f491d2682567,20201017,1602952954000,e4a6b6702e9e4a22a823213eb5311bcd,Sangiorged,hmmm big guy going big house white house,,English,[0.8957259059],2,0.398882
49,p,92cbe7eb248f4bafbc256601e9f3b81d,20200621,1592772726000,8fddd5ac852e46a397d215f3ba1063e3,4MinorityInSouthAfrica,action antidote despair farm murder 21620 kame...,image-cdn.parler.com,English,[0.8360871077],2,0.611606
52,p,b7b9c36ca4bf4ba2b7088d13bd01261c,20200621,1592780549000,8460f2b1adbd468a99cc2e23b718504f,danwootton,already outrageous story day,image-cdn.parler.com,English,[0.9093696475],2,0.699962
54,p,b7a7bc68895947a0abb86de52fb7f142,20200723,1595546316000,edd9c20d658e42be844610f508ff4595,Trumpsport,get echo share save #wwg1wga #qanon #thegreata...,image-cdn.parler.com,English,[0.5426814556],2,0.549911
55,p,3d7499ee529947ce84767038378f21ef,20201128,1606581529000,9ca97ec7379e4ce39a7064ad1ee7f63d,onlyRealPatriots,fox news betrayed american people else done fo...,,English,[0.9746443629],2,0.409085
74,p,2717d54fb088463fbbc2f34f88071267,20201017,1602945952000,f08d638f25134360b9bcb036ce6d6e3e,Gm070719,saw fox news early morning truck containing 24...,image-cdn.parler.com,English,[0.9927933216],2,0.65373
87,p,03a815cf70fa471589fd0c21afae70fd,20201115,1605461633000,879bb6dfb6cb405d8f19cde4ab411af1,KalinJL,biggest news,youtu.be,English,[0.9071685076],2,0.549984
104,p,49bb6652c08c454da9181cd0bd591360,20201124,1606239422000,18f1dbe7960d4c75bffbd3e269bd833c,IvankaTrump,fact greenhouse gas generated yous slide 92 ye...,,English,[0.9063659310000001],2,0.368429


In [19]:
# exploring the 3rd (4th) topic: first 100 and last 100 documents labelled 3
# Find the positions of the matching rows once and pull only the rows that are
# displayed, instead of building the full filtered frame twice.
topic_rows = np.flatnonzero(df["doc_labels"].to_numpy() == 3)
df.iloc[np.concatenate([topic_rows[:100], topic_rows[-100:]])]

Unnamed: 0,t,id,cd,c,u,un,bo,dmn,bo_lang,conf,doc_labels,label_probs
8,p,08923a1939a844ec97949bdf8a1243df,20200704,1593891882000,be63f5d23fd143a585069afa852cb828,FnFUpdates,happy independence day,,English,[0.6980830431],3,0.699988
10,p,47b7fb76f9a948f4a979340085049954,20200620,1592656420000,efc8e2f6ab9a4912bd51a561366663da,HeyBulldog,want teach twitter lesson blatant bias censors...,,English,[0.9502445459000001],3,0.631631
16,p,60f29ebc201b44e8a66d83d074f8c917,20200720,1595215870000,a1105c50fc014c29ab795778dbf1f2e7,PoloG,10 echo post butt,,English,[0.278604418],3,0.549966
19,p,d1821c35824e46f9971b96f7adfccc76,20200626,1593139380000,3a69967e395b462ca8997f9268a119d9,AnniCyrus,happy leave #maga,,English,[0.8493505716],3,0.549992
22,p,a820937799234eada709d861cb372686,20200809,1596988698000,c58c788116444503bf05ade83429d851,JesseKellyDC,maybemaybe problem cop,,English,[0.8640967011],3,0.549964
27,p,c51f3546ae5e41688c5c10c666e6f38a,20201123,1606158682000,322cad44f10d4884b1216b8cdf5e64f6,Rohit01,problem bande matram probem chanting bharat ma...,,English,[0.9777516127],3,0.603391
38,p,6df560bd9aa645fe8699ceff74aead60,20201119,1605819452000,8fab0a4e8f514c04aede109cf732296f,AriuumApp,real hate crime day orwellian intimidation wie...,,English,[0.6089842319000001],3,0.520124
40,p,09b80cea58c34d089660c00cfaf45bd1,20200607,1591497806000,af1d43fb8b3841eebaa2146642e6e2fe,TysonSnedeker,encourage family friend sign #parler exciting ...,,English,[0.9422171712],3,0.774976
60,p,dc46ee302afe499bab77bb29dccb3877,20201121,1605998741000,e9b3582a05c94f06acd4c1c986c7e5df,realalexclark,bathing suit kimono col1972 global ambassador ...,image-cdn.parler.com,English,[0.8807724118],3,0.386506
61,p,1ae1c174cffd435f83a2fa2fde3df958,20201112,1605161076000,bbc0de4e0ede474ea72f7a66af452079,TheSavvyTruth,man totally messed forgot say happy veteran da...,image-cdn.parler.com,English,[0.8689295053],3,0.600501


In [20]:
# exploring the 4th (5th) topic: first 100 and last 100 documents labelled 4
# Find the positions of the matching rows once and pull only the rows that are
# displayed, instead of building the full filtered frame twice.
topic_rows = np.flatnonzero(df["doc_labels"].to_numpy() == 4)
df.iloc[np.concatenate([topic_rows[:100], topic_rows[-100:]])]

Unnamed: 0,t,id,cd,c,u,un,bo,dmn,bo_lang,conf,doc_labels,label_probs
20,p,49479743c8b047db95259c998f07bea8,20201108,1604856919000,a6fb7030d17e47f4ae4fb75c9e190d72,BowlifeLevi,first post parler let u start saying maga,,English,[0.8862661719],4,0.337595
29,p,8272a85ba6f6482b81d283ea265c821f,20200624,1593030977000,f0b1604d2e65446bb9ed0e529f0e3f85,DigitalSoldier17,worth remembering feb 2019 scale generates dis...,theguardian.com,English,[0.8996040821],4,0.354866
39,p,4578e527ce4a40f08f6132a2f1d07c00,20201126,1606362328000,a40f7d0d0cee4dea9dff73207013b630,AntiCorporateMedia,need honor patriot fight keep freedom sacrific...,image-cdn.parler.com,English,[0.7488930821],4,0.556756
44,p,90f353e9cf664f80acf53e0fc3b21815,20201009,1602264240000,8e338e4a83c04699a64f246dca19ab4b,Adamfbrazzel,time,image-cdn.parler.com,English,[0.3391611576],4,0.549928
46,p,b85dce383eac48f9ae9f5f4f1550168a,20200621,1592702836000,5fdccbabe9804e5d8ed1c964afa7df20,sydneywatson,okay fine start posting,,English,[0.9303532243],4,0.54995
70,p,686c33ad1d9942dea24486157d319d01,20201119,1605744527000,0afdbb55e0fc46168413e5ef442f01bf,Stopthesteal911,happening georgia past weekend stand guy fight...,image-cdn.parler.com,English,[0.9464908838],4,0.687777
73,p,7633a80db1314c4bb96089ff2a6ec768,20201110,1605024941000,4c466b8a13374133b7f10dc2b7fa62e1,Leahrockwell,echo agree,image-cdn.parler.com,English,[0.3879427016],4,0.55
78,p,b7dddf0fe816421fb9d77d73298db9f5,20201007,1602073394000,efcf3478c3394c1b9d4e2fb3894c37f4,EricTrump,miami florida join family thursday october 15t...,image-cdn.parler.com,English,[0.8963984847],4,0.819995
100,p,c12d1dc25bfe4361a23477f04fac9400,20200629,1593451034000,97867b95b31142298f3b6b59e182038b,BloodNtheSoil,let u make gorilla drop big thing,image-cdn.parler.com,English,[0.585793674],4,0.526367
121,p,a3c1141e81a84d7d8f86a9caeb512bba,20201207,1607375976000,9371b7b597374adb9e3286ca0e88e4d9,thetrumprepublicans,stop calling abortion altogether call really c...,,English,[0.617857635],4,0.425487


In [21]:
# exploring the 5th (6th) topic: first 100 and last 100 documents labelled 5
# Find the positions of the matching rows once and pull only the rows that are
# displayed, instead of building the full filtered frame twice.
topic_rows = np.flatnonzero(df["doc_labels"].to_numpy() == 5)
df.iloc[np.concatenate([topic_rows[:100], topic_rows[-100:]])]

Unnamed: 0,t,id,cd,c,u,un,bo,dmn,bo_lang,conf,doc_labels,label_probs
2,p,1ba89bed2e0b43d0bc52b7d6b94fd238,20200624,1593002457000,d5d73de132694620b2daa06935c08bce,Gizpix,integral part took defeat fascism imperialism ...,image-cdn.parler.com,English,[0.9828278422000001],5,0.611046
11,p,97eabded33d740b3a276f43c763b4484,20200625,1593114589000,42313c2bf1a449759f4d0a5700b8d971,TheTonus,let u see #parlerpets bean love bird video con...,image-cdn.parler.com,English,[0.9074007273],5,0.337997
21,p,6393bbe845404bc3a64e342b97e0028d,20210104,1609796591000,cf686cad35e14b87a192f70eb2eecfa2,Kirtaner,lizard squad absolutely involvement deranged s...,,English,[0.9487286210000001],5,0.513585
30,p,b651558cddf0417fbb21053249adda04,20200904,1599247968000,b9028ae0830149f89c3a2c92a559864b,KevlarOrchid,wrong,image-cdn.parler.com,English,[0.8834820390000001],5,0.549986
45,p,dc888ffec7814facbf649419beb60cf7,20201201,1606830108000,9f7d945405cc4b4b8f43a8146f55245b,Chepoteam,really expect u believe america elected low te...,image-cdn.parler.com,English,[0.9134870768000001],5,0.518526
47,p,18c168779d154c43a11181c5a511f0f6,20190603,1559603399000,63cc94ca81db4a5fb8098770a1d0f5b3,LetItRain,cover every magazine week would sell,i.imgur.com,English,[0.9946091771000001],5,0.699952
76,p,63d08d67305c4d2788f7cac9f022737f,20201220,1608436974000,abd4f86e6eca4e709295c7fb8d7a459c,Lmtlmt,maybe uneducated something worse arewere democ...,,English,[0.9563829899],5,0.32131
86,p,cdbb154ee9f941e4ac32461385f9c83c,20190706,1562418210000,1fa958e609a143e29d976e4ed3ee02cf,LPBarb,must watch video love flag,youtube.com,English,[0.1332622617],5,0.397223
107,p,f91d2acc7c884b29910937747ffd7353,20200621,1592745348000,8fcc2694489348429dc7ee395690bb44,JayneDWales,hello everyone thanks following know twitter n...,,English,[0.9634028077000001],5,0.344951
111,p,4a87dafe3dfc4522bb09d8937cef3413,20200727,1595840616000,c985bb935dc54cf38ce9bb136e401ab1,ITSMoha,mtu akiwa na tattoodredibling bling bado ako h...,,English,[0.3395640254],5,0.549999


In [22]:
# exploring the 6th (7th) topic: first 100 and last 100 documents labelled 6
# Find the positions of the matching rows once and pull only the rows that are
# displayed, instead of building the full filtered frame twice.
topic_rows = np.flatnonzero(df["doc_labels"].to_numpy() == 6)
df.iloc[np.concatenate([topic_rows[:100], topic_rows[-100:]])]

Unnamed: 0,t,id,cd,c,u,un,bo,dmn,bo_lang,conf,doc_labels,label_probs
88,p,177d59d220c2491fbaa2cd47bb6b1e2b,20200624,1592971020000,7739c5ea36584f9da14b1d4d528d9135,Bdomenech,joined parler looking forward meeting everyone,,English,[0.9758190513],6,0.871428
94,p,ce37de92b8474f049fcf41b79235b493,20200528,1590681147000,0103600fd71243bb91bac4d8662aae99,Drewsteeleradio,joined parler looking forward meeting everyone...,,English,[0.9623049498],6,0.887495
152,p,35e199adf15845fea9144b500d05a1ad,20200522,1590188718000,ac76637a840145c48e57a34cb96107cb,PeterSchweizer,joined parler looking forward meeting everyone...,,English,[0.9623049498],6,0.887495
168,p,8a103fb89b1149f7aa73346444624277,20191003,1570125606000,de287e8b6026980ea0372c95b19ceb80,DocG,loved wanted share parler,i.imgur.com,English,[0.9947702885],6,0.549993
191,p,58c84baee11f423eb87922c62e40cca2,20200602,1591062646000,b8c1c45d0e3f4e50b291ca5d0859901b,BurgessOwens,looking forward using parler thanks intro joepags,,English,[0.5615282655],6,0.605067
196,p,3a636d2018e74742a7566393e7a283d2,20200624,1592958842000,7cc1d9865e3843dfbb3bd82828734fea,BrendanCarrUSA,joined ben domenech federalist radio hour talk...,thefederalist.com,English,[0.8679339886],6,0.366667
201,p,cdb56ec0a4384e0cb85e3312a917eac5,20201207,1607367770000,f300295d5d524d329981b3a86a8647a8,NorseWolverine,parler posting police afraid graphic third att...,image-cdn.parler.com,English,[0.7183034420000001],6,0.366659
239,p,cb59c049b7ad450cab5c63f4ec29654d,20201113,1605291458000,3e5af0be81e4454f9cde3bde2507a69e,B157838,joined parler looking forward meeting everyone,,English,[0.9758190513],6,0.871428
241,p,3ee0de9a9bc94910828d6b600ee2a4bb,20201108,1604856701000,3c756ed2cdda41b98eb569d2115d1b00,ataylor406,joined parler looking forward meeting everyone,,English,[0.9758190513],6,0.871428
244,p,8b14e4458f224a7fbfc9801bbb458f7f,20201115,1605476246000,9983fbcf319e48cab28a02f7074e900d,Wildlifeprofessional,joined parler looking forward meeting everyone,,English,[0.9758190513],6,0.871428


In [23]:
# exploring the 7th (8th) topic: first 100 and last 100 documents labelled 7
# Find the positions of the matching rows once and pull only the rows that are
# displayed, instead of building the full filtered frame twice.
topic_rows = np.flatnonzero(df["doc_labels"].to_numpy() == 7)
df.iloc[np.concatenate([topic_rows[:100], topic_rows[-100:]])]

Unnamed: 0,t,id,cd,c,u,un,bo,dmn,bo_lang,conf,doc_labels,label_probs
0,p,6c57a6fc477643508a43128d21c8d09f,20210106,1609948690000,79960d07ce484d7696b55ecf3873286d,PastorJDFarag,lie lie jesus truth satan lie listen pastor jd...,,English,[0.9649336934],7,0.849958
9,p,8c187abbe0004e9784c36a6e3cb68429,20200725,1595681659000,c9f37b4dacd640ef94bb1515bddf491c,Afleischer,must admit hard truth guide u year decade come...,washingtonpost.com,English,[0.9114711285],7,0.267562
12,p,9e6bc36b615c49d881751f43a3776cd5,20201113,1605241266000,eb2c5591fcc849abab65da16870f740d,sethdillon,happened today,notthebee.com,English,[0.9947810173],7,0.549954
37,p,b7c5a186d7124e8fa5c9fbd867b00518,20201130,1606759599000,543e7db5bac34255b46fb8dbb3f48c78,TheTruthRevolution,yous survive china want nothing wipe united st...,,English,[0.9824320674],7,0.434723
58,p,bd1af4436a2b4f21a8c9462195bd8139,20201122,1606075696000,ae6207ce4c414ba8ac37ba0dd33af0fb,wella71,echo heck article bottomthat match audio count...,,English,[0.3568269908],7,0.849976
75,p,5b71ec9779214f4084121a69b0eec7a9,20200629,1593423863000,ae3a19a8b5b1455bb6273809a56fba28,BaldCrusader,death french fishing industry death british fr...,,English,[0.9448599815000001],7,0.347654
83,p,03e0d50743d445a5a0a64647cab2681b,20200602,1591106499000,870f09602af148aeb926636910e9f2eb,JeffreyTexas,way destroy evil force rubber bullet tear gas ...,,English,[0.9498004913],7,0.525403
93,p,dac5abb2d92f4d2985acb696f4c7b798,20200626,1593161705000,e8a1433cb5b50bd8e756b6a9dd7bec18,victoriasummer,parler going take twitter excited watch happen...,,English,[0.9498657584],7,0.264017
109,p,47394f9dbf5f43bb910c5a54d9adaad6,20200626,1593170959000,45f653cfb3e640e0bb9c5d7c6b34d096,Grenell,time move federal agency state concentration u...,,English,[0.9528601766],7,0.467357
126,p,7215d5d092e44d78bebbef1c46e3c934,20201124,1606181867000,7e5b74b45c2b474a9c408dd9819636b5,Duncanbrannan,compliment party science lord god please give ...,lifenews.com,English,[0.8831388354],7,0.466431


In [24]:
# exploring the 8th (9th) topic: first 100 and last 100 documents labelled 8
# Find the positions of the matching rows once and pull only the rows that are
# displayed, instead of building the full filtered frame twice.
topic_rows = np.flatnonzero(df["doc_labels"].to_numpy() == 8)
df.iloc[np.concatenate([topic_rows[:100], topic_rows[-100:]])]

Unnamed: 0,t,id,cd,c,u,un,bo,dmn,bo_lang,conf,doc_labels,label_probs
3,p,7469aeee64124d76bcd59c5c00dde32e,20201021,1603283718000,2311448602e5d4d70bba61b4d0f95fab,Bobbypickles,george godfather colella national president bo...,youtu.be,English,[0.8909293413],8,0.639408
5,p,7c2fd7b9c32a4ebf98d451ab65216076,20201103,1604428780000,e335306ced5042b98cfc2e2f911c79cd,meRorschach,one voting machine locked #brooklyn district v...,streamable.com,English,[0.9451391101000001],8,0.774947
6,p,45ea9f945eeb4c9681d22c0cd1de378c,20210101,1609545307000,109088bb7c29b75eb8688df98770d103,Jennybethm,president trump called people meet dc january ...,,English,[0.9820095897000001],8,0.515869
13,p,6c2a4894360c4c4d98abdd39eeebee15,20201130,1606778455000,10178df3e01f449c8f5d124967c1afe5,FreddA,pennsylvania lawmaker formally introduce resol...,theepochtimes.com,English,[0.8428465128],8,0.699989
17,p,bcead3a7a4fc482db7529d312bf18973,20200705,1593983613000,9b3258273b6149c28542034abda0e253,Spacex1,echo vote follow #patriots #vikingsfortrump mr...,media0.giphy.com,English,[0.47066479920000004],8,0.412037
23,p,b9285043f9134f52b2e48c63f8b91706,20201206,1607244847000,3842c232e98e48daa404efad19df02d0,Vanploeg,obvious someone stole dead people identity vot...,image-cdn.parler.com,English,[0.9622479081],8,0.611359
31,p,30a868b1fab54a56883ce4bf6cae4924,20200629,1593415739000,4945f1477e314d87b787f533b16a7225,RitaPanahi,rooftop korean storefront sikhswhat call couple,image-cdn.parler.com,English,[0.9127293229000001],8,0.549985
32,p,0d0c1270374c422390b776934504e2de,20201112,1605207344000,be63f5d23fd143a585069afa852cb828,FnFUpdates,breaking news dominion deleted 27 million nati...,image-cdn.parler.com,English,[0.3983666897],8,0.87156
42,p,43aab885acc844fbb3ca308e6d1e3cc0,20201204,1607080111000,5f5a69f5c0904b8aad7d5d27ae4e829d,HenryHunt,georgia election fraud fraud #electionfraud #g...,image-cdn.parler.com,English,[0.4518070817],8,0.774999
53,p,7a47d90037d64a1e9bc5e346b4ad5bfd,20200821,1597969002000,1310aa1894ed4c2ba7d7ee0a1f74b137,allumb,facebook group supporting cannon hinnant murde...,breitbart.com,English,[0.6543134451],8,0.373107


In [89]:
# exploring the 9th (10th) topic: first 100 and last 100 documents labelled 9
# NOTE(review): the output saved with this cell has execution count 89 and a
# different column order and row labels than the other topic cells, so it was
# likely produced from a different kernel state -- re-run after
# Restart & Run All before relying on it.
# Find the positions of the matching rows once and pull only the rows that are
# displayed, instead of building the full filtered frame twice.
topic_rows = np.flatnonzero(df["doc_labels"].to_numpy() == 9)
df.iloc[np.concatenate([topic_rows[:100], topic_rows[-100:]])]

Unnamed: 0,t,id,cd,c,u,un,dmn,bo,bo_lang,conf,doc_labels,label_probs
8,p,d1e9d86e830448c88b688eee73e1e011,20200925,1601055261000,8b67993183a14587a001010058d089d2,chucknellis,,god bless ronpaul praying god bless randpaul s...,English,[0.8366956711],9,0.688889
24,p,e4930bfc6c024eb7ab46755b3b315ce1,20200826,1598475954000,4831fcbb074444058224fb81a86dcbfc,Notter,image-cdn.parler.com,thank family,English,[0.9812156558],9,0.378688
25,p,914399af4bd44b5eb661cd190bdfc75a,20191012,1570885520000,21031f424913456591d9a9aed4ff26c7,Cobrarick98,i.imgur.com,going stand radical leftist democrat america g...,English,[0.9471607208],9,0.422408
59,p,66ece1e23f2a4bde84258a40a0376a0a,20201202,1606874858000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,thepalmierireport.com,project veritas drop cnn tape,English,[0.4568171501],9,0.533333
69,p,9f63a860ecee4a3283873954d56283da,20201117,1605582531000,8cb0c50fce61469c95869bfe5527edc2,ShannonPatriot,townhall.com,israel demand apology cnn host comparing trump...,English,[0.6816611290000001],9,0.688888
76,p,cd281ec6d6ed4cceb4e1f739d38b8622,20200612,1591934408000,376432f536af4ce8ba808ad8640c7f61,JimLin,lifenews.com,one giant step baby federal court upholds miss...,English,[0.7096105218000001],9,0.307537
79,p,9ab7e9dd97d243738f35b482f80beee1,20201002,1601675191000,99526563c1ad4c11a6378fca0d1552df,InfoWars,infowars.com,covid19 sabotage,English,[0.4126416147],9,0.533333
84,p,d31d1816210d421f8b215bbebf086bdb,20201109,1604914095000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,thedonald.win,109421 353 pa mailin ballot returned mailed do...,English,[0.9519491196000001],9,0.390092
144,p,f1a4b6849f0a4f64af7a50ef4cf487af,20200824,1598232427000,eeeb8dd25b7142b1bc69cbdbe1d8bb62,ThomasFox,media.thedonald.win,god bless,English,[0.2163947225],9,0.533333
151,p,51ee0aa6f8234fb89f43bf8c973d308f,20201027,1603836767000,75d997177c0944cba9d1e8c701ef5a4b,ViolentVixen29,theblaze.com,newly installed justice begin scotus career di...,English,[0.9778986573],9,0.766666
