In [1]:
import pandas as pd
import numpy as np
import pickle

# importing a vectorizer -> this is how we get to the topic model
from sklearn.feature_extraction.text import CountVectorizer

In [2]:
# reading in the data
df = pd.read_json("ver_act_lang_filtered_pre_proc.ndjson", lines=True)

In [4]:
%%time
# loading the pickled model
lda_model = pickle.load(open('ver_act_lda_k10.pk', 'rb'))

CPU times: total: 62.5 ms
Wall time: 90.2 ms


In [5]:
# function to override the pre-proc that occurs within the vectorizer
# just returns the original string -> because I already had it clean
def dummy_func(x):   
    return x

In [6]:
def cust_tokenizer(x):
    return x.split()

In [7]:
# initializing a TFIDF vectorizer with unigram representation
# it's cheaper and it will allow for relative pruning (those terms appearing within fewer than .05% of docs or in more than 99%)
# feeding in our own functions for splitting and cleaning because otherwise it will mess up our plan
tf_vectorizer = CountVectorizer(
    min_df =.005, max_df = .99,  preprocessor=dummy_func, tokenizer=cust_tokenizer)

In [8]:
%%time
# tf vectorized fiting
tf_vectorizer.fit_transform(df["bo"])

CPU times: total: 1min 3s
Wall time: 1min 3s


<8081465x238 sparse matrix of type '<class 'numpy.int64'>'
	with 55804421 stored elements in Compressed Sparse Row format>

In [9]:
%%time
# vectorized docs
vectorized = tf_vectorizer.transform(df["bo"]) 

CPU times: total: 56.9 s
Wall time: 56.9 s


In [10]:
%%time
# makign the labels
lda_labels =  np.matrix(lda_model.fit_transform(vectorized))

CPU times: total: 5min 48s
Wall time: 1h 2min 47s


In [11]:
# normalizing the topic distribution
doc_topic_dist = lda_labels/lda_labels.sum(axis=1)

In [12]:
# code to pull the indices for top topics
df["doc_labels"] = doc_topic_dist.argmax(axis=1)

In [13]:
# code to pull the proability for each top topic
df["label_probs"] = np.amax(doc_topic_dist, axis=1)

In [14]:
# getting the value counts here. 
df["doc_labels"].value_counts(normalize=True)*100

0    19.124466
5    18.621228
8    18.531034
9    13.850966
1     9.577632
3     5.902135
7     5.599331
4     5.413387
2     3.294489
6     0.085331
Name: doc_labels, dtype: float64

In [15]:
# setting the display limit
pd.set_option('display.max_rows', 200) 

In [16]:
# exploring the 0th (1st) topic
pd.concat([df[(df["doc_labels"]== 0)].head(100), df[(df["doc_labels"]== 0)].tail(100)],axis=0)

Unnamed: 0,t,id,cd,c,u,un,dmn,bo,bo_lang,conf,doc_labels,label_probs
1,p,7563cfd8284049ea8323b2d58e5e5d24,20201216,1608086627000,f0a6181ff474e3b99426b8a69bdb99ed,CarolinaLakes,waynedupree.com,maga say screw gop mcconnell order republican ...,English,[0.7242468596],0,0.389073
6,p,985968f197f64fc2accacc6586ee8cac,20201001,1601557890000,b9764a80228dbf9c46ee3680bcfe45dc,sargon,image-cdn.parler.com,savage,English,[0.3323119283],0,0.1
7,p,04f91e099bca4afcb6fabe2dd6a19496,20201214,1607933385000,d58d43cb59bf4da0890a69d9fed86470,ChrisSalcedoShow,foxnews.com,texas man shot dead driveway home returning gr...,English,[0.9439343214],0,0.648012
9,p,c48f656602874fa4a05e4120ec3e2ffe,20201004,1601843950000,280219c34a2146889ae0160e5f8acaba,Juliet777777,truepundit.com,cnn refuse show exclusive clinton investigator...,English,[0.8532019854],0,0.46055
10,p,9fd9d236556942c9a814dce0cb2d68d0,20200801,1596243660000,81e389f14ce94420a41637214983e592,realwayneroot,thegatewaypundit.com,preparing column terrible misreporting exagger...,English,[0.8557288647],0,0.638543
16,p,fe511f31f7504ad681afc34fea59291b,20200902,1599086983000,913404de60d040039d866a9bd78c9c21,Gabriel99Dancer,youtu.be,omg priceless,English,[0.24153593180000002],0,0.1
17,p,a3da3fcc904d4c2a978c2df4ae5f03af,20201204,1607104899000,00a010dcc27449f39018621798831b53,Nateguez,,peoplewe comply echo sht spread wordstand folk,English,[0.5453474522],0,0.549988
32,p,9e1d05738c0642d7a74ddaa660b7c7d3,20200704,1593880979000,8bfa9319b5c84752a52da71124fa82d1,DavidIrvine1,,need voter id,English,[0.9614781141000001],0,0.367986
33,p,c40c890439d0450eba4fe5fb5b780e93,20201104,1604530256000,4f7118cc43e149fcb2beeb505b110a2a,DrJeromeCorsi,justthenews.com,trump assembling allstar legal team mount elec...,English,[0.8604565263],0,0.474419
36,p,1d8fbdde1ce8451a8459e4a78c065af5,20200922,1600756240000,59a3eccec64245f292a588f7eaf387b0,DavidVance,thesun.co.uk,apparently covid virus fooled closing pub rest...,English,[0.955190599],0,0.849989


In [17]:
# exploring the 1st (2nd) topic
pd.concat([df[(df["doc_labels"]== 1)].head(100), df[(df["doc_labels"]== 1)].tail(100)],axis=0)

Unnamed: 0,t,id,cd,c,u,un,dmn,bo,bo_lang,conf,doc_labels,label_probs
3,p,c035d5661845430d9d29fe54c824eede,20201002,1601678425000,c94f07d2db5b40e09b022a9d239d2d86,ntdnews,ntd.com,president trump remains good spirit mild sympt...,English,[0.9868649244000001],1,0.369954
20,p,8d6646c9eab24a7aa9c91f3f65b8c306,20191001,1569948180000,475f85273cd643abbbbf20f659031700,dhouska,dailycaller.com,custom border protection cbp army corp enginee...,English,[0.8850675821],1,0.414813
27,p,91a2f45e11a642748695a75e771afb3d,20201007,1602097949000,20039929947b40788943b6d52f658e7d,tjf2020,image-cdn.parler.com,#trump2020 #maga #trumptrain #donaldtrump #par...,English,[0.3623655736],1,0.775
30,p,dd09eeee333a4c8d93e399b1b8801346,20200919,1600557738000,3dc1a71aebdc4cf3a9ffff266e126602,patriotsfl,image-cdn.parler.com,arrest pedos #parler #wwg1wga #maga #kag #newu...,English,[0.3741851747],1,0.887493
31,p,e91fb3305fb14e6a986e8ba443e6d938,20210106,1609951421000,b21e1eb5cfb62c9ef9941ab06864c196,rogerstone,image-cdn.parler.com,today final rally #stopthesteal,English,[0.4990209043],1,0.549964
34,p,fbcb099ae1274b08ae18a7f5765df0d8,20201128,1606527441000,3dc1a71aebdc4cf3a9ffff266e126602,patriotsfl,image-cdn.parler.com,#parler #wwg1wga #maga #kag #newuser #news #tr...,English,[0.3690432906],1,0.887493
42,p,741f6feaa8214186a5ff53a3e2b5dfd2,20201229,1609252952000,6d561fe82c994a729bf7858e7499c364,QanonMemes,image-cdn.parler.com,verified potus tweeted woman last night name s...,English,[0.8940177560000001],1,0.677863
47,p,2e3a68c768264d378713f2ec20b12552,20201101,1604191626000,dd1466fccd1c44cc94958b614fccebb8,kagbabe,nypost.com,hunter biden email show leveraging connection ...,English,[0.8544764519],1,0.774996
65,p,218fbb96efa342feac8c7c3240cc7b02,20200727,1595867519000,bd4bae496f0f4ca79ef64e4ba19c90ef,HempOilCures,image-cdn.parler.com,#maga #trump2020,English,[0.49971511960000004],1,0.7
67,p,8642d3cd4f3c45448f226b2dc84cabe4,20201026,1603698285000,8b425d88f40948aca80265f1f7605fb4,SheriffClarke,huffpost.com,suspended tell knock move making big deal noth...,English,[0.9839231968000001],1,0.628279


In [18]:
# exploring the 2nd (3rd) topic
pd.concat([df[(df["doc_labels"]== 2)].head(100), df[(df["doc_labels"]== 2)].tail(100)],axis=0)

Unnamed: 0,t,id,cd,c,u,un,dmn,bo,bo_lang,conf,doc_labels,label_probs
15,p,2f2e80a0b032400bab91bfb267429a1c,20200926,1601141794000,7537accec5b6c6dbefa28dfce2a1a19d,CitizenFreePress,citizenfreepress.com,kate brown declares proud boy emergency citize...,English,[0.3245681822],2,0.549957
22,p,877ab56f208e47059b75e30b1d2da5a8,20200924,1600968070000,6db9626755c54f36817ed78f105c3e85,Jinsaburo,,donald j trump thank never let,English,[0.8605231643],2,0.349738
29,p,7184163e55ec482b9c9f35a79cd8f62a,20200203,1580746836000,ee398d4572ae353a16e4ac115418241b,thewashingtonpundit,image-cdn.parler.com,know meant poke chest,English,[0.9952878952],2,0.549988
38,p,34a6eac82f784548abacf8e3b503b028,20201114,1605368470000,44a446dbcdc540b79cff49792044893d,VeilsideMustang,,wisconsin voter file lawsuit exclude 792000 vo...,English,[0.9300326705],2,0.41911
75,p,046f7321c0af4aaca57ea886894c0827,20200804,1596512982000,280219c34a2146889ae0160e5f8acaba,Juliet777777,imgflip.com,demsfeartruth loose lip sink ship know downloa...,English,[0.8422780633],2,0.549988
91,p,3d3020e5ddb94f76a53b177e81a2029f,20200821,1598048566000,ca45220452484392b92191d470c6a4e5,Crusaderpatriot,,look see paying bromsgrove hotel spa birmingham,English,[0.9620575309],2,0.379381
95,p,343d3fbed88249cb9292cf2fab22f2b4,20201216,1608103096000,87429c2ccbaf415892940b36b0efded3,BeachMilk,weforum.org,globalists knew getgo would destroy u economic...,English,[0.9725492597000001],2,0.69995
127,p,dc489b5841844d0ab27256e7b152955a,20201025,1603646280000,3d01c1d281fe47f3a647418595d8c581,OldWest,dailywire.com,california middleschooler threatened jail time...,English,[0.37274605040000003],2,0.672021
140,p,decf7eea7d32417682a4d880c0e0435e,20201115,1605399529000,6d0dfd6ea883439b88767ff2fc65748b,moon52,legalinsurrection.com,see moon shuttle #moon52spaceshuttles moon spa...,English,[0.2549116611],2,0.699975
151,p,b4d49402269440b6b4b15c938fc425fe,20201012,1602516150000,3d241dda271646ea8c617043fb8a4d5e,davep,image-cdn.parler.com,hey aoc go read history book join conversation...,English,[0.8377574086],2,0.519522


In [19]:
# exploring the 3rd (4th) topic
pd.concat([df[(df["doc_labels"]== 3)].head(100), df[(df["doc_labels"]== 3)].tail(100)],axis=0)

Unnamed: 0,t,id,cd,c,u,un,dmn,bo,bo_lang,conf,doc_labels,label_probs
8,p,f065237399c64d5eaa7f31bf53eacf7f,20200810,1597064470000,de287e8b6026980ea0372c95b1439588,DoingRight1,,superstar dbongino explains happen usa step ga...,English,[0.5266726613],3,0.515653
11,p,6eb846d04afc49f39dfc7431071540cc,20200928,1601313973000,84c3af7ef263409ebd2a922b166f28bb,Rdsizemore1945,image-cdn.parler.com,hillaryobama sleepy eyed joe hope three pay 91...,English,[0.8901211023000001],3,0.371255
13,p,6fe167b3e1c14ef097f97b1974a87efd,20190801,1564689161000,feb4be5b160e441f9e53c853e47cb631,Xlntjoy,i.imgur.com,really know donald trump,English,[0.7271975279],3,0.422771
21,p,86678d5ec0d64195bf8d78322e6c0f3d,20201010,1602349292000,71278b8117fe48b8beed5f40f49dfae7,Seanchaidh,theepochtimes.com,trump say donate blood plasma longer medicatio...,English,[0.6668193936],3,0.486892
35,p,1b873692b6e84503b71e814d8f8bf7e2,20200304,1583346489000,1ac7812f009a4e0fb09f9b943d28b197,WashTimesOpEd,washingtontimes.com,r emmett tyrrell jr divided america continue #...,English,[0.7287251949],3,0.549996
48,p,81dbf9b347d5402282495ea06080f694,20201116,1605555048000,fbbfd48a4b5a41c9b574442ae76a8674,Thequartering,youtu.be,new video please share million maga march took...,English,[0.7499648333000001],3,0.397373
50,p,9125874e40a34dddb4c082f52ef88611,20201129,1606668098000,2fc0e045cdf944958f5141f59df24bd5,Gatorcoastie,image-cdn.parler.com,president trump team trump2020,English,[0.9372467995],3,0.699983
55,p,69f9ec7a0aa54505b91790f438eedac9,20201027,1603809099000,bfcd1929687d440792e9c90d78fda57b,Jsolomonreports,justthenews.com,newt gingrich release video slamming biden fam...,English,[0.8269862533000001],3,0.464677
58,p,55990e3267e24eb3bcf25ff71d23f170,20210108,1610115571000,606eb22054f043f4858be080f33b5996,AtomicDawg,nypost.com,elaine chao mitch mcconnells handler first tru...,English,[0.7049143910000001],3,0.368119
62,p,144e6d35a8bd4aeabb7aff4a47106815,20190708,1562594743000,a985e163836345f39fb1df80174d97a0,GretchenInOK,,current vicious war cop america began july 22 ...,English,[0.9566092491],3,0.301654


In [20]:
# exploring the 4th (5th) topic
pd.concat([df[(df["doc_labels"]== 4)].head(100), df[(df["doc_labels"]== 4)].tail(100)],axis=0)

Unnamed: 0,t,id,cd,c,u,un,dmn,bo,bo_lang,conf,doc_labels,label_probs
2,p,aa00ca14eeb343228ca5d942fed57ac2,20200710,1594419435000,34968cde93e74071990f927980baa2d9,Marklevinshow,thefederalist.com,democrat partypress outright liar,English,[0.703132391],4,0.549995
18,p,e6ae72fd72e445e0988966abaf73fbaf,20201207,1607307921000,0b4fadd8e84745e4a6d29e49900ad6d6,LoyalPatriot,thegatewaypundit.com,ga trump supporter rip fox news reporter griff...,English,[0.4727155864],4,0.498046
19,p,8546e64c496b40e09012316f2e3c4811,20201228,1609125074000,1482f4613132408587f27f1cd92a8496,JewishMolonLabe,image-cdn.parler.com,democrat nazi,English,[0.8936638236000001],4,0.549995
25,p,a3a3600a973f4e3cb7f2562b86adec3d,20200728,1595904621000,30fbb36f04684644b7f4e0c3d1fec2c6,Yeahmosaidit,theblaze.com,bombshell leaked document reveal seattle city ...,English,[0.9547910094000001],4,0.767786
49,p,17d08e3b3d2a4386bafb4c93e69031e3,20201126,1606351221000,b1ad1b19a70d43ffa9c21a1b650022a5,TommyRobinson,image-cdn.parler.com,great news georgia 11th circuit grant lin wood...,English,[0.5984238982],4,0.309769
56,p,0c0d89d63434442c857ea4c46f098039,20200721,1595362013000,1ac7812f009a4e0fb09f9b943d28b197,WashTimesOpEd,washingtontimes.com,inside beltway new cultural perspective couple...,English,[0.9430705309],4,0.699943
84,p,986e11e37ee44a0e89b7d929d51dea27,20201022,1603391535000,7537accec5b6c6dbefa28dfce2a1a19d,CitizenFreePress,citizenfreepress.com,one show hear obama speak philly citizen free ...,English,[0.817107141],4,0.387784
92,p,7766215ffc194e81bd3840f4175b9b28,20201123,1606172003000,e8aa2091a216a7c5427090fc73288442,flintbedrock,image-cdn.parler.com,new trump legal team statement state certifica...,English,[0.7691389918],4,0.774975
98,p,4b590460b23a47b18c819d727fa70122,20200825,1598366200000,b1ad1b19a70d43ffa9c21a1b650022a5,TommyRobinson,youtube.com,probably sensible review jacob blake police sh...,English,[0.8146625757],4,0.549946
147,p,9c653bbc2e414ba584a01e50bbcd462c,20201001,1601522111000,84c3af7ef263409ebd2a922b166f28bb,Rdsizemore1945,image-cdn.parler.com,lebronthis true super star conduct success,English,[0.939694345],4,0.549996


In [21]:
# exploring the 5th (6th) topic
pd.concat([df[(df["doc_labels"]== 5)].head(100), df[(df["doc_labels"]== 5)].tail(100)],axis=0)

Unnamed: 0,t,id,cd,c,u,un,dmn,bo,bo_lang,conf,doc_labels,label_probs
224,p,a410fd23711e4df58a075992b969c26c,20201108,1604838799000,7537accec5b6c6dbefa28dfce2a1a19d,CitizenFreePress,citizenfreepress.com,clark county whistleblower step forward citize...,English,[0.4587374032],5,0.366865
742,p,df590c870a354896b213a3feed68e74d,20200819,1597840704000,00a010dcc27449f39018621798831b53,Nateguez,image-cdn.parler.com,excellent information looking exercise #2ndame...,English,[0.45574525],5,0.301719
923,p,a0d55d937ab4486492c607c98c265ca3,20201119,1605817287000,7537accec5b6c6dbefa28dfce2a1a19d,CitizenFreePress,citizenfreepress.com,giuliani laugh reporter face citizen free press,English,[0.4442633092],5,0.367661
971,p,4e169c89a6d346b29da91e5b44cb3274,20200929,1601368133000,099ba62eabc440349a2b116b71414937,Milesy45,image-cdn.parler.com,looking forward trump demolishing fool tonight...,English,[0.8210879564],5,0.52497
1316,p,9e45f9ae92fe49dc823a4255f19aad9d,20201109,1604880361000,b0e195b5dd7148eea2c3a66ae585b7fb,LordStanley,image-cdn.parler.com,parler quadrupled user last 24 hour slowdown g...,English,[0.9711269140000001],5,0.41987
1618,p,563d98ff81bc48a6b75695b3a186f47b,20201018,1603032318000,8affcd43adee4f41bb9a7661c0eea697,TiffanythePatriot,image-cdn.parler.com,looking forward new bombshell tomorrow,English,[0.6818246841000001],5,0.524939
3006,p,928d7c5151fa45fb87831882dfa4d256,20201016,1602872825000,e3cbb2db881d46d8a9744da3882772e8,SOLARPRINCE,,joined parler looking forward meeting everyone,English,[0.9758190513],5,0.620878
3120,p,ad2d1e1bd8cb4f1baf42f38e1ba26d48,20200116,1579182995000,c33792e0420844d6a7c20590294e783e,RoosterGogburn,foxnews.com,could well ugliest person face earth,English,[0.9590981007],5,0.275646
3153,p,08236ea1-0e59-4040-9216-a389f6b14319,20210109,1610156149000,11ebbb9aa2cc466d81945f23b566b997,Technideb,,google dropped parler gab play store list,English,[0.2757404745],5,0.549965
3284,p,723cd756e5b04491a706b20cad9d5759,20200726,1595785510000,8a589665ff884d3f832c0d8a09f98d30,StevenGorges,,looking forward life liberty levin tonight,English,[0.7680817246],5,0.524991


In [22]:
# exploring the 6th (7th) topic
pd.concat([df[(df["doc_labels"]== 6)].head(100), df[(df["doc_labels"]== 6)].tail(100)],axis=0)

Unnamed: 0,t,id,cd,c,u,un,dmn,bo,bo_lang,conf,doc_labels,label_probs
343,p,160af44853ca48c88219eeeca0b9810f,20200908,1599589234000,9e5118b983014829aa1ea0ff824d76f7,LiveLikeAPatriot,thefederalist.com,wrong thatand hindsight glad wrong,English,[0.9974550009],6,0.300106
1698,p,4877b254bd6c41b9ad0c5f81e390f069,20201015,1602787609000,823acd76fe299a9a98851c9cdad78df5,Dbongino,,thing leftist radical hate free speech police ...,English,[0.8834730387],6,0.371737
1789,p,8c30152d6e9d402d8461b25448b1849d,20201003,1601747931000,2ba4258e7492473fa1e957ef5be902e4,mtursi2000,image-cdn.parler.com,breaking news melania trump chief staff say fi...,English,[0.9550622702],6,0.269366
2087,p,f586ed592c5247a5b9350e2f0c09f1e4,20200930,1601433769000,56aac19666fc43b19d4e9f01a45794b6,Mcbg1988,,debate glad buy popcorn,English,[0.9675937295],6,0.300106
2660,p,6d51382f28bb4acc8098a1ed4d888129,20200801,1596302848000,f195a56253da4abfa5f07dc101037317,NancyP,dailywire.com,america burn u flag bible free speech also ame...,English,[0.618042171],6,0.440867
2703,p,6e573ee38c8e4b72a57ce4ea8621e5ee,20200517,1589746214000,76091b8a59f547ed8caafc5f8b398540,Doc05,cnn.com,wtf glad fucking sickenes,English,[0.8187133074],6,0.300106
3088,p,43d5bfac5c77421e9bccbcc8908e324a,20200730,1596079418000,f54d0b4c8d624dadb5403deaafe9d252,JasperJax,flip.it,#brainlesstrump go full cognitive decline bumb...,English,[0.7578647137000001],6,0.522242
4083,p,f300fe7340964db78b2c329d55177ecb,20201028,1603929596000,e56ad10accd24b808b373665e0561e0c,Pvoob,breitbart.com,pamela geller win free speech case detroit pub...,English,[0.7011067867],6,0.414216
4391,p,26c0f5bb4f9149188f17e52282575d19,20200626,1593159972000,c5c76ad783804540a9b7edc708992cad,DRandolph,,glad sick tired bias unfair fb ig twitter shut...,English,[0.9294967055000001],6,0.221802
4988,p,d1ef93798c0c42179b90320d4cbe2c66,20201221,1608571850000,c85382f580ff4d86ad9143468e1ce560,Chezkersheave,youtube.com,free speech,English,[0.9724861383000001],6,0.368454


In [23]:
# exploring the 7th (8th) topic
pd.concat([df[(df["doc_labels"]== 7)].head(100), df[(df["doc_labels"]== 7)].tail(100)],axis=0)

Unnamed: 0,t,id,cd,c,u,un,dmn,bo,bo_lang,conf,doc_labels,label_probs
26,p,dec3f8f9e3cb40ac80d6a3d19af929f7,20200723,1595465410000,2c231eaa5598466dbdb2e1925ce94dbb,BidenNever2020,thegatewaypundit.com,joe biden smear trump first racist president f...,English,[0.9061816931000001],7,0.401205
54,p,b70eb893624e4442ac7103e240941ee5,20200628,1593311463000,9c28c4aa8b97449297a73b36a2edf218,TraderMD,zerohedge.com,shot fired saudi arabia forced 3 iranian boat ...,English,[0.8799729943],7,0.549996
73,p,68ff083c8e8346dd9955de8cbf405daf,20201116,1605538839000,28c7f65211d8405c86c175e459077fae,RealDanMeyer,,installed new dominion fitness app took walk a...,English,[0.9085925221000001],7,0.619879
78,p,e1bfc276dfec4873a1defa75002885e7,20200822,1598087776000,bd4bae496f0f4ca79ef64e4ba19c90ef,HempOilCures,,#gates foundation negotiated 100b #contacttrac...,English,[0.6999433041],7,0.366752
89,p,7d644db4dd174db4aa662e7177e2dbf7,20201121,1605979504000,e8aa2091a216a7c5427090fc73288442,flintbedrock,devex.com,guess primary funder smartmatic click link scr...,English,[0.7982589602],7,0.69995
119,p,565af0726fed49779c6e1d6bd6b700d8,20200619,1592525088000,81e389f14ce94420a41637214983e592,realwayneroot,thegatewaypundit.com,ignorant liberal politician protestors wanted ...,English,[0.9451298714],7,0.630039
128,p,04b17eadc475416fb60bb9d71502c191,20201023,1603421070000,0df4be32f09c41399599de6018b6b0d0,RenaNicole,image-cdn.parler.com,good night patriot,English,[0.5012181997],7,0.699972
130,p,8d4e8f5ad2a14435b8a93e07efe00994,20201220,1608490983000,63ae8ebb1b4f474a9524b98a1748ceff,Aliceitstime0817,truth11.com,writing link provided contributing reader tim ...,English,[0.9335440993],7,0.412339
136,p,cdef9a20e5b74c7eaa20006756af2e52,20200920,1600602952000,606eb22054f043f4858be080f33b5996,AtomicDawg,image-cdn.parler.com,make sick stomach,English,[0.9389681816000001],7,0.549986
150,p,9749ab298a6a4cff83e2c3bead83ff6d,20200916,1600279430000,ea5548d2b27a47d5a118a954ac4a41af,EllieBOfficial,youtube.com,know small cog big wheel number 2 nursing home...,English,[0.9343290925000001],7,0.324369


In [24]:
# exploring the 8th (9th) topic
pd.concat([df[(df["doc_labels"]== 8)].head(100), df[(df["doc_labels"]== 8)].tail(100)],axis=0)

Unnamed: 0,t,id,cd,c,u,un,dmn,bo,bo_lang,conf,doc_labels,label_probs
4,p,7468cdce0d8d44439255cfc610986b0d,20200905,1599316076000,e6a367f0cc0f4ac49177186f65e3285d,EpochOpinion,theepochtimes.com,opinionby col ret john mill maritime naval dom...,English,[0.8748269081000001],8,0.699985
398,p,5d168a751f1f4520929da4f04f4cd175,20200628,1593371038000,825ee061818e87be763f3853af245c7e,ALX,,welcome parler seanhannity,English,[0.4361646175],8,0.366666
681,p,211440d402e244eab82d57d66b7b09b5,20201118,1605730027000,606eb22054f043f4858be080f33b5996,AtomicDawg,neonnettle.com,tucker carlson elite pushing covid lockdown us...,English,[0.594363451],8,0.366666
777,p,d92538891a5d431f96c9fb24208b0276,20191111,1573498117000,da19504cfec54b7e900a1e888c9ce394,ChristianAmerican,www.christianheadlines.com,seems illegal u,English,[0.9974054098],8,0.549975
1231,p,ce1ad7e8307f441c8d6675ec5ddb3103,20201209,1607521461000,973eb5bd6d2cb8a251642379252e2585,Milo,image-cdn.parler.com,hi welcome twitter,English,[0.5223123431],8,0.366666
1495,p,f0ac44d5b67b4d09b299a9f0f6c3209e,20201117,1605636465000,e7b460306f0f4a5e974bc0adeeb46531,TimRunsHisMouth,,promised eagle rare lunch follow new fun proje...,English,[0.5563858747],8,0.366664
1997,p,77e8b315e55d4f78b557ae51b9dca5fd,20200710,1594407815000,994e427d3797dfac5e9d074f718f7baf,FredWimpy,foxnews.com,u army investigating handout calling make amer...,English,[0.6492303014],8,0.346291
2163,p,eef4e8bc879642238f63bbaab95113d6,20200802,1596411907000,d06034a2080646ff875fdca2a79194ac,Orca3,facebook.com,beautiful splashdown welcome home bob doug,English,[0.6933572292],8,0.366666
3073,p,55f86ee25b2d4a19977a1b09e253e515,20200720,1595283156000,5b8a926b659b4f0da36b0d8a4044e739,Marlor,twitter.com,general flynn activated welcome back general,English,[0.6064807773],8,0.366666
3724,p,73ad353f5bec40daa31e6ca11c07b93b,20200601,1591017426000,fcefcf16b5b74052a191aa83e97f647e,BorisB,image-cdn.parler.com,foghornwisdom thefriendlyghost last week super...,English,[0.9490778446],8,0.419422


In [25]:
# exploring the 9th (10th) topic
pd.concat([df[(df["doc_labels"]== 9)].head(100), df[(df["doc_labels"]== 9)].tail(100)],axis=0)

Unnamed: 0,t,id,cd,c,u,un,dmn,bo,bo_lang,conf,doc_labels,label_probs
0,p,37e30a565f17406ba22a4424677e4680,20201123,1606168257000,b2627a26f92b41b2b6b342b5bfb5b1f0,johnnypatriot1,foxnews.com,god democrat pure anti america evil filth woul...,English,[0.8910415769000001],9,0.606022
5,p,d7d8f89e039b4a7a921f3d4a37cc077d,20201027,1603822560000,aa29151a0fd14f7b85cc3ce967063a71,Dogman1013,,exactly one week till end covid pandemic get h...,English,[0.7406236529],9,0.819962
12,p,303fa09a79534a9a8a802ab052385a47,20201129,1606637823000,2fc0e045cdf944958f5141f59df24bd5,Gatorcoastie,threadreaderapp.com,extremely interesting webpage follow quite som...,English,[0.9816192985000001],9,0.375354
14,p,62c768b777b445b0b63333b69fa4c340,20201222,1608612499000,236c8e47574090bdbe2da315f33636e6,waynedupreeshow,waynedupree.com,first election history based nation perceived ...,English,[0.9900836349000001],9,0.909982
23,p,dde6f05754e249d584b4579fe7425fd4,20200331,1585681192000,8c569b2f75e141a0a95f331b91b9a486,rowdydowdy,noqreport.com,god bless mr myyypieeeelow man medium need hol...,English,[0.27599564190000003],9,0.727938
24,p,42e10526a28042b0b17536bdf69af6f8,20200717,1594954204000,f4b502aed561cbafc5d1e97cc4164719,Jmstamper,,still trump supporter ask continues fight hard...,English,[0.9796171188],9,0.918164
28,p,6934a682df254f18b528668165bfdc36,20201209,1607533397000,e6a367f0cc0f4ac49177186f65e3285d,EpochOpinion,theepochtimes.com,opinionby salena zito much lost year #pandemic...,English,[0.9760086536],9,0.88745
41,p,a4f0cfc3a18c4bb8842b113fc0094c4e,20200522,1590163651000,b0a7c2a879c5371970645fa444450a2e,scottpresler,,today driving linden va help register 2 voter ...,English,[0.9368332624000001],9,0.535339
53,p,1fd30ab78c5346b88a9584a3c1e091b0,20190907,1567894678000,8e1681238d0a40649224d6e709d7c595,gypsycrusader,www.dailymail.co.uk,mit took 17 million jeffery epstein much money...,English,[0.9897519946000001],9,0.629788
69,p,1df21398daed4750852064e23e91748d,20190831,1567264376000,06411b8ed39b46ab8059813f1d35ebf5,KittyQNyan,thehill.com,worst still come jim comey thehill,English,[0.9616980553000001],9,0.699984
