In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
rawdat = pd.concat([pd.read_csv('../data/input/Auschwitz_segments_03112020_1.csv'),
                   pd.read_csv('../data/input/Auschwitz_segments_03112020_2.csv')])
rawdat.shape

(1067267, 12)

In [3]:
biodata = pd.read_csv('../data/input/biodata_birkenau.csv')

In [4]:
# filter only used interviews from biodata_birkenau.csv
relevant_rawdat = rawdat[rawdat.IntCode.isin(biodata['IntCode'])]
relevant_rawdat.shape

(584447, 12)

In [5]:
# create a keyword ID - Label map
kwID_kwLabel_map = relevant_rawdat[['KeywordID', 'KeywordLabel']]
kwID_kwLabel_map.drop_duplicates(inplace=True)
kwID_kwLabel_map.reset_index(inplace=True, drop=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [6]:
kwID_kwLabel_map

Unnamed: 0,KeywordID,KeywordLabel
0,7601,Auschwitz II-Birkenau (Poland : Death Camp)
1,13310,"Oświęcim (Kraków, Poland)"
2,14226,Poland 1944 (July 22) - 1945 (January 16)
3,12044,camp selections
4,14280,loved ones' separations
...,...,...
2334,83352,Christians
2335,59509,European history
2336,15692,attitudes toward Canada and/or Canadians
2337,62391,attitudes toward socialism and/or socialists


In [7]:
# find kwIds, that are in the first and last segments of relevant interviews
first_kwIDs = []
last_kwIDs = []

for intcode in pd.unique(relevant_rawdat.IntCode):
    interview = relevant_rawdat[relevant_rawdat.IntCode==intcode]
    
    # identify first and alst segments
    # extract KwID
    first_kwID = interview[interview.SegmentNumber == interview.SegmentNumber.min()].KeywordID
    last_kwID = interview[interview.SegmentNumber == interview.SegmentNumber.max()].KeywordID
    print (first_kwID)
    first_kwIDs.append(first_kwID.to_numpy())
    last_kwIDs.append(last_kwID.to_numpy())
    
# concatenate
first_kwIDs = np.concatenate(first_kwIDs)
last_kwIDs = np.concatenate(last_kwIDs)

0     7601
1    13310
2    14226
Name: KeywordID, dtype: int64
43     7601
44    13310
45    14233
Name: KeywordID, dtype: int64
153     7601
154    12044
155    16503
156    13310
157    14233
158    25452
Name: KeywordID, dtype: int64
179     7601
180    10852
181    16328
182    14049
183    13310
Name: KeywordID, dtype: int64
428     7601
429    13310
430    14233
Name: KeywordID, dtype: int64
506     7601
507    13310
508    14233
Name: KeywordID, dtype: int64
606     7601
607    10983
608    13310
609    14233
Name: KeywordID, dtype: int64
729     7601
730    10983
731    16328
732    13310
733    14233
Name: KeywordID, dtype: int64
968     7601
969    13310
970    14233
Name: KeywordID, dtype: int64
1127     7601
1128    12044
1129    14280
1130    13310
1131    14233
Name: KeywordID, dtype: int64
1271     7601
1272    13310
1273    14233
Name: KeywordID, dtype: int64
1392     7601
1393    13310
1394    14233
Name: KeywordID, dtype: int64
1479     7601
1480    13310
1481    1423

20184     7528
20185    14280
20186    13310
20187    14233
Name: KeywordID, dtype: int64
20727     7601
20728    13310
20729    14233
Name: KeywordID, dtype: int64
20903     7601
20904    10983
20905    13310
20906    14233
Name: KeywordID, dtype: int64
20992     7601
20993    10983
20994    13310
20995    14233
Name: KeywordID, dtype: int64
21313     7601
21314    13310
21315    14233
21316    16345
Name: KeywordID, dtype: int64
21555     7601
21556    13310
21557    14233
Name: KeywordID, dtype: int64
21874     7601
21875    12044
21876    16328
21877    13310
21878    14233
Name: KeywordID, dtype: int64
21958     7528
21959    10983
21960    13310
21961    14233
Name: KeywordID, dtype: int64
22044     7601
22045    10983
22046    14049
22047    13310
22048    14233
Name: KeywordID, dtype: int64
22131     7601
22132    13310
22133    14233
Name: KeywordID, dtype: int64
22189     7528
22190    10983
22191    16123
22192    13310
22193    14226
Name: KeywordID, dtype: int64
22228     

40366     7601
40367    13310
40368    14233
Name: KeywordID, dtype: int64
40417     7601
40418    12044
40419    13310
40420    14233
Name: KeywordID, dtype: int64
40859     7601
40860    13214
40861    16328
40862    13310
Name: KeywordID, dtype: int64
40915     7601
40916    26611
40917    13310
40918    14233
40919     4083
Name: KeywordID, dtype: int64
40994     7528
40995    10983
40996    13310
40997    14233
Name: KeywordID, dtype: int64
41075     7601
41076    13310
41077    14233
Name: KeywordID, dtype: int64
41263     7601
41264    13310
41265    14233
Name: KeywordID, dtype: int64
41288     7601
41289    10983
41290    15456
41291    13310
41292    38840
41293    14233
Name: KeywordID, dtype: int64
41384     7601
41385    10853
41386    13310
41387    14233
Name: KeywordID, dtype: int64
41480     7601
41481    10852
41482    13310
41483    14233
Name: KeywordID, dtype: int64
41837     7601
41838    13310
41839    14233
Name: KeywordID, dtype: int64
41960     7601
41961    1

61498     7601
61499    13310
61500    14233
Name: KeywordID, dtype: int64
61534     7601
61535    13310
61536    14233
61537    16192
Name: KeywordID, dtype: int64
61625     7601
61626    16503
61627    26611
61628    13310
61629    14233
Name: KeywordID, dtype: int64
61749     7601
61750    10983
61751    13310
61752    14233
Name: KeywordID, dtype: int64
62175     7601
62176    12044
62177     4047
62178    13310
62179    14233
Name: KeywordID, dtype: int64
62432     7601
62433    13310
62434    14233
Name: KeywordID, dtype: int64
62574     7601
62575    13310
62576    14233
Name: KeywordID, dtype: int64
62617     7601
62618    13310
62619    14233
Name: KeywordID, dtype: int64
62687     7601
62688    10983
62689    13310
62690    14233
Name: KeywordID, dtype: int64
62869     7601
62870    13310
62871    14233
Name: KeywordID, dtype: int64
62929     7601
62930    13310
62931    14233
Name: KeywordID, dtype: int64
63161     7601
63162    12044
63163    13310
63164    14233
Name: Keyw

84619     7601
84620    10983
84621    13310
84622    14233
Name: KeywordID, dtype: int64
84701     7601
84702    10983
84703    13310
84704    14233
Name: KeywordID, dtype: int64
84831     7601
84832    12044
84833    13310
84834    14233
Name: KeywordID, dtype: int64
84903     7601
84904    10853
84905    13310
84906    14233
Name: KeywordID, dtype: int64
84996     7601
84997    10983
84998    13310
84999    14233
Name: KeywordID, dtype: int64
85132     7601
85133    10983
85134    13310
85135    14233
Name: KeywordID, dtype: int64
85369     7601
85370    13310
85371    14233
Name: KeywordID, dtype: int64
85515     7528
85516    12044
85517    13310
85518    14233
Name: KeywordID, dtype: int64
85573     7601
85574    11672
85575    14280
85576    13310
85577    14233
Name: KeywordID, dtype: int64
85641     7601
85642    13310
85643    14233
85644    12161
Name: KeywordID, dtype: int64
85712     7601
85713    16856
85714    16328
85715    13310
85716    14233
Name: KeywordID, dtype: i

108848     7601
108849    13310
108850    14233
Name: KeywordID, dtype: int64
108906     7601
108907    12044
108908    14280
108909    13310
108910    14233
Name: KeywordID, dtype: int64
108937     7601
108938    13310
108939    14233
Name: KeywordID, dtype: int64
109066     7601
109067    16328
109068    13310
109069    14233
Name: KeywordID, dtype: int64
109187     7601
109188    13310
109189    14233
Name: KeywordID, dtype: int64
109532     7601
109533    26399
109534    12044
109535    15092
109536    13310
109537    14233
109538     6861
Name: KeywordID, dtype: int64
109555     7601
109556    10983
109557    13310
109558    14233
Name: KeywordID, dtype: int64
109781     7528
109782    10983
109783    13310
109784    14233
Name: KeywordID, dtype: int64
109863     7601
109864    16328
109865    13310
109866    14233
Name: KeywordID, dtype: int64
109940     7601
109941    13310
109942    14233
Name: KeywordID, dtype: int64
110212     7528
110213    13310
110214    14233
Name: Keywor

129623     7528
129624    13310
129625    14233
Name: KeywordID, dtype: int64
129712     7528
129713    13310
129714    14233
Name: KeywordID, dtype: int64
129908     7601
129909    10983
129910    13310
129911    14233
129912    15471
Name: KeywordID, dtype: int64
129981     7601
129982    12044
129983    13310
129984    14233
Name: KeywordID, dtype: int64
130027     7601
130028    13310
130029    14233
Name: KeywordID, dtype: int64
130164     7528
130165    10698
130166    13310
130167    14233
Name: KeywordID, dtype: int64
130191     7528
130192    14377
130193    13310
130194    14233
Name: KeywordID, dtype: int64
130333     7601
130334    13310
130335    14233
Name: KeywordID, dtype: int64
130692     7601
130693    13310
130694    14233
Name: KeywordID, dtype: int64
130771     7601
130772    13310
130773    14233
Name: KeywordID, dtype: int64
130890     7601
130891    13310
130892    14233
Name: KeywordID, dtype: int64
131028     7601
131029    10983
131030    13310
131031    1423

153368     7528
153369    10983
153370    14049
153371    13310
153372    14233
Name: KeywordID, dtype: int64
153612     7601
153613    13310
153614    14233
Name: KeywordID, dtype: int64
154383     7601
154384    13310
154385    14233
154386    16345
Name: KeywordID, dtype: int64
154483     7601
154484     3995
154485    13310
154486    14233
Name: KeywordID, dtype: int64
154594     7528
154595    13310
154596    14605
154597    15350
Name: KeywordID, dtype: int64
154715     7601
154716    13310
154717    14233
Name: KeywordID, dtype: int64
154754     7601
154755    12044
154756    14280
154757    13310
154758    14233
Name: KeywordID, dtype: int64
154984     7601
154985    19284
154986    16328
154987    13310
154988    14233
Name: KeywordID, dtype: int64
155069     7601
155070    10983
155071    13310
155072    14233
Name: KeywordID, dtype: int64
155171     7528
155172    13310
155173    14233
155174    16142
Name: KeywordID, dtype: int64
155284     7601
155285    13310
155286    14

174574     7528
174575    10853
174576    12044
174577     4047
174578    13310
174579    14233
Name: KeywordID, dtype: int64
174751     7601
174752    10853
174753    12044
174754    13310
174755    14233
Name: KeywordID, dtype: int64
174855     7601
174856    13310
174857    14233
Name: KeywordID, dtype: int64
174949     7601
174950    13310
174951    14233
Name: KeywordID, dtype: int64
175182     7601
175183    10983
175184    13310
175185    14233
Name: KeywordID, dtype: int64
175621     7601
175622    10983
175623    13310
175624    14233
Name: KeywordID, dtype: int64
176962     7601
176963    10983
176964    13310
176965    14233
Name: KeywordID, dtype: int64
177009     7601
177010    13819
177011    14280
177012    13310
177013    14233
Name: KeywordID, dtype: int64
177062     7601
177063    12044
177064    13310
177065    14232
177066    14233
Name: KeywordID, dtype: int64
177111     7601
177112    12127
177113    13310
177114    14233
Name: KeywordID, dtype: int64
177152     7

195303     7528
195304    11672
195305    14280
195306    13310
195307    38840
195308    14233
Name: KeywordID, dtype: int64
195363     7601
195364    13310
195365    14233
Name: KeywordID, dtype: int64
195400     7528
195401     3981
195402    13310
195403    14233
Name: KeywordID, dtype: int64
195430     7528
195431    12044
195432    11672
195433    13310
195434    14233
Name: KeywordID, dtype: int64
195555     7601
195556    10983
195557    13310
195558    14233
Name: KeywordID, dtype: int64
195599     7528
195600    13310
195601    14233
Name: KeywordID, dtype: int64
195764     7601
195765    10983
195766    13310
195767    14233
Name: KeywordID, dtype: int64
195903     7528
195904    13310
195905    14233
Name: KeywordID, dtype: int64
195944     7528
195945    27802
195946    13310
195947    14233
Name: KeywordID, dtype: int64
196116     7601
196117    10983
196118    13310
196119    14233
Name: KeywordID, dtype: int64
196208     7601
196209    15338
196210    29878
196211    13

216106     7601
216107    13310
216108    14233
Name: KeywordID, dtype: int64
216131     7601
216132    10853
216133    15338
216134    13310
216135    14233
Name: KeywordID, dtype: int64
216227     7528
216228    12288
216229    13310
216230    14233
Name: KeywordID, dtype: int64
216279     7601
216280    12127
216281    13310
216282    14233
Name: KeywordID, dtype: int64
216464     7601
216465    12044
216466    13310
216467    14233
Name: KeywordID, dtype: int64
216595     7601
216596    10983
216597    16328
216598    13310
216599    14233
Name: KeywordID, dtype: int64
216704     7601
216705    12044
216706    13310
216707    14226
Name: KeywordID, dtype: int64
216814     7601
216815    10983
216816    13310
216817    14233
Name: KeywordID, dtype: int64
216949     7528
216950    10853
216951    13310
216952    14233
Name: KeywordID, dtype: int64
217119     7601
217120    13310
217121    14233
Name: KeywordID, dtype: int64
217216     7601
217217    12044
217218    13310
217219    14

235130     7601
235131    10983
235132    13310
235133    14232
Name: KeywordID, dtype: int64
235381     7601
235382    12044
235383    13310
235384    14233
Name: KeywordID, dtype: int64
235409     7601
235410    10983
235411    14280
235412    13310
235413    14233
Name: KeywordID, dtype: int64
235463     7528
235464    37716
235465    10698
235466    14914
235467    13310
235468    16192
235469    20206
235470     6074
Name: KeywordID, dtype: int64
235569     7601
235570    12044
235571    13310
235572    14233
Name: KeywordID, dtype: int64
235595     7601
235596    10983
235597    12044
235598    13310
235599    14233
Name: KeywordID, dtype: int64
235725     7601
235726    13310
235727    14233
Name: KeywordID, dtype: int64
235788     7601
235789    13310
235790    14233
Name: KeywordID, dtype: int64
235887     7528
235888    10983
235889    13310
235890    14233
Name: KeywordID, dtype: int64
236036     7601
236037    12044
236038    14280
236039    13310
236040    14233
Name: Keyw

254716     7528
254717    10853
254718    13310
254719    14233
Name: KeywordID, dtype: int64
254766     7601
254767    12044
254768    13310
254769    38840
254770    14233
Name: KeywordID, dtype: int64
254830     7601
254831     9107
254832    12044
254833    14280
254834    13310
254835    14233
Name: KeywordID, dtype: int64
255025     7528
255026    13310
255027    14233
Name: KeywordID, dtype: int64
255122     7601
255123    10983
255124    13310
255125    14233
Name: KeywordID, dtype: int64
255181     7601
255182    12044
255183    13310
255184    14233
Name: KeywordID, dtype: int64
255300     7601
255301    10983
255302    15774
255303    13310
255304    14233
255305    12161
255306    16182
255307    16345
255308    15232
Name: KeywordID, dtype: int64
255333     7528
255334    10852
255335    13310
255336    14233
Name: KeywordID, dtype: int64
255378     7528
255379    10983
255380    16123
255381    13310
255382    14233
Name: KeywordID, dtype: int64
255506     7601
255507    

276028     7528
276029    13310
276030    14233
Name: KeywordID, dtype: int64
276056     7601
276057    13310
276058    14233
Name: KeywordID, dtype: int64
276301     7601
276302    13310
276303    14233
Name: KeywordID, dtype: int64
276636     7528
276637    16123
276638    13310
276639    14233
Name: KeywordID, dtype: int64
276818     7601
276819    12044
276820    11672
276821    13310
276822    14233
Name: KeywordID, dtype: int64
276912     7601
276913    10983
276914    13310
276915    14226
Name: KeywordID, dtype: int64
276992     7601
276993    12126
276994    11672
276995    13310
276996    14226
Name: KeywordID, dtype: int64
277027     7528
277028    13310
277029    14233
277030    14307
Name: KeywordID, dtype: int64
277159     7601
277160    12044
277161    13310
277162    38840
277163    14233
Name: KeywordID, dtype: int64
277238     7601
277239    12044
277240    16328
277241    14280
277242    13310
277243    14233
Name: KeywordID, dtype: int64
277304     7601
277305    12

298373     7528
298374    10983
298375    13310
298376    14233
Name: KeywordID, dtype: int64
298411     7601
298412    16328
298413    13310
298414    14233
Name: KeywordID, dtype: int64
298662     7528
298663    13310
298664    14233
Name: KeywordID, dtype: int64
298723     7528
298724    13310
298725    14233
298726    16142
Name: KeywordID, dtype: int64
298749     7601
298750    14280
298751    13310
298752    14233
Name: KeywordID, dtype: int64
298859     7601
298860    13310
298861    14233
Name: KeywordID, dtype: int64
298953     7601
298954    10853
298955    32953
298956    13310
298957    14233
Name: KeywordID, dtype: int64
298992     7601
298993    10983
298994    10853
298995    29878
298996    13310
298997    14233
Name: KeywordID, dtype: int64
299077     7601
299078    13310
299079    14233
Name: KeywordID, dtype: int64
299200     7601
299201    14280
299202    13310
299203    14233
Name: KeywordID, dtype: int64
299289     7601
299290    10983
299291    13310
299292    14

318553     7601
318554    15088
318555    13310
318556    14233
Name: KeywordID, dtype: int64
318639     7528
318640    12044
318641    16123
318642    13310
318643    14233
Name: KeywordID, dtype: int64
318718     7601
318719    10983
318720    13310
318721    14233
318722    31870
Name: KeywordID, dtype: int64
318856     7601
318857     2857
318858    13310
318859    14233
Name: KeywordID, dtype: int64
318888     7601
318889    10983
318890    13310
318891    14233
Name: KeywordID, dtype: int64
319006     7601
319007    13310
319008    14232
Name: KeywordID, dtype: int64
319187     7601
319188    10853
319189    14280
319190    13310
319191    14233
Name: KeywordID, dtype: int64
319250     7601
319251    16328
319252    13310
319253    14233
Name: KeywordID, dtype: int64
319286     7601
319287    15088
319288    13310
319289    14233
Name: KeywordID, dtype: int64
320094     7601
320095    10983
320096    16111
320097    16328
320098    15223
320099    15774
320100    13310
320101    

339357     7601
339358    16503
339359    26611
339360    13310
339361    14233
339362    25452
Name: KeywordID, dtype: int64
339424     7601
339425    10852
339426    18925
339427    16328
339428    15223
339429    13819
339430    13310
339431    14233
Name: KeywordID, dtype: int64
339550     7528
339551    13310
339552    14233
Name: KeywordID, dtype: int64
339656     7528
339657    12044
339658    14280
339659    13310
339660    14232
Name: KeywordID, dtype: int64
339715     7528
339716    14280
339717    13310
339718    14233
Name: KeywordID, dtype: int64
339830     7601
339831    12044
339832    13310
339833    14233
339834    37873
Name: KeywordID, dtype: int64
340157     7601
340158    12044
340159     4047
340160    13310
340161    14233
Name: KeywordID, dtype: int64
340225     7528
340226    13310
340227    14233
Name: KeywordID, dtype: int64
340259     7601
340260    13310
340261    14232
Name: KeywordID, dtype: int64
340303     7601
340304    13310
340305    14233
Name: Keyw

361527     7601
361528    13310
361529    14232
Name: KeywordID, dtype: int64
361620     7601
361621    16328
361622    13310
361623    14233
Name: KeywordID, dtype: int64
361657     7528
361658    10983
361659    13310
361660    14233
Name: KeywordID, dtype: int64
361682     7601
361683    10853
361684    13310
361685    14233
Name: KeywordID, dtype: int64
361888     7601
361889    13310
361890    14233
Name: KeywordID, dtype: int64
362001     7601
362002    10983
362003    13310
362004    14232
Name: KeywordID, dtype: int64
362176     7601
362177    13310
362178    14233
362179     6861
362180    16345
Name: KeywordID, dtype: int64
362374     7601
362375    16328
362376    13310
362377    14233
Name: KeywordID, dtype: int64
362420     7601
362421    16471
362422    13310
362423    14233
362424     4024
Name: KeywordID, dtype: int64
362531     7601
362532    13310
362533    14233
Name: KeywordID, dtype: int64
362600     7601
362601    13310
362602    14233
Name: KeywordID, dtype: int6

386173     7601
386174    10983
386175    13310
386176    14233
Name: KeywordID, dtype: int64
386244     7601
386245    13310
386246    14233
Name: KeywordID, dtype: int64
386320     7528
386321    10983
386322    13310
386323    14233
386324     6861
Name: KeywordID, dtype: int64
386446     7601
386447    21330
386448    10983
386449    13310
386450    14232
Name: KeywordID, dtype: int64
386561     7601
386562    10983
386563    12044
386564    13310
386565    14233
386566     6861
Name: KeywordID, dtype: int64
386671     7601
386672    10983
386673     4038
386674    13310
386675    14233
Name: KeywordID, dtype: int64
386722     7601
386723    13310
386724    14233
Name: KeywordID, dtype: int64
386777     7528
386778    10853
386779    13310
386780    14233
Name: KeywordID, dtype: int64
386797     7601
386798     2857
386799    10983
386800    12044
386801    13310
386802    14233
Name: KeywordID, dtype: int64
386946     7601
386947    12044
386948    13310
386949    14233
Name: Keyw

406099     7528
406100    10983
406101    11672
406102    13310
406103    14233
Name: KeywordID, dtype: int64
406228     7601
406229    10853
406230    12044
406231    13310
406232    14233
Name: KeywordID, dtype: int64
406283     7601
406284    16328
406285    13819
406286    14280
406287    13310
406288    14233
Name: KeywordID, dtype: int64
406368     7528
406369    10853
406370    13310
406371    14232
Name: KeywordID, dtype: int64
406494     7601
406495    14280
406496    13310
406497    14233
Name: KeywordID, dtype: int64
406600     7601
406601    12044
406602     4047
406603    13310
406604    14233
Name: KeywordID, dtype: int64
406832     7601
406833    16882
406834    16328
406835    15223
406836    15774
406837    13310
406838    14233
Name: KeywordID, dtype: int64
407188     7601
407189    14280
407190    13310
407191    14233
407192     6861
Name: KeywordID, dtype: int64
407232     7601
407233    10983
407234    13310
407235    14233
Name: KeywordID, dtype: int64
407457    

433594     7528
433595    17246
433596    13310
433597    14233
Name: KeywordID, dtype: int64
433739     7601
433740    13310
433741    14233
Name: KeywordID, dtype: int64
434579     7601
434580    15187
434581    13310
434582    14226
Name: KeywordID, dtype: int64
434705     7601
434706    13310
434707    14233
Name: KeywordID, dtype: int64
434762     7601
434763    13310
434764    14233
Name: KeywordID, dtype: int64
434899     7601
434900    13310
434901    14233
Name: KeywordID, dtype: int64
434958     7528
434959    13310
434960    14232
434961    15180
Name: KeywordID, dtype: int64
435054     7601
435055    10853
435056    13310
435057    14233
Name: KeywordID, dtype: int64
435158     7528
435159    12044
435160    13310
435161    14233
Name: KeywordID, dtype: int64
435240     7528
435241    14280
435242    13310
435243    14233
Name: KeywordID, dtype: int64
435365     7601
435366    13310
435367    14233
Name: KeywordID, dtype: int64
435487     7601
435488    13310
435489    1423

456219     7601
456220    14918
456221    26826
456222    13310
456223    14233
Name: KeywordID, dtype: int64
456320     7528
456321    10983
456322    13310
456323    14233
Name: KeywordID, dtype: int64
456684     7528
456685    12044
456686    13310
456687    14233
Name: KeywordID, dtype: int64
456724     7601
456725    13310
456726    14233
Name: KeywordID, dtype: int64
457147     7601
457148    13310
457149    14233
Name: KeywordID, dtype: int64
457395     7601
457396    13310
457397    14233
Name: KeywordID, dtype: int64
457497     7601
457498    15116
457499    12044
457500    11672
457501    14280
457502    13310
457503    14233
Name: KeywordID, dtype: int64
457797     7601
457798    10853
457799    13310
457800    14233
Name: KeywordID, dtype: int64
457889     7601
457890    10853
457891    16328
457892    13310
457893    14233
Name: KeywordID, dtype: int64
457941     7528
457942    42993
457943    26611
457944    13310
457945    14233
Name: KeywordID, dtype: int64
457989     7

480651     7528
480652    10983
480653    13310
480654    14233
Name: KeywordID, dtype: int64
480689     7601
480690    10698
480691    13310
480692    14233
Name: KeywordID, dtype: int64
480836     7601
480837    13310
480838    14233
Name: KeywordID, dtype: int64
481589     7601
481590    10983
481591    13310
481592    14233
Name: KeywordID, dtype: int64
481633     7601
481634    12044
481635    13819
481636    13310
481637    14233
Name: KeywordID, dtype: int64
481733     7601
481734    13310
481735    14232
Name: KeywordID, dtype: int64
482001     7601
482002    13310
482003    14233
Name: KeywordID, dtype: int64
482281     7601
482282    10983
482283    13310
482284    14233
Name: KeywordID, dtype: int64
482383     7601
482384    13310
482385    14233
482386    15180
482387     6861
Name: KeywordID, dtype: int64
482624     7601
482625    13310
482626    14233
Name: KeywordID, dtype: int64
482682     7601
482683    12044
482684    14280
482685    13310
482686    14233
Name: Keywor

501385     7601
501386    10983
501387    13310
501388    14233
Name: KeywordID, dtype: int64
501640     7601
501641    13310
501642    14233
Name: KeywordID, dtype: int64
501731     7601
501732    10853
501733    13310
501734    14233
501735    14226
Name: KeywordID, dtype: int64
501896     7528
501897    13819
501898    13310
501899    14233
Name: KeywordID, dtype: int64
501972     7601
501973    13310
501974    14233
Name: KeywordID, dtype: int64
502042     7601
502043    13310
502044    14233
Name: KeywordID, dtype: int64
502119     7601
502120    12044
502121    13310
502122    14233
Name: KeywordID, dtype: int64
502173     7601
502174    10983
502175     4227
502176    13310
502177    14233
Name: KeywordID, dtype: int64
502382     7528
502383    13310
502384    14233
Name: KeywordID, dtype: int64
502560     7601
502561     7473
502562    15153
502563    13310
502564    14233
502565    12918
502566    11752
Name: KeywordID, dtype: int64
502624     7601
502625    13310
502626    14

523547     7601
523548    16328
523549    13310
523550    14233
Name: KeywordID, dtype: int64
523902     7601
523903    13310
523904    14233
Name: KeywordID, dtype: int64
523984     7601
523985    12044
523986    13310
523987    14233
Name: KeywordID, dtype: int64
524220     7601
524221    10983
524222    13310
524223    14233
Name: KeywordID, dtype: int64
524403     7601
524404    16503
524405     4227
524406    13310
524407    14233
524408    25452
Name: KeywordID, dtype: int64
524514    13018
524515    12044
524516    11672
524517    14280
524518     4047
524519    13310
524520    14233
Name: KeywordID, dtype: int64
524574     7528
524575    12044
524576    13310
524577    14233
Name: KeywordID, dtype: int64
524882     7601
524883    10983
524884    13310
524885    14233
Name: KeywordID, dtype: int64
524923     7528
524924    10983
524925    13310
524926    14233
Name: KeywordID, dtype: int64
525179     7601
525180    13310
525181    14233
Name: KeywordID, dtype: int64
525233     7

10807     7528
10808    13310
10809    14233
Name: KeywordID, dtype: int64
10861     7601
10862    13310
10863    14233
Name: KeywordID, dtype: int64
11372     7528
11373    22828
11374    10983
11375    13310
11376    14233
Name: KeywordID, dtype: int64
11565     7601
11566    12044
11567    16328
11568    14280
11569     4047
11570    13310
11571    14233
Name: KeywordID, dtype: int64
11736     7601
11737    75674
11738    16328
11739    13310
11740    14233
Name: KeywordID, dtype: int64
12020     7601
12021    13310
12022    14233
Name: KeywordID, dtype: int64
12208     7601
12209    13310
12210    14233
Name: KeywordID, dtype: int64
12268     7601
12269    13310
12270    14233
Name: KeywordID, dtype: int64
12467     7601
12468    16103
12469    16328
12470    15223
12471    13819
12472    15774
12473    13310
12474    14233
Name: KeywordID, dtype: int64
12497     7601
12498    10807
12499    16328
12500    26611
12501    13310
12502    14233
Name: KeywordID, dtype: int64
12858     

31753     7601
31754    12044
31755     3241
31756     3242
31757    11672
31758    14280
31759    13310
31760    14233
Name: KeywordID, dtype: int64
31795     7601
31796    13310
31797    14233
Name: KeywordID, dtype: int64
31903     7601
31904    13310
31905    14233
Name: KeywordID, dtype: int64
31944     7601
31945    12044
31946    14280
31947    13310
31948    14233
Name: KeywordID, dtype: int64
32058     7601
32059    10853
32060    13310
32061    14233
Name: KeywordID, dtype: int64
32379     7601
32380    13310
32381    14233
Name: KeywordID, dtype: int64
32442     7528
32443    10853
32444     4071
32445    13310
32446    14233
Name: KeywordID, dtype: int64
32600     7601
32601     2857
32602    10853
32603    16328
32604     4227
32605    13310
32606    14233
Name: KeywordID, dtype: int64
32630     7601
32631    13310
32632    14233
Name: KeywordID, dtype: int64
32695     7528
32696    16123
32697    14280
32698    13310
32699    14233
Name: KeywordID, dtype: int64
32774     

53278     7601
53279    16503
53280    15455
53281    16328
53282     4227
53283    13310
53284    14233
Name: KeywordID, dtype: int64
53772     7601
53773     3241
53774    14280
53775    13310
53776    14233
Name: KeywordID, dtype: int64
53848     7601
53849    10853
53850    13310
53851    14233
53852    16192
53853     4308
Name: KeywordID, dtype: int64
53961     7601
53962    13310
53963    14233
Name: KeywordID, dtype: int64
54079     7601
54080    10853
54081    16328
54082    13310
54083    14233
Name: KeywordID, dtype: int64
54105     7601
54106    12044
54107    13310
54108    14233
Name: KeywordID, dtype: int64
54379     7601
54380    10983
54381    13310
54382    14233
Name: KeywordID, dtype: int64
54627     7601
54628    16328
54629    26611
54630    13310
54631    14233
54632    31870
Name: KeywordID, dtype: int64
54772     7601
54773    10853
54774    12044
54775    11672
54776    14280
54777    13310
54778    14233
Name: KeywordID, dtype: int64
55028     7601
55029    1

76297     7601
76298    11672
76299    13310
76300    14233
Name: KeywordID, dtype: int64
76427     7601
76428    26399
76429    13310
76430    14226
Name: KeywordID, dtype: int64
76578     7601
76579    13310
76580    14233
Name: KeywordID, dtype: int64
76647     7601
76648    16103
76649    16328
76650    15774
76651    13310
76652    14233
Name: KeywordID, dtype: int64
76693     7601
76694    13310
76695    14232
Name: KeywordID, dtype: int64
76719     7601
76720    10853
76721    13310
76722    14233
Name: KeywordID, dtype: int64
76833     7601
76834    13310
76835    14233
Name: KeywordID, dtype: int64
76984     7528
76985    10983
76986    13310
76987    14233
Name: KeywordID, dtype: int64
77064     7601
77065    12044
77066    16328
77067    13310
77068    14233
Name: KeywordID, dtype: int64
77139     7601
77140    13310
77141    14233
Name: KeywordID, dtype: int64
77625     7601
77626    15098
77627    13310
77628    14233
Name: KeywordID, dtype: int64
77880     7528
77881    1

97067     7601
97068    16328
97069    29839
97070    13310
97071    14233
Name: KeywordID, dtype: int64
97247     7601
97248    12044
97249    13310
97250    14233
Name: KeywordID, dtype: int64
97346     7601
97347    14280
97348    13310
97349    14233
Name: KeywordID, dtype: int64
98299     7601
98300    13310
98301    14233
Name: KeywordID, dtype: int64
98342     7528
98343    10983
98344    15629
98345    13310
98346    14233
Name: KeywordID, dtype: int64
98449     7601
98450    12044
98451    11672
98452    14280
98453    13310
98454    14233
Name: KeywordID, dtype: int64
98509     7601
98510    15107
98511     4227
98512    13310
98513    14233
98514     4066
Name: KeywordID, dtype: int64
98881     7601
98882    10983
98883    12044
98884    13310
98885    14233
Name: KeywordID, dtype: int64
98922     7601
98923    12044
98924    13819
98925     4047
98926    13310
98927    14233
Name: KeywordID, dtype: int64
99255     7601
99256    12044
99257    13310
99258    14233
Name: Keyw

118840     7601
118841    14280
118842    13310
118843    14233
Name: KeywordID, dtype: int64
118898     7601
118899    14280
118900    13310
118901    14233
Name: KeywordID, dtype: int64
119233     7528
119234    13310
119235    14233
Name: KeywordID, dtype: int64
119372     7528
119373    13819
119374    13310
119375    14233
Name: KeywordID, dtype: int64
119404     7528
119405    13310
119406    14233
Name: KeywordID, dtype: int64
119819     7601
119820    13310
119821    14233
Name: KeywordID, dtype: int64
119868     7528
119869    13310
119870    14233
Name: KeywordID, dtype: int64
120062     7528
120063    13310
120064    14233
Name: KeywordID, dtype: int64
120115     7601
120116    13310
120117    14233
Name: KeywordID, dtype: int64
120165     7601
120166    13310
120167    14233
120168    15773
120169    12161
120170    16182
120171    16345
120172    15232
Name: KeywordID, dtype: int64
120209     7601
120210    13310
120211    14233
Name: KeywordID, dtype: int64
120450     760

143446     7601
143447    13310
143448    14233
Name: KeywordID, dtype: int64
143864     7601
143865    26469
143866    14280
143867    13310
143868    14233
Name: KeywordID, dtype: int64
143997     7601
143998    13310
143999    14226
144000    16192
Name: KeywordID, dtype: int64
144222     7601
144223    13310
144224    14232
144225    14233
144226    15180
144227     4308
Name: KeywordID, dtype: int64
144635     7601
144636    12044
144637    13214
144638    10807
144639    16323
144640    16328
144641     4047
144642    13310
144643    14233
Name: KeywordID, dtype: int64
144710     7601
144711    13310
144712    14233
Name: KeywordID, dtype: int64
144773     7601
144774    13310
144775    14233
Name: KeywordID, dtype: int64
145046     7528
145047    10983
145048    13310
145049    14233
Name: KeywordID, dtype: int64
145348     7601
145349    12044
145350    14280
145351    13310
145352    14233
Name: KeywordID, dtype: int64
145390     7528
145391     7601
145392    10983
145393    

167649     7601
167650    13310
167651    14233
Name: KeywordID, dtype: int64
167765     7601
167766    13310
167767    14233
Name: KeywordID, dtype: int64
168720     7528
168721    13310
168722    14233
Name: KeywordID, dtype: int64
168964     7601
168965    16108
168966    16328
168967    13310
168968    14233
Name: KeywordID, dtype: int64
169135     7601
169136    12044
169137    13310
169138    14233
Name: KeywordID, dtype: int64
169202     7601
169203    13310
169204    14233
Name: KeywordID, dtype: int64
169324     7601
169325    26826
169326    12321
169327    13310
169328    14232
169329    14233
169330    16192
Name: KeywordID, dtype: int64
169537     7601
169538    10983
169539    10853
169540    13310
169541    14233
Name: KeywordID, dtype: int64
169752     7528
169753    10983
169754    13310
169755    14233
Name: KeywordID, dtype: int64
169813     7528
169814    13310
169815    14233
Name: KeywordID, dtype: int64
170298     7601
170299    10945
170300    13310
170301    14

191238     7601
191239    13310
191240    14233
Name: KeywordID, dtype: int64
191354     7601
191355    12044
191356    13310
191357    14233
Name: KeywordID, dtype: int64
191690     7601
191691    12044
191692    16328
191693    13310
191694    14233
Name: KeywordID, dtype: int64
191824     7528
191825    13819
191826    13310
191827    14233
Name: KeywordID, dtype: int64
191893     7528
191894    13310
191895    14232
191896    14233
191897    14226
Name: KeywordID, dtype: int64
192047     7601
192048    10983
192049    12044
192050     4047
192051    13310
192052    14233
192053     6861
Name: KeywordID, dtype: int64
192634     7601
192635    14280
192636    13310
192637    14233
Name: KeywordID, dtype: int64
192683     7601
192684    12044
192685    11672
192686    14280
192687    13310
192688    14233
Name: KeywordID, dtype: int64
192745     7601
192746    10853
192747    12044
192748    14280
192749    13310
192750    14233
Name: KeywordID, dtype: int64
192783     7601
192784    

216277     7601
216278    10853
216279    13310
216280    14233
Name: KeywordID, dtype: int64
216310     7528
216311    13310
216312    14233
216313    11735
Name: KeywordID, dtype: int64
216367     7601
216368    10983
216369    13310
216370    14233
Name: KeywordID, dtype: int64
216667     7601
216668     4227
216669    13310
216670    14233
216671    25452
Name: KeywordID, dtype: int64
216731     7601
216732    13310
216733    14232
216734    14233
Name: KeywordID, dtype: int64
217174     7601
217175    10853
217176    13310
217177    14233
Name: KeywordID, dtype: int64
217357     7601
217358    10983
217359    13310
217360    14232
Name: KeywordID, dtype: int64
217418     7601
217419    12044
217420    10698
217421    13310
217422    14232
Name: KeywordID, dtype: int64
217633     7528
217634     2857
217635    12044
217636    13310
217637    14233
Name: KeywordID, dtype: int64
217654     7601
217655    10983
217656    13310
217657    14233
Name: KeywordID, dtype: int64
217709     7

239738     7601
239739    10853
239740    13310
239741    14233
Name: KeywordID, dtype: int64
239848     7601
239849    10983
239850    13310
239851    14233
Name: KeywordID, dtype: int64
240080     7601
240081    13310
240082    14233
240083    16192
Name: KeywordID, dtype: int64
240295     7528
240296    10853
240297    12044
240298    13310
240299    14233
Name: KeywordID, dtype: int64
240334     7528
240335    12044
240336    14280
240337    13310
240338    14233
Name: KeywordID, dtype: int64
240387     7528
240388    13310
240389    14233
Name: KeywordID, dtype: int64
240456     7528
240457    22828
240458    16111
240459    16123
240460    13310
240461    14233
Name: KeywordID, dtype: int64
240493     7601
240494    13310
240495    14233
Name: KeywordID, dtype: int64
240607     7528
240608    10852
240609    43014
240610    13310
240611    14233
Name: KeywordID, dtype: int64
240704     7528
240705    10853
240706    17246
240707    13310
240708    14232
Name: KeywordID, dtype: in

260350     7601
260351    39065
260352    16328
260353    15223
260354    15774
260355    13310
260356    14233
260357    14307
Name: KeywordID, dtype: int64
260512     7601
260513    13310
260514    14233
Name: KeywordID, dtype: int64
260541     7528
260542    13310
260543    14233
Name: KeywordID, dtype: int64
260972     7601
260973    13310
260974    14233
Name: KeywordID, dtype: int64
261143     7601
261144    12044
261145    13310
261146    14232
Name: KeywordID, dtype: int64
261405     7601
261406    10983
261407    13310
261408    14226
Name: KeywordID, dtype: int64
261608     7601
261609    12044
261610    13819
261611    13310
261612    14233
Name: KeywordID, dtype: int64
261676     7601
261677    10853
261678    13310
261679    14232
Name: KeywordID, dtype: int64
262146     7601
262147    11672
262148    13310
262149    14233
Name: KeywordID, dtype: int64
262288     7601
262289    10853
262290    13310
262291    14233
Name: KeywordID, dtype: int64
262404     7601
262405    10

287728     7601
287729    26399
287730    12044
287731    16503
287732    13819
287733     4227
287734    13310
287735    14233
287736    25452
Name: KeywordID, dtype: int64
287837     7601
287838    14280
287839    13310
287840    14233
Name: KeywordID, dtype: int64
288193     7601
288194    10983
288195    13310
288196    14233
Name: KeywordID, dtype: int64
288461     7601
288462    13310
288463    14233
288464    14226
Name: KeywordID, dtype: int64
288619     7601
288620    13310
288621    14233
Name: KeywordID, dtype: int64
288788     7601
288789    12044
288790     4047
288791    13310
288792    14233
Name: KeywordID, dtype: int64
288914     7601
288915    13310
288916    14233
Name: KeywordID, dtype: int64
288993    13018
288994    13310
288995    14233
288996    15653
Name: KeywordID, dtype: int64
289243     7601
289244    12044
289245    13310
289246    14233
Name: KeywordID, dtype: int64
289319     7601
289320    15104
289321    10853
289322    32953
289323    13310
289324    

315585     7601
315586    13310
315587    14233
Name: KeywordID, dtype: int64
315759     7528
315760    13214
315761    16123
315762     3242
315763    13310
Name: KeywordID, dtype: int64
315802     7601
315803    15088
315804    13310
315805    14233
Name: KeywordID, dtype: int64
315849     7601
315850    10983
315851    13310
315852    14233
Name: KeywordID, dtype: int64
316064     7601
316065    10853
316066    26611
316067    13310
316068    14233
Name: KeywordID, dtype: int64
316471     7601
316472    13310
316473    14233
Name: KeywordID, dtype: int64
316587     7528
316588    16451
316589    10983
316590    48911
316591    13310
316592    14233
316593    14226
Name: KeywordID, dtype: int64
316784     7528
316785    10983
316786    13310
316787    14233
Name: KeywordID, dtype: int64
316885     7601
316886    10853
316887    12044
316888    13310
316889    14233
Name: KeywordID, dtype: int64
317034     7601
317035    13310
317036    14233
Name: KeywordID, dtype: int64
317099     7

340497     7601
340498    12044
340499    41446
340500    15455
340501    13310
340502    14233
Name: KeywordID, dtype: int64
340678     7601
340679    13310
340680    14233
Name: KeywordID, dtype: int64
340803     7528
340804    13310
340805    14226
Name: KeywordID, dtype: int64
341370     7601
341371    12044
341372    13310
341373    14233
Name: KeywordID, dtype: int64
341437     7601
341438    10983
341439    13310
341440    14233
341441    14226
Name: KeywordID, dtype: int64
341589     7601
341590    10983
341591    13310
341592    14233
Name: KeywordID, dtype: int64
341700     7528
341701    10853
341702    13310
341703    14233
Name: KeywordID, dtype: int64
341744     7601
341745    12044
341746    26662
341747    13819
341748    14280
341749    13310
341750    14233
Name: KeywordID, dtype: int64
341981     7601
341982    10983
341983    13310
341984    14233
Name: KeywordID, dtype: int64
342220     7601
342221    13310
342222    14232
Name: KeywordID, dtype: int64
342279     7

360576     7528
360577     7601
360578    12044
360579    13310
360580    38840
360581    14233
Name: KeywordID, dtype: int64
360748     7601
360749    12044
360750    14280
360751    13310
360752    14232
Name: KeywordID, dtype: int64
360927     7601
360928    10983
360929    13310
360930    14233
Name: KeywordID, dtype: int64
361427     7601
361428    10983
361429     4227
361430    13310
361431    14233
361432    14917
361433    20206
Name: KeywordID, dtype: int64
361542     7601
361543    10853
361544    12044
361545    13310
361546    14233
Name: KeywordID, dtype: int64
361615     7601
361616    13310
361617    14233
Name: KeywordID, dtype: int64
361744     7601
361745    13310
361746    14233
Name: KeywordID, dtype: int64
361831     7601
361832    10698
361833    13310
361834    14233
Name: KeywordID, dtype: int64
361926     7528
361927    10853
361928    13310
361929    14233
Name: KeywordID, dtype: int64
361964     7601
361965    13310
361966    14233
Name: KeywordID, dtype: in

389915     7601
389916    13310
389917    14233
389918    15468
Name: KeywordID, dtype: int64
390481     7601
390482    13310
390483    14233
Name: KeywordID, dtype: int64
390524     7528
390525    10983
390526    13310
390527    14233
Name: KeywordID, dtype: int64
391051     7528
391052    13310
391053    14233
Name: KeywordID, dtype: int64
391098     7528
391099    16503
391100    13310
391101    14233
391102    17207
Name: KeywordID, dtype: int64
391169     7601
391170    12044
391171    13310
391172    14233
Name: KeywordID, dtype: int64
391277     7601
391278    12044
391279    13819
391280    11672
391281    14280
391282    13310
391283    14233
391284     6861
Name: KeywordID, dtype: int64
391607     7528
391608    16123
391609    13310
391610    14233
Name: KeywordID, dtype: int64
391764     7601
391765    10983
391766    12044
391767    13310
391768    14233
Name: KeywordID, dtype: int64
391810     7601
391811    13310
391812    14233
Name: KeywordID, dtype: int64
391839     7

414909     7601
414910    13310
414911    14233
Name: KeywordID, dtype: int64
415152     7601
415153    16328
415154    13310
415155    14233
Name: KeywordID, dtype: int64
415293     7528
415294    16123
415295    13310
415296    14233
Name: KeywordID, dtype: int64
415418     7528
415419    15088
415420    13310
415421    14233
Name: KeywordID, dtype: int64
415481     7601
415482    10983
415483    13310
415484    14233
Name: KeywordID, dtype: int64
415548     7601
415549    13310
415550    14233
Name: KeywordID, dtype: int64
415572     7601
415573    13310
415574    14233
Name: KeywordID, dtype: int64
415653     7601
415654    16328
415655    13310
415656    14233
Name: KeywordID, dtype: int64
415935     7528
415936    13310
415937    14233
Name: KeywordID, dtype: int64
416152     7601
416153    13310
416154    14233
Name: KeywordID, dtype: int64
416214     7601
416215    13310
416216    14233
Name: KeywordID, dtype: int64
416350     7601
416351    13310
416352    14233
Name: KeywordI

435170     7601
435171    13310
435172    14233
435173    16345
Name: KeywordID, dtype: int64
435254     7601
435255    13310
435256    14233
Name: KeywordID, dtype: int64
435287     7601
435288    16328
435289    15337
435290    13310
435291    14233
Name: KeywordID, dtype: int64
435390     7528
435391    13310
435392    14233
Name: KeywordID, dtype: int64
435550     7601
435551    13310
435552    14233
435553    16192
Name: KeywordID, dtype: int64
435732     7601
435733    12044
435734    13310
435735    14233
Name: KeywordID, dtype: int64
435774     7601
435775    13819
435776    13310
435777    14233
Name: KeywordID, dtype: int64
435845     7601
435846    12044
435847    13310
435848    14233
Name: KeywordID, dtype: int64
435915     7601
435916    16328
435917    13310
435918    14233
Name: KeywordID, dtype: int64
436055     7601
436056    12044
436057    13310
436058    14233
Name: KeywordID, dtype: int64
436097     7528
436098    16123
436099    13310
436100    14233
Name: Keywor

452746     7601
452747    11672
452748    14280
452749    13310
452750    14233
452751    16277
Name: KeywordID, dtype: int64
452982     7601
452983    13310
452984    14233
Name: KeywordID, dtype: int64
453081     7601
453082    12044
453083    13310
453084    14233
Name: KeywordID, dtype: int64
453115     7601
453116    13310
453117    14233
Name: KeywordID, dtype: int64
453340     7601
453341    14280
453342    13310
453343    14233
453344    46670
Name: KeywordID, dtype: int64
453376     7601
453377    12044
453378     4047
453379    13310
453380    14233
Name: KeywordID, dtype: int64
453441     7601
453442    10983
453443    15337
453444    13310
453445    14233
Name: KeywordID, dtype: int64
453510     7601
453511    12044
453512    13310
453513    14232
453514     6861
Name: KeywordID, dtype: int64
453585     7601
453586    10983
453587    13310
453588    14233
Name: KeywordID, dtype: int64
453685     7528
453686    11672
453687    14280
453688    13310
453689    14233
Name: Keyw

488493     7528
488494    16103
488495    16123
488496    15223
488497    15774
488498     4889
488499     8057
488500    12754
488501    27440
488502    13310
488503    14233
Name: KeywordID, dtype: int64
488803     7528
488804    16451
488805    26399
488806    14280
488807    13310
488808    14233
Name: KeywordID, dtype: int64
489002     7601
489003    10983
489004    12288
489005    13310
489006    14233
489007    10005
Name: KeywordID, dtype: int64
489076     7601
489077    10853
489078    12044
489079    11672
489080    14280
489081    13310
489082    14233
Name: KeywordID, dtype: int64
489155     7601
489156    10853
489157    13310
489158    14233
Name: KeywordID, dtype: int64
489507     7601
489508    12044
489509     6777
489510    11672
489511    14280
489512     4047
489513    13310
489514    14226
Name: KeywordID, dtype: int64
489680     7528
489681    10853
489682    12126
489683    14280
489684    13310
489685    14233
Name: KeywordID, dtype: int64
489908     7601
489909

503195     7601
503196    14380
503197    10853
503198    30447
503199    13310
503200    14233
Name: KeywordID, dtype: int64
503477     7601
503478    10983
503479    13310
503480    14233
Name: KeywordID, dtype: int64
503613     7528
503614    10853
503615     3242
503616    14280
503617    13310
503618    14233
Name: KeywordID, dtype: int64
503656     7601
503657    10853
503658    11015
503659    10698
503660    13310
503661    14233
Name: KeywordID, dtype: int64
503808     7601
503809    16451
503810    10983
503811    13875
503812    10853
503813    15585
503814     4227
503815    11672
503816    13310
503817    14233
Name: KeywordID, dtype: int64
504045     7601
504046    10983
504047    13310
504048    14233
Name: KeywordID, dtype: int64
504300     7601
504301    16103
504302    16328
504303    15774
504304    12127
504305    12328
504306     8057
504307    12754
504308    13310
504309    14233
Name: KeywordID, dtype: int64
504417     7528
504418    10852
504419    16103
504420

514312     7601
514313     4227
514314    13310
514315    14233
514316    14917
514317    25452
Name: KeywordID, dtype: int64
514559     7601
514560    10853
514561    13310
514562    14233
Name: KeywordID, dtype: int64
514617     7601
514618    33502
514619    16328
514620    15223
514621    15774
514622    11713
514623    14049
514624    14541
514625    12720
514626    13310
514627    14233
Name: KeywordID, dtype: int64
515148     7601
515149    15774
515150    13310
515151    19670
515152    22937
515153    14233
515154    19672
515155    16345
515156    15232
Name: KeywordID, dtype: int64
515241     7601
515242    10983
515243    21332
515244    14293
515245    13310
515246    14233
515247    14226
Name: KeywordID, dtype: int64
515307     7601
515308     7991
515309    13363
515310    10852
515311    16848
515312    16328
515313    15223
515314    15774
515315    13310
515316    14233
515317    14307
Name: KeywordID, dtype: int64
515765     7528
515766    15337
515767    13310
5157

530601     7601
530602    15098
530603    10853
530604    13214
530605    13310
530606    14233
Name: KeywordID, dtype: int64
530652     7601
530653    10698
530654    13310
530655    14233
Name: KeywordID, dtype: int64
530692    13018
530693    13310
530694    14232
Name: KeywordID, dtype: int64
531430     7528
531431    10983
531432    13310
531433    14232
Name: KeywordID, dtype: int64
531705     7601
531706    10983
531707    13310
531708    14233
Name: KeywordID, dtype: int64
532127     7528
532128    10853
532129    13310
532130    14232
Name: KeywordID, dtype: int64
532366     7601
532367    12044
532368    13310
532369    14233
Name: KeywordID, dtype: int64
532554     7601
532555    10983
532556    13310
532557    14233
Name: KeywordID, dtype: int64
532644     7601
532645    14370
532646    12122
532647    13310
532648    14233
532649    23423
532650    11786
Name: KeywordID, dtype: int64


In [49]:
# bincount (histogram topics in both cases)
bc_first = np.bincount(first_kwIDs, minlength=kwID_kwLabel_map.KeywordID.max())
bc_last = np.bincount(last_kwIDs, minlength=kwID_kwLabel_map.KeywordID.max())

In [74]:
# argsort (backwards) to get highest populated states
idx_first = bc_first.argsort()[::-1]
idx_last = bc_last.argsort()[::-1]

# printed number of topics (only printout)
print_top_n = 15

In [76]:
# left is prevalence of a topic, i.e. fraction of first frames that have this topic
# second is topic, followed by keywordID in brackets.
print('first topics')
for idx in idx_first[:print_top_n]:
    print(f'{np.round(bc_first[idx]/bc_first.sum(), 2)}: '+\
          f'{kwID_kwLabel_map[kwID_kwLabel_map.KeywordID == idx].KeywordLabel.values[0]}' +\
          f'[KeywordID: {idx}]'
         )

first topics
0.23: Oświęcim (Kraków, Poland)[KeywordID: 13310]
0.21: Poland 1944[KeywordID: 14233]
0.18: Auschwitz II-Birkenau (Poland : Death Camp)[KeywordID: 7601]
0.05: Auschwitz (Poland : Concentration Camp)(generic)[KeywordID: 7528]
0.04: camp selections[KeywordID: 12044]
0.04: camp first impressions[KeywordID: 10983]
0.02: camp intake procedures[KeywordID: 10853]
0.02: deportation to Auschwitz II-Birkenau (Poland : Death Camp)[KeywordID: 16328]
0.02: loved ones' separations[KeywordID: 14280]
0.02: Poland 1943[KeywordID: 14232]
0.01: Poland 1944 (July 22) - 1945 (January 16)[KeywordID: 14226]
0.01: freight trains[KeywordID: 15774]
0.01: loved ones' final contacts[KeywordID: 11672]
0.01: Mengele, Josef[KeywordID: 4047]
0.0: deportation to Auschwitz (Poland : Concentration Camp)(generic)[KeywordID: 16123]


In [77]:
print('last topics')
for idx in idx_last[:print_top_n]:
    print(f'{np.round(bc_last[idx]/bc_last.sum(), 2)}: '+\
          f'{kwID_kwLabel_map[kwID_kwLabel_map.KeywordID == idx].KeywordLabel.values[0]}' +\
          f'[KeywordID: {idx}]'
         )

last topics
0.25: Oświęcim (Kraków, Poland)[KeywordID: 13310]
0.19: Auschwitz II-Birkenau (Poland : Death Camp)[KeywordID: 7601]
0.19: Poland 1944[KeywordID: 14233]
0.05: Auschwitz (Poland : Concentration Camp)(generic)[KeywordID: 7528]
0.03: transfer from Auschwitz II-Birkenau (Poland : Death Camp)[KeywordID: 16297]
0.02: Poland 1945 (January 1 - May 7)[KeywordID: 16192]
0.01: transfers, means of transport[KeywordID: 15232]
0.01: Germany 1944[KeywordID: 13929]
0.01: Poland 1944 (July 22) - 1945 (January 16)[KeywordID: 14226]
0.01: freight trains[KeywordID: 15774]
0.01: Poland 1943[KeywordID: 14232]
0.01: Germany 1945 (January 1 - May 7)[KeywordID: 13930]
0.01: camp selections[KeywordID: 12044]
0.01: transfer from Auschwitz (Poland : Concentration Camp)(generic)[KeywordID: 16162]
0.01: transfer conditions[KeywordID: 12161]
