# MAPRR Textual Analytics

## Intro

### Import

In [1]:
import os 
import time
import logging
import pandas as pd 
import re
import threading
import requests
from bs4 import BeautifulSoup
from natasha import (
    Segmenter, 
    MorphVocab, 
    NewsEmbedding, 
    NewsMorphTagger, 
    NewsSyntaxParser, 
    NewsNERTagger, 
    PER, 
    NamesExtractor, 
    Doc)
from razdel import tokenize

In [2]:
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)

## Pre-processing Function

In [3]:
domain = 'https://mpgrr.herokuapp.com/'

In [4]:
tables = {'agents/': 304,
          'works/': 603, 
          'place_based_concepts/': 315, 
          'locations/': 366, 
          'multivalent_markers/': 433}

In [5]:
class maprr: 
    
    def __init__(self): 
        self.Wsoup = {} 
        self.Asoup = {}
        self.Ws = {}
        self.As = {}
    
    def get_htmlA(self): 
        # list(tables.values())[0]
        for i in range(1, list(tables.values())[0]+1):
            url = domain+list(tables.keys())[0]+str(i)
            with requests.get(url) as r: 
                if r.status_code == 200: 
                    s = BeautifulSoup(r.content, 'html.parser')
                    self.Asoup.update({i:s})
                    #print(f"Appending A{i}")
                    logging.info(f"Appending A{i}")
                    time.sleep(.1)
                else: 
                    print(f"A{i} status code: {r.status_code}")
                    time.sleep(.1)
                    pass

    def get_htmlW(self): 
        # list(tables.values())[1]
        for i in range(1, list(tables.values())[1]+1):
            url = domain+list(tables.keys())[1]+str(i)
            with requests.get(url) as r: 
                if r.status_code == 200: 
                    s = BeautifulSoup(r.content, 'html.parser')
                    self.Wsoup.update({i:s})
                    #print(f"Appending W{i}")
                    logging.info(f"Appending W{i}")
                    time.sleep(.1)
                else: 
                    print(f"W{i} status code: {r.status_code}")
                    time.sleep(.1)
                    pass

    def parseWs(self, html): 
        content = html.find('div', {'class':'col-md-9 fixed-height'})
        try: 
            author = content.div.h3.text
        except: 
            author = ""
        try: 
            title = content.div.h4.text
        except: 
            title = ""
        text = content.find_all('p',{'class':'stanza'})
        Wtext = [x.text.replace('\n','').strip() for x in text]
        metaKeys = [x.text[:-1] for x in html.find('div', {'class':'card-body'}).find_all('h4')]
        metaVals = [x.text for x in html.find('div', {'class':'card-body'}).find_all('p')]
        metaDict = dict(zip(metaKeys, metaVals))
        subDict = {'title': title, 'text': Wtext}
        subDict.update(metaDict)
        #self.Ws.update(subdict)
        return subDict

    def parseAs(self, html): 
        name = html.find('div', {'class': 'card scrollable'}).h2.text
        bdate, ddate = html.find('div', {'class': 'card scrollable'}).span.text.split(' - ')
        subDict = {'name': name, 'birth': bdate, 'death': ddate}
        try: 
            typeKeys = [x.h4.text for x in html.find_all('div', {'class': 'col-md-4'})]
            typeVals = [x.p or x.div.span.text for x in html.find_all('div', {'class': 'col-md-4'})]
            typeVals[:2] = [x.text for x in typeVals[:2]]
            typeDict = dict(zip(typeKeys, typeVals))
            subDict.update(typeDict)
        except: 
            pass
        #self.As.update(subdict)
        return subDict

    def run(self): 
        self.get_htmlA() 
        self.get_htmlW() 

        self.As = {k: self.parseAs(v) for k, v in self.Asoup.items()}
        self.Ws = {k: self.parseWs(v) for k, v in self.Wsoup.items()}

        AsDf = pd.DataFrame.from_dict(self.As, orient='index')
        WsDf = pd.DataFrame.from_dict(self.Ws, orient='index')        

        WsDf.to_csv('WsDf.csv')
        AsDf.to_csv('AsDf.csv')

In [6]:
%%time
if __name__ == '__main__': 
    maprr().run()

INFO:Appending A1
INFO:Appending A2
INFO:Appending A3
INFO:Appending A4
INFO:Appending A5
INFO:Appending A6
INFO:Appending A7
INFO:Appending A8
INFO:Appending A9
INFO:Appending A10
INFO:Appending A11
INFO:Appending A12
INFO:Appending A13
INFO:Appending A14
INFO:Appending A15
INFO:Appending A16
INFO:Appending A17
INFO:Appending A18
INFO:Appending A19
INFO:Appending A20
INFO:Appending A21
INFO:Appending A22
INFO:Appending A23
INFO:Appending A24
INFO:Appending A25
INFO:Appending A26
INFO:Appending A27
INFO:Appending A28
INFO:Appending A29
INFO:Appending A30
INFO:Appending A31
INFO:Appending A32
INFO:Appending A33
INFO:Appending A34
INFO:Appending A35
INFO:Appending A36
INFO:Appending A37
INFO:Appending A38
INFO:Appending A39
INFO:Appending A40
INFO:Appending A41
INFO:Appending A42
INFO:Appending A43
INFO:Appending A44


A45 status code: 500


INFO:Appending A46
INFO:Appending A47
INFO:Appending A48
INFO:Appending A49
INFO:Appending A50
INFO:Appending A51
INFO:Appending A52
INFO:Appending A53
INFO:Appending A54
INFO:Appending A55
INFO:Appending A56
INFO:Appending A57
INFO:Appending A58
INFO:Appending A59
INFO:Appending A60
INFO:Appending A61
INFO:Appending A62
INFO:Appending A63
INFO:Appending A64
INFO:Appending A65
INFO:Appending A66
INFO:Appending A67
INFO:Appending A68
INFO:Appending A69
INFO:Appending A70
INFO:Appending A71
INFO:Appending A72
INFO:Appending A73


A74 status code: 404
A75 status code: 404
A76 status code: 404
A77 status code: 404


INFO:Appending A78
INFO:Appending A79
INFO:Appending A80
INFO:Appending A81
INFO:Appending A82
INFO:Appending A83
INFO:Appending A84
INFO:Appending A85
INFO:Appending A86
INFO:Appending A87
INFO:Appending A88
INFO:Appending A89
INFO:Appending A90
INFO:Appending A91
INFO:Appending A92
INFO:Appending A93
INFO:Appending A94
INFO:Appending A95
INFO:Appending A96
INFO:Appending A97
INFO:Appending A98
INFO:Appending A99
INFO:Appending A100
INFO:Appending A101
INFO:Appending A102
INFO:Appending A103
INFO:Appending A104
INFO:Appending A105
INFO:Appending A106
INFO:Appending A107
INFO:Appending A108
INFO:Appending A109
INFO:Appending A110
INFO:Appending A111
INFO:Appending A112
INFO:Appending A113
INFO:Appending A114
INFO:Appending A115
INFO:Appending A116
INFO:Appending A117
INFO:Appending A118
INFO:Appending A119
INFO:Appending A120
INFO:Appending A121
INFO:Appending A122
INFO:Appending A123
INFO:Appending A124
INFO:Appending A125
INFO:Appending A126
INFO:Appending A127
INFO:Appending A128
IN

A139 status code: 404
A140 status code: 404


INFO:Appending A141
INFO:Appending A142
INFO:Appending A143
INFO:Appending A144
INFO:Appending A145
INFO:Appending A146
INFO:Appending A147
INFO:Appending A148
INFO:Appending A149
INFO:Appending A150
INFO:Appending A151
INFO:Appending A152
INFO:Appending A153
INFO:Appending A154
INFO:Appending A155
INFO:Appending A156
INFO:Appending A157
INFO:Appending A158
INFO:Appending A159
INFO:Appending A160
INFO:Appending A161
INFO:Appending A162
INFO:Appending A163
INFO:Appending A164
INFO:Appending A165
INFO:Appending A166
INFO:Appending A167
INFO:Appending A168
INFO:Appending A169
INFO:Appending A170
INFO:Appending A171
INFO:Appending A172
INFO:Appending A173
INFO:Appending A174
INFO:Appending A175
INFO:Appending A176
INFO:Appending A177
INFO:Appending A178
INFO:Appending A179
INFO:Appending A180
INFO:Appending A181
INFO:Appending A182
INFO:Appending A183
INFO:Appending A184
INFO:Appending A185
INFO:Appending A186
INFO:Appending A187
INFO:Appending A188
INFO:Appending A189
INFO:Appending A190


A192 status code: 404


INFO:Appending A193
INFO:Appending A194
INFO:Appending A195
INFO:Appending A196
INFO:Appending A197
INFO:Appending A198
INFO:Appending A199
INFO:Appending A200
INFO:Appending A201
INFO:Appending A202
INFO:Appending A203
INFO:Appending A204
INFO:Appending A205


A206 status code: 404


INFO:Appending A207
INFO:Appending A208
INFO:Appending A209
INFO:Appending A210
INFO:Appending A211
INFO:Appending A212
INFO:Appending A213
INFO:Appending A214
INFO:Appending A215
INFO:Appending A216
INFO:Appending A217
INFO:Appending A218
INFO:Appending A219
INFO:Appending A220
INFO:Appending A221
INFO:Appending A222
INFO:Appending A223
INFO:Appending A224
INFO:Appending A225
INFO:Appending A226
INFO:Appending A227
INFO:Appending A228
INFO:Appending A229
INFO:Appending A230
INFO:Appending A231
INFO:Appending A232
INFO:Appending A233
INFO:Appending A234
INFO:Appending A235
INFO:Appending A236
INFO:Appending A237
INFO:Appending A238
INFO:Appending A239
INFO:Appending A240
INFO:Appending A241
INFO:Appending A242
INFO:Appending A243
INFO:Appending A244
INFO:Appending A245
INFO:Appending A246
INFO:Appending A247
INFO:Appending A248
INFO:Appending A249
INFO:Appending A250
INFO:Appending A251


A252 status code: 404


INFO:Appending A253
INFO:Appending A254
INFO:Appending A255
INFO:Appending A256
INFO:Appending A257
INFO:Appending A258
INFO:Appending A259
INFO:Appending A260
INFO:Appending A261
INFO:Appending A262
INFO:Appending A263
INFO:Appending A264
INFO:Appending A265
INFO:Appending A266
INFO:Appending A267
INFO:Appending A268
INFO:Appending A269
INFO:Appending A270


A271 status code: 500


INFO:Appending A272
INFO:Appending A273
INFO:Appending A274
INFO:Appending A275
INFO:Appending A276
INFO:Appending A277
INFO:Appending A278
INFO:Appending A279
INFO:Appending A280
INFO:Appending A281
INFO:Appending A282
INFO:Appending A283
INFO:Appending A284
INFO:Appending A285
INFO:Appending A286
INFO:Appending A287


A288 status code: 500


INFO:Appending A289


A290 status code: 500


INFO:Appending A291
INFO:Appending A292
INFO:Appending A293
INFO:Appending A294


A295 status code: 500


INFO:Appending A296
INFO:Appending A297
INFO:Appending A298
INFO:Appending A299
INFO:Appending A300
INFO:Appending A301
INFO:Appending A302
INFO:Appending A303
INFO:Appending A304
INFO:Appending W1
INFO:Appending W2
INFO:Appending W3
INFO:Appending W4
INFO:Appending W5
INFO:Appending W6
INFO:Appending W7
INFO:Appending W8
INFO:Appending W9
INFO:Appending W10
INFO:Appending W11
INFO:Appending W12
INFO:Appending W13
INFO:Appending W14
INFO:Appending W15
INFO:Appending W16
INFO:Appending W17
INFO:Appending W18
INFO:Appending W19
INFO:Appending W20
INFO:Appending W21
INFO:Appending W22
INFO:Appending W23
INFO:Appending W24
INFO:Appending W25
INFO:Appending W26
INFO:Appending W27
INFO:Appending W28
INFO:Appending W29
INFO:Appending W30
INFO:Appending W31
INFO:Appending W32
INFO:Appending W33
INFO:Appending W34
INFO:Appending W35
INFO:Appending W36
INFO:Appending W37
INFO:Appending W38
INFO:Appending W39
INFO:Appending W40
INFO:Appending W41
INFO:Appending W42
INFO:Appending W43
INFO:Appendi

W173 status code: 500


INFO:Appending W174
INFO:Appending W175
INFO:Appending W176
INFO:Appending W177
INFO:Appending W178


W179 status code: 500


INFO:Appending W180
INFO:Appending W181
INFO:Appending W182
INFO:Appending W183
INFO:Appending W184
INFO:Appending W185
INFO:Appending W186
INFO:Appending W187
INFO:Appending W188
INFO:Appending W189
INFO:Appending W190
INFO:Appending W191
INFO:Appending W192
INFO:Appending W193
INFO:Appending W194
INFO:Appending W195
INFO:Appending W196
INFO:Appending W197
INFO:Appending W198
INFO:Appending W199
INFO:Appending W200
INFO:Appending W201
INFO:Appending W202
INFO:Appending W203
INFO:Appending W204
INFO:Appending W205
INFO:Appending W206
INFO:Appending W207
INFO:Appending W208
INFO:Appending W209
INFO:Appending W210
INFO:Appending W211
INFO:Appending W212
INFO:Appending W213
INFO:Appending W214
INFO:Appending W215
INFO:Appending W216
INFO:Appending W217
INFO:Appending W218
INFO:Appending W219
INFO:Appending W220
INFO:Appending W221
INFO:Appending W222
INFO:Appending W223
INFO:Appending W224
INFO:Appending W225
INFO:Appending W226
INFO:Appending W227
INFO:Appending W228
INFO:Appending W229


W261 status code: 500


INFO:Appending W262
INFO:Appending W263
INFO:Appending W264
INFO:Appending W265
INFO:Appending W266
INFO:Appending W267
INFO:Appending W268
INFO:Appending W269
INFO:Appending W270
INFO:Appending W271
INFO:Appending W272
INFO:Appending W273
INFO:Appending W274
INFO:Appending W275
INFO:Appending W276
INFO:Appending W277
INFO:Appending W278
INFO:Appending W279
INFO:Appending W280
INFO:Appending W281
INFO:Appending W282
INFO:Appending W283
INFO:Appending W284
INFO:Appending W285
INFO:Appending W286
INFO:Appending W287
INFO:Appending W288
INFO:Appending W289
INFO:Appending W290
INFO:Appending W291
INFO:Appending W292
INFO:Appending W293
INFO:Appending W294
INFO:Appending W295
INFO:Appending W296
INFO:Appending W297
INFO:Appending W298
INFO:Appending W299
INFO:Appending W300
INFO:Appending W301
INFO:Appending W302
INFO:Appending W303
INFO:Appending W304
INFO:Appending W305


W306 status code: 500


INFO:Appending W307
INFO:Appending W308
INFO:Appending W309
INFO:Appending W310
INFO:Appending W311
INFO:Appending W312
INFO:Appending W313
INFO:Appending W314
INFO:Appending W315
INFO:Appending W316
INFO:Appending W317
INFO:Appending W318
INFO:Appending W319
INFO:Appending W320
INFO:Appending W321
INFO:Appending W322
INFO:Appending W323
INFO:Appending W324
INFO:Appending W325
INFO:Appending W326
INFO:Appending W327
INFO:Appending W328
INFO:Appending W329
INFO:Appending W330
INFO:Appending W331
INFO:Appending W332
INFO:Appending W333
INFO:Appending W334
INFO:Appending W335
INFO:Appending W336
INFO:Appending W337
INFO:Appending W338
INFO:Appending W339
INFO:Appending W340
INFO:Appending W341
INFO:Appending W342
INFO:Appending W343
INFO:Appending W344
INFO:Appending W345
INFO:Appending W346
INFO:Appending W347
INFO:Appending W348
INFO:Appending W349
INFO:Appending W350
INFO:Appending W351
INFO:Appending W352
INFO:Appending W353
INFO:Appending W354
INFO:Appending W355
INFO:Appending W356


W425 status code: 500


INFO:Appending W426
INFO:Appending W427
INFO:Appending W428
INFO:Appending W429
INFO:Appending W430
INFO:Appending W431
INFO:Appending W432


W433 status code: 500
W434 status code: 500
W435 status code: 500


INFO:Appending W436
INFO:Appending W437
INFO:Appending W438


W439 status code: 500


INFO:Appending W440
INFO:Appending W441
INFO:Appending W442
INFO:Appending W443
INFO:Appending W444
INFO:Appending W445
INFO:Appending W446
INFO:Appending W447
INFO:Appending W448
INFO:Appending W449
INFO:Appending W450
INFO:Appending W451
INFO:Appending W452
INFO:Appending W453
INFO:Appending W454
INFO:Appending W455
INFO:Appending W456
INFO:Appending W457
INFO:Appending W458
INFO:Appending W459
INFO:Appending W460
INFO:Appending W461
INFO:Appending W462
INFO:Appending W463
INFO:Appending W464
INFO:Appending W465
INFO:Appending W466
INFO:Appending W467
INFO:Appending W468
INFO:Appending W469
INFO:Appending W470
INFO:Appending W471
INFO:Appending W472
INFO:Appending W473
INFO:Appending W474
INFO:Appending W475
INFO:Appending W476
INFO:Appending W477
INFO:Appending W478
INFO:Appending W479
INFO:Appending W480
INFO:Appending W481
INFO:Appending W482
INFO:Appending W483
INFO:Appending W484
INFO:Appending W485
INFO:Appending W486
INFO:Appending W487
INFO:Appending W488
INFO:Appending W489


W526 status code: 500


INFO:Appending W527
INFO:Appending W528
INFO:Appending W529
INFO:Appending W530
INFO:Appending W531
INFO:Appending W532
INFO:Appending W533
INFO:Appending W534
INFO:Appending W535
INFO:Appending W536
INFO:Appending W537
INFO:Appending W538
INFO:Appending W539
INFO:Appending W540
INFO:Appending W541
INFO:Appending W542
INFO:Appending W543
INFO:Appending W544
INFO:Appending W545
INFO:Appending W546
INFO:Appending W547
INFO:Appending W548
INFO:Appending W549
INFO:Appending W550
INFO:Appending W551
INFO:Appending W552
INFO:Appending W553


W554 status code: 500


INFO:Appending W555
INFO:Appending W556
INFO:Appending W557
INFO:Appending W558
INFO:Appending W559
INFO:Appending W560
INFO:Appending W561
INFO:Appending W562
INFO:Appending W563
INFO:Appending W564
INFO:Appending W565
INFO:Appending W566
INFO:Appending W567
INFO:Appending W568
INFO:Appending W569
INFO:Appending W570
INFO:Appending W571
INFO:Appending W572
INFO:Appending W573
INFO:Appending W574
INFO:Appending W575


W576 status code: 500
W577 status code: 500
W578 status code: 500
W579 status code: 500


INFO:Appending W580


W581 status code: 500


INFO:Appending W582
INFO:Appending W583
INFO:Appending W584
INFO:Appending W585
INFO:Appending W586
INFO:Appending W587
INFO:Appending W588
INFO:Appending W589
INFO:Appending W590
INFO:Appending W591
INFO:Appending W592
INFO:Appending W593
INFO:Appending W594
INFO:Appending W595
INFO:Appending W596
INFO:Appending W597


W598 status code: 500


INFO:Appending W599
INFO:Appending W600
INFO:Appending W601
INFO:Appending W602
INFO:Appending W603


CPU times: user 1min 27s, sys: 1.69 s, total: 1min 29s
Wall time: 13min 54s


In [7]:
WsDf = pd.read_csv('WsDf.csv').drop(columns=['Unnamed: 0'])
WsDf.index.name = 'W#'
WsDf

Unnamed: 0_level_0,title,text,Title,First Line,Author,Composition Date,Composition Location,Source of First Publication,First Publication Publisher,First Publication Year,First Publication Location
W#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,Untitled,"['Сразу стало тихо в доме, Обле...",no title,Srazu stalo tikho v dome…,Anna Akhmatova,July 1917,Slepnevo,Podorozhnik,Petropolis,1921,
1,Untitled,['Ты — отступник: за остров зелёный ...,no title,Ty - otstupnik: za ostrov zelenyi…,Anna Akhmatova,summer 1917,Slepnevo,Podorozhnik,Petropolis,1921,
2,Untitled,['Просыпаться на рассвете Оттог...,no title,Prosypat'sia na rassvete…,Anna Akhmatova,July 1917,Slepnevo,Podorozhnik,Petropolis,1921,
3,Untitled,"['И в тайную дружбу с высоким, ...",no title,I v tainuiu druzhbu c vysokim…,Anna Akhmatova,1917,Petrograd,Podorozhnik,Petropolis,1921,
4,Untitled,"['Словно ангел, возмутивший воду, ...",no title,"Slovno angel, vozmutivshii vodu…",Anna Akhmatova,February 1916,Tsarskoe selo,Podorozhnik,Petropolis,1921,
...,...,...,...,...,...,...,...,...,...,...,...
581,Untitled,"['Любовь распяли на кресте, Но в...","""Liubov' raspiali na kreste""",Liubov' raspiali na kreste,Georgii Andreevich Viatkin,,Omsk,Ranenaia Rossiia: Stikhi; Vernost': rasskaz; E...,Tipografiia Vremennogo Tsentral’nogo Voenno-P...,1919,Ekaterinburg
582,На словах...,[],Na slovakh... (Nesvoevremennye mysli),Na slovakh--vse soglasny...,Maksim Gor'kii,"June 29, 1917",Petrograd,Novaia zhizn',A. N. Tikhonov,"June 29, 1917",Petrograd
583,Последняя просьба,"['Сестра!.. Сестрица, на минутку подойдите ...",Posledniaia pros'ba,"Sestra! Sestritsa, na minutku podoidite…",M Kolchin,date unknown,,Pesni voiny: posviashchaetsia doblestnym sibir...,Tipografiia I. M. Poznera,1915,
584,И рек Сидящий на престоле,"['В борьбе с врагом, в борьбе кровавой, геройс...",I rek Sidiashchii na prestole,"V bor'be s vragom, v bor'be krovavom...",M. Did,1914 to 1915,,"Nabat: Stikhotvoreniia, 1914-1915",Tipografiia N. A. Vorob'eva,1916,


In [8]:
AsDf = pd.read_csv('AsDf.csv').drop(columns=['Unnamed: 0'])
AsDf.index.name = 'A#'
AsDf

Unnamed: 0_level_0,name,birth,death,Type of Agent,Sex,Occupations,Family Social Strata,Literary Affiliations,Political Affiliations,Type of Corporate Body,Affiliation
A#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,Anna Akhmatova,"June 23, 1889","March 5, 1966",person,female,poet,nobility,Acmeism,independent,,
1,Vasilii Dmitrievich Aleksandrovskii,"January 15, 1897","November 16, 1934",person,male,soldier,peasant,Kuznitsa,Bolshevik member,,
2,Ivan Nikolaevich Antonov,1878,1936?,person,male,editor,unknown,unknown,independent,,
3,Mikhail Dmitrievich Artamonov,"February 22, 1888","November 22, 1958",person,male,journalist,peasant,Vologda poets,unknown,,
4,Nikolai Aseev,"July 10, 1889","July 16, 1963",person,male,soldier,nobility,Left Front of Art: LEF,Bolshevik member,,
...,...,...,...,...,...,...,...,...,...,...,...
285,Moisei Solomonovich Uritskii,"January 14, 1873","August 30, 1918",,,,,,,,
286,Maximilien Marie Isidore de Robespierre,"May 6, 1758","June 28, 1794",,,,,,,,
287,Iurii Mikhailovich Steklov,"August 27, 1873","July 15, 1941",,,,,,,,
288,Christian August Friedrich Peters,"September 7, 1806","May 8, 1880",,,,,,,,


## Dataframe Split

### libDf

In [None]:
libDf = pd.DataFrame.from_dict(XMLdict, orient='index').rename_axis('work_num').sort_index(inplace=False)
libDf.index = libDf.index.str.lstrip('0')
libDf

### authorsDf

In [None]:
authorsDf = libDf.reset_index().groupby('author').size().to_frame().rename(columns={0:'num_works'})
authorsDf[['num_lps','num_words']] = libDf.reset_index().groupby('author').sum()
authorsDf.sort_values(by=['num_words','num_works'], ascending=False)
authorsDf

### worksDf

In [None]:
worksDf = libDf[['title','year','author','genre','num_lps','num_words']]
worksDf

### tokenDf

In [None]:
lpDf = libDf[['text']]
lpDf = lpDf.text.apply(lambda x: pd.Series([y for y in x])).stack().to_frame().rename(columns={0:'lp_str'})
lpDf.index.names = OHCO[:2]
lpDf
tokenDf = lpDf.lp_str.apply(lambda x: tokenize(x)).to_frame()#.rename(columns={0:'token'})
#tokenDf = lpDf.lp_str.apply(lambda x: y.text for y in tokenize(x)[1])
tokenDf

In [None]:
for i in tokenize(lpDf.lp_str): 
    print(i)