In [1]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import chart_studio.plotly as csp

In [2]:
# Load the complete Gutenberg metadata table from the project's data folder
metadata_path = '../data/metadata/metadata.csv'
metadata = pd.read_csv(metadata_path)

In [3]:
# Non-missing values per column (shows which metadata fields are incomplete)
metadata.notna().sum()

id                   65353
title                65352
author               62881
authoryearofbirth    49890
authoryearofdeath    48841
language             65353
downloads            65353
subjects             65353
type                 65353
dtype: int64

In [4]:
# Select English-language detective/mystery fiction and tales.
# A record is kept when its 'subjects' text contains at least one term from
# `kw` AND at least one term from `subj` (case-insensitive substring match).
kw = ['fiction', 'tale']
subj = ['detective', 'mystery', 'mysteries']


def _mentions_any(terms):
    """Return a predicate that is True when the lower-cased text contains any of `terms`."""
    return lambda text: any(term in text.lower() for term in terms)


# The language column is compared against the literal text "['en']".
mystery = metadata.loc[metadata['language'] == "['en']"]
mystery = mystery.loc[mystery['subjects'].apply(_mentions_any(kw))]
mystery = mystery.loc[mystery['subjects'].apply(_mentions_any(subj))]

In [5]:
# Mystery titles with more than 50 downloads, most-downloaded first
over_50_downloads = mystery['downloads'].gt(50)
popular = mystery.loc[over_50_downloads].sort_values(by='downloads', ascending=False)
popular

Unnamed: 0,id,title,author,authoryearofbirth,authoryearofdeath,language,downloads,subjects,type
7344,PG1661,The Adventures of Sherlock Holmes,"Doyle, Arthur Conan",1859.0,1930.0,['en'],21284,"{'Holmes, Sherlock (Fictitious character) -- F...",Text
20542,PG2852,The Hound of the Baskervilles,"Doyle, Arthur Conan",1859.0,1930.0,['en'],15478,"{'Dartmoor (England) -- Fiction', 'Holmes, She...",Text
16071,PG244,A Study in Scarlet,"Doyle, Arthur Conan",1859.0,1930.0,['en'],8732,"{'Holmes, Sherlock (Fictitious character) -- F...",Text
17237,PG2554,Crime and Punishment,"Dostoyevsky, Fyodor",1821.0,1881.0,['en'],7860,"{'Murder -- Fiction', 'Crime -- Psychological ...",Text
63867,PG863,The Mysterious Affair at Styles,"Christie, Agatha",1890.0,1976.0,['en'],5544,{'Private investigators -- England -- Fiction'...,Text
...,...,...,...,...,...,...,...,...,...
34391,PG41001,The Red Room,"Le Queux, William",1864.0,1927.0,['en'],51,"{'Murder -- Fiction', 'England -- Fiction', 'D...",Text
9039,PG1814,The Agony Column,"Biggers, Earl Derr",1884.0,1933.0,['en'],51,"{'Americans -- England -- Fiction', 'Detective...",Text
16126,PG2454,The Silent Bullet,"Reeve, Arthur B. (Arthur Benjamin)",1880.0,1936.0,['en'],51,"{'Kennedy, Craig (Fictitious character) -- Fic...",Text
50289,PG55310,The Lone Inn: A Mystery,"Hume, Fergus",1859.0,1932.0,['en'],51,"{'Detective and mystery stories', 'English fic...",Text


In [6]:
# Number of popular mystery titles per author, largest count first
popular_titles_per_author = popular.groupby('author')['title'].count()
popular_titles_per_author.sort_values(ascending=False)

author
Doyle, Arthur Conan                    27
Gaboriau, Emile                         9
Chesterton, G. K. (Gilbert Keith)       9
Rohmer, Sax                             8
Leblanc, Maurice                        7
                                       ..
Annas, Hal                              1
Lynch, Lawrence L.                      1
Lytton, Edward Bulwer Lytton, Baron     1
MacDonald, John D. (John Dann)          1
Lowndes, Marie Belloc                   1
Name: title, Length: 90, dtype: int64

In [7]:
# Mystery titles with 50 downloads or fewer, most-downloaded first
at_most_50_downloads = mystery['downloads'].le(50)
not_popular = mystery.loc[at_most_50_downloads].sort_values(by='downloads', ascending=False)
not_popular

Unnamed: 0,id,title,author,authoryearofbirth,authoryearofdeath,language,downloads,subjects,type
6979,PG1628,My Lady's Money,"Collins, Wilkie",1824.0,1889.0,['en'],50,"{'Detective and mystery stories', 'England -- ...",Text
52511,PG57311,The Heart of a Mystery,"Speight, T. W. (Thomas Wilkinson)",1830.0,1915.0,['en'],50,"{'English fiction -- 19th century', 'Mystery a...",Text
58670,PG62860,"Hidden Foes; Or, A Fatal Miscalculation","Carter, Nicholas (House name)",,,['en'],49,"{'Dime novels', 'Detective and mystery stories...",Text
6412,PG1577,The Grey Room,"Phillpotts, Eden",1862.0,1960.0,['en'],49,{'Mystery fiction'},Text
35576,PG42069,Janet Hardy in Hollywood,"Wheeler, Ruthe S.",,,['en'],49,"{'Mystery and detective stories', 'Motion pict...",Text
...,...,...,...,...,...,...,...,...,...
62810,PG7689,Lucretia — Volume 05,"Lytton, Edward Bulwer Lytton, Baron",1803.0,1873.0,['en'],3,"{'English fiction -- 19th century', 'Crime -- ...",Text
39124,PG45262,The Bungalow Boys in the Great Northwest,"Goldfrap, John Henry",1879.0,1917.0,['en'],3,"{'Mystery and detective stories', 'Northwest, ...",Text
47509,PG52809,The Banner Boy Scouts Mystery,"Warren, George A.",,,['en'],2,"{'Spies -- Juvenile fiction', 'Robbery -- Juve...",Text
39381,PG45494,Airplane Boys in the Black Woods,"Craine, E. J. (Edith Janice)",1881.0,,['en'],2,"{'Airplanes -- Juvenile fiction', 'Air pilots ...",Text


In [8]:
# Number of less-downloaded mystery titles per author, largest count first
not_popular_titles_per_author = not_popular.groupby('author')['title'].count()
not_popular_titles_per_author.sort_values(ascending=False)

author
Hume, Fergus                            52
Le Queux, William                       33
Snell, Roy J. (Roy Judson)              28
Wirt, Mildred A. (Mildred Augustine)    23
Doyle, Arthur Conan                     20
                                        ..
Browne, Howard                           1
Lynde, Francis                           1
Brown, Rosel George                      1
M'Govan, James                           1
Miller, Alex. McVeigh, Mrs.              1
Name: title, Length: 351, dtype: int64

In [9]:
# Earliest and latest author birth years among the mystery titles.
# Only strictly positive years are considered (missing and non-positive
# values are left out of the min/max).
mystery_birth_years = mystery.loc[mystery['authoryearofbirth'] > 0, 'authoryearofbirth']
min_mystery = mystery_birth_years.min()
max_mystery = mystery_birth_years.max()

# Show every mystery title whose author was born in one of those two years
born_at_extreme = mystery['authoryearofbirth'].isin([min_mystery, max_mystery])
mystery[born_at_extreme]

Unnamed: 0,id,title,author,authoryearofbirth,authoryearofdeath,language,downloads,subjects,type
7532,PG1678,An Historical Mystery (The Gondreville Mystery),"Balzac, Honoré de",1799.0,1850.0,['en'],32,{'French fiction -- Translations into English'...,Text
26975,PG34323,The Samurai Strategy,"Hoover, Thomas",1941.0,,['en'],100,{'Mystery fiction'},Text


In [10]:
# Earliest and latest author birth years across the whole catalogue.
# Only strictly positive years are considered (missing and non-positive
# values are left out of the min/max).
meta_birth_years = metadata.loc[metadata['authoryearofbirth'] > 0, 'authoryearofbirth']
min_meta = meta_birth_years.min()
max_meta = meta_birth_years.max()

# Show every title whose author was born in one of those two years
meta_born_at_extreme = metadata['authoryearofbirth'].isin([min_meta, max_meta])
metadata[meta_born_at_extreme]

Unnamed: 0,id,title,author,authoryearofbirth,authoryearofdeath,language,downloads,subjects,type
21812,PG29675,Less than Human,"Blade, Zoë",1981.0,,['en'],7,"{'Short stories', 'Assassins -- Fiction', 'Sci...",Text
21813,PG29676,Identity,"Blade, Zoë",1981.0,,['en'],12,"{'Short stories', 'Science fiction'}",Text
46821,PG5218,The Satyricon — Volume 01: Introduction,Petronius Arbiter,20.0,66.0,['en'],38,"{'Rome -- Fiction', 'Satire, Latin -- Translat...",Text
46832,PG5219,The Satyricon — Volume 02: Dinner of Trimalchio,Petronius Arbiter,20.0,66.0,['en'],343,"{'Rome -- Fiction', 'Satire, Latin -- Translat...",Text
46844,PG5220,The Satyricon — Volume 03: Encolpius and His C...,Petronius Arbiter,20.0,66.0,['en'],14,"{'Satire, Latin -- Translations into English',...",Text
46855,PG5221,The Satyricon — Volume 04 : Escape by Sea,Petronius Arbiter,20.0,66.0,['en'],19,"{'Satire, Latin -- Translations into English',...",Text
46866,PG5222,The Satyricon — Volume 05: Crotona Affairs,Petronius Arbiter,20.0,66.0,['en'],15,"{'Satire, Latin -- Translations into English',...",Text
46877,PG5223,The Satyricon — Volume 06: Editor's Notes,Petronius Arbiter,20.0,66.0,['en'],22,"{'Satire, Latin -- Translations into English',...",Text
46888,PG5224,The Satyricon — Volume 07: Marchena Notes,Petronius Arbiter,20.0,66.0,['en'],7,"{'Satire, Latin -- Translations into English',...",Text
46899,PG5225,The Satyricon — Complete,Petronius Arbiter,20.0,66.0,['en'],1936,"{'Rome -- Fiction', 'Satire, Latin -- Translat...",Text


In [49]:
# Titles from the whole catalogue with more than 1000 downloads, most-downloaded first
over_1000_downloads = metadata['downloads'].gt(1000)
popular_meta = metadata.loc[over_1000_downloads].sort_values(by='downloads', ascending=False)
popular_meta.head(10)

Unnamed: 0,id,title,author,authoryearofbirth,authoryearofdeath,language,downloads,subjects,type
63712,PG84,"Frankenstein; Or, The Modern Prometheus","Shelley, Mary Wollstonecraft",1797.0,1851.0,['en'],85500,"{'Horror tales', ""Frankenstein's monster (Fict...",Text
3802,PG1342,Pride and Prejudice,"Austen, Jane",1775.0,1817.0,['en'],59069,"{'England -- Fiction', 'Domestic fiction', 'Yo...",Text
60288,PG64317,The Great Gatsby,"Fitzgerald, F. Scott (Francis Scott)",1896.0,1940.0,['en'],37511,"{'Rich people -- Fiction', 'Married women -- F...",Text
65240,PG98,A Tale of Two Cities,"Dickens, Charles",1812.0,1870.0,['en'],28827,"{'Executions and executioners -- Fiction', 'Lo...",Text
2217,PG11,Alice's Adventures in Wonderland,"Carroll, Lewis",1832.0,1898.0,['en'],27708,"{""Children's stories"", 'Imaginary places -- Ju...",Text
63656,PG844,The Importance of Being Earnest: A Trivial Com...,"Wilde, Oscar",1854.0,1900.0,['en'],26315,"{'England -- Drama', 'Foundlings -- Drama', 'C...",Text
17104,PG2542,A Doll's House : a play,"Ibsen, Henrik",1828.0,1906.0,['en'],23508,"{'Marriage -- Drama', 'Wives -- Drama', 'Norwe...",Text
894,PG1080,A Modest Proposal: For preventing the children...,"Swift, Jonathan",1667.0,1745.0,['en'],21874,"{'Political satire, English', 'Religious satir...",Text
8320,PG174,The Picture of Dorian Gray,"Wilde, Oscar",1854.0,1900.0,['en'],21790,{'London (England) -- History -- 1800-1950 -- ...,Text
7344,PG1661,The Adventures of Sherlock Holmes,"Doyle, Arthur Conan",1859.0,1930.0,['en'],21284,"{'Holmes, Sherlock (Fictitious character) -- F...",Text


In [51]:
# Replace two very long titles with short forms (presumably so they fit as
# axis labels when `popular_meta` is charted by title).
#
# Rows are selected by their Gutenberg id rather than by hard-coded index
# labels: index labels depend on the row order of one particular CSV load,
# and assigning through .at with a label that is absent would silently
# append a new, mostly-empty row instead of renaming anything.
short_titles = {
    'PG1080': 'A Modest Proposal',
    'PG844': 'The Importance of Being Earnest',
}
for book_id, short_title in short_titles.items():
    # A mask that matches nothing makes this assignment a no-op
    popular_meta.loc[popular_meta['id'] == book_id, 'title'] = short_title
popular_meta.head(10)

Unnamed: 0,id,title,author,authoryearofbirth,authoryearofdeath,language,downloads,subjects,type
63712,PG84,"Frankenstein; Or, The Modern Prometheus","Shelley, Mary Wollstonecraft",1797.0,1851.0,['en'],85500,"{'Horror tales', ""Frankenstein's monster (Fict...",Text
3802,PG1342,Pride and Prejudice,"Austen, Jane",1775.0,1817.0,['en'],59069,"{'England -- Fiction', 'Domestic fiction', 'Yo...",Text
60288,PG64317,The Great Gatsby,"Fitzgerald, F. Scott (Francis Scott)",1896.0,1940.0,['en'],37511,"{'Rich people -- Fiction', 'Married women -- F...",Text
65240,PG98,A Tale of Two Cities,"Dickens, Charles",1812.0,1870.0,['en'],28827,"{'Executions and executioners -- Fiction', 'Lo...",Text
2217,PG11,Alice's Adventures in Wonderland,"Carroll, Lewis",1832.0,1898.0,['en'],27708,"{""Children's stories"", 'Imaginary places -- Ju...",Text
63656,PG844,The Importance of Being Earnest,"Wilde, Oscar",1854.0,1900.0,['en'],26315,"{'England -- Drama', 'Foundlings -- Drama', 'C...",Text
17104,PG2542,A Doll's House : a play,"Ibsen, Henrik",1828.0,1906.0,['en'],23508,"{'Marriage -- Drama', 'Wives -- Drama', 'Norwe...",Text
894,PG1080,A Modest Proposal,"Swift, Jonathan",1667.0,1745.0,['en'],21874,"{'Political satire, English', 'Religious satir...",Text
8320,PG174,The Picture of Dorian Gray,"Wilde, Oscar",1854.0,1900.0,['en'],21790,{'London (England) -- History -- 1800-1950 -- ...,Text
7344,PG1661,The Adventures of Sherlock Holmes,"Doyle, Arthur Conan",1859.0,1930.0,['en'],21284,"{'Holmes, Sherlock (Fictitious character) -- F...",Text


In [12]:
# Titles from the whole catalogue with 1000 downloads or fewer, most-downloaded first
at_most_1000_downloads = metadata['downloads'].le(1000)
not_popular_meta = metadata.loc[at_most_1000_downloads].sort_values(by='downloads', ascending=False)
not_popular_meta

Unnamed: 0,id,title,author,authoryearofbirth,authoryearofdeath,language,downloads,subjects,type
61026,PG64982,The Tale of Bunny Cotton-Tail,"Smith, Laura Rountree",1876.0,1924.0,['en'],1000,"{'Friendship -- Juvenile fiction', 'Obedience ...",Text
2804,PG1252,Le Morte d'Arthur: Volume 2,"Malory, Thomas, Sir",,1471.0,['en'],998,"{'Arthur, King -- Legends', 'Arthurian romances'}",Text
13627,PG222,The Moon and Sixpence,"Maugham, W. Somerset (William Somerset)",1874.0,1965.0,['en'],996,"{'Painters -- Fiction', 'Tahiti (French Polyne...",Text
17515,PG257,Troilus and Criseyde,"Chaucer, Geoffrey",1342.0,1400.0,['en'],996,"{'Troilus (Legendary character) -- Poetry', 'C...",Text
21682,PG29558,"Boy Scouts Handbook: The First Edition, 1911",Boy Scouts of America,,,['en'],993,"{'Boy Scouts of America -- Handbooks, manuals,...",Text
...,...,...,...,...,...,...,...,...,...
61461,PG65373,"The Girl's Own Paper, Vol. VIII, No. 363, Dece...",Various,,,['en'],0,set(),Text
61352,PG65275,Le second enfer d'Etienne Dolet: Suivi de sa t...,"Dolet, Etienne",1509.0,1546.0,['fr'],0,set(),Text
61460,PG65372,The Allen County War Memorial Coliseum,"Adams, Otto H.",,,['en'],0,set(),Text
61353,PG65276,A History of Sculpture,"Short, Ernest Henry",,,['en'],0,set(),Text


In [13]:
# Total downloads per mystery author, highest first.
# The 'downloads' column is selected explicitly and sum() is called without
# arguments: the first positional parameter of GroupBy.sum is `numeric_only`,
# not a column name, so the former sum('downloads') only produced the right
# result by accident.
mystery_auth = (
    mystery
    .groupby('author')[['downloads']]
    .sum()
    .sort_values('downloads', ascending=False)
    .reset_index()
)
mystery_auth

Unnamed: 0,author,downloads
0,"Doyle, Arthur Conan",61315
1,"Christie, Agatha",14460
2,"Dostoyevsky, Fyodor",7860
3,"Leblanc, Maurice",5374
4,"Chesterton, G. K. (Gilbert Keith)",3956
...,...,...
383,"Mitford, Bertram",4
384,"Edholm, Lizette M.",4
385,"Hancock, H. Irving (Harrie Irving)",3
386,"Steele, Jack",3


In [14]:
# Total downloads per author across the whole catalogue, highest first.
# The 'downloads' column is selected explicitly and sum() is called without
# arguments: the first positional parameter of GroupBy.sum is `numeric_only`,
# not a column name, so the former sum('downloads') only produced the right
# result by accident.
metadata_auth = (
    metadata
    .groupby('author')[['downloads']]
    .sum()
    .sort_values('downloads', ascending=False)
    .reset_index()
)
metadata_auth

Unnamed: 0,author,downloads
0,"Dickens, Charles",102817
1,"Austen, Jane",97381
2,"Shelley, Mary Wollstonecraft",94366
3,Various,72566
4,"Wilde, Oscar",71756
...,...,...
21144,"Kennedy, Howard Angus",0
21145,"Furth, Carlton",0
21146,"Winstock, Melvin G.",0
21147,"Wild, Christian Gottlob",0


In [20]:
# Mystery authors - top 10 by total downloads (horizontal bar chart)
fig1_background = 'rgba(102,102,102,1)'
fig1 = px.bar(
    mystery_auth.head(10),
    x='downloads',
    y='author',
    orientation='h',
    labels={'author': 'Author', 'downloads': 'Number of downloads'},
    template='plotly_dark',
    color_discrete_sequence=px.colors.qualitative.Set2,
)
# Same grey for the plotting area and the surrounding paper
fig1.update_layout(
    barmode='stack',
    plot_bgcolor=fig1_background,
    paper_bgcolor=fig1_background,
)
fig1.show()

In [52]:
# All authors - top 10 by total downloads (horizontal bar chart)
fig2_background = 'rgba(102,102,102,1)'
fig2 = px.bar(
    metadata_auth.head(10),
    x='downloads',
    y='author',
    orientation='h',
    labels={'author': 'Author', 'downloads': 'Number of downloads'},
    template='plotly_dark',
    color_discrete_sequence=px.colors.qualitative.Set2,
)
# Same grey for the plotting area and the surrounding paper
fig2.update_layout(
    barmode='stack',
    plot_bgcolor=fig2_background,
    paper_bgcolor=fig2_background,
)
fig2.show()

In [53]:
# All titles - top 10 by total downloads (horizontal bar chart)
fig3_background = 'rgba(102,102,102,1)'
fig3 = px.bar(
    popular_meta.head(10),
    x='downloads',
    y='title',
    orientation='h',
    labels={'title': 'Title', 'downloads': 'Number of downloads'},
    template='plotly_dark',
    color_discrete_sequence=px.colors.qualitative.Set2,
)
# Same grey for the plotting area and the surrounding paper
fig3.update_layout(
    barmode='stack',
    plot_bgcolor=fig3_background,
    paper_bgcolor=fig3_background,
)
fig3.show()

In [54]:
# Mystery titles - top 10 by total downloads (horizontal bar chart)
fig4_background = 'rgba(102,102,102,1)'
fig4 = px.bar(
    popular.head(10),
    x='downloads',
    y='title',
    orientation='h',
    # Human-readable axis titles, matching the other top-10 charts; without
    # this mapping the axes show the raw column names 'title' and 'downloads'.
    labels={'title': 'Title', 'downloads': 'Number of downloads'},
    template='plotly_dark',
    color_discrete_sequence=px.colors.qualitative.Set2,
)
# Same grey for the plotting area and the surrounding paper
fig4.update_layout(
    barmode='stack',
    plot_bgcolor=fig4_background,
    paper_bgcolor=fig4_background,
)
fig4.show()