# What if there is no (maintained) Python library?

Corpus-DB, a textual corpus database for the digital humanities by @j0_0n (Jonathan Reeve), provides a small API documented at http://corpus-db.org/docs

Since no Python library has been written specifically for Corpus-DB, we can talk to its API directly with `requests`, a general-purpose Python library for making HTTP requests. If you haven't installed it yet, [open a terminal](https://github.com/GCDigitalFellows/installdri.github.io/blob/master/anaconda.md) and type:
```bash
conda install requests -y
```

In [14]:
#let's import requests
import requests

In [15]:
# Let's get the metadata for all the books by Jane Austen.
# The timeout stops the cell from hanging indefinitely if the server does not
# respond, and raise_for_status() turns an HTTP error (4xx/5xx) into an
# exception right here instead of a confusing failure in a later cell.
r = requests.get("http://corpus-db.org/api/author/Austen, Jane", timeout=30)
r.raise_for_status()

In [16]:
# What's in r? Evaluating it as the last expression of the cell displays the
# Response object's repr, which includes the HTTP status code.
r

<Response [200]>

In [17]:
# A 200 status means nothing broke, but what data did we actually get?
# r.json() decodes the JSON body of the response into Python objects.
r.json()

[{'': '102',
  'LCC': "{'PR'}",
  '_repo': 'Persuasion_105',
  '_version': '0.2.0',
  'alternative_title': '',
  'author': 'Austen, Jane',
  'authoryearofbirth': '1775',
  'authoryearofdeath': '1817',
  'contributor': '',
  'covers': "[{'cover_type': 'archival', 'image_path': 'epub/cover.jpeg'}]",
  'creator': "{'author': {'agent_name': 'Austen, Jane', 'birthdate': 1775, 'deathdate': 1817, 'gutenberg_agent_id': '68', 'url': 'http://www.gutenberg.org/2009/agents/68', 'wikipedia': 'http://en.wikipedia.org/wiki/Jane_Austen'}}",
  'description': "Persuasion is Jane Austen's last completed novel, published posthumously. She began it soon after she had finished Emma and completed it in August 1816. Persuasion was published in December 1817, but is dated 1818. The author died earlier in 1817.\nAs the Napoleonic Wars come to an end in 1814, Admirals and Captains of the Royal Navy are put ashore, their work done. Anne Elliot meets Captain Frederick Wentworth after seven years, by the chance of 

In [18]:
# Let's use pandas for a quick tabular view of the JSON records.
# json_normalize is exposed as pd.json_normalize since pandas 1.0; the older
# pd.io.json.json_normalize spelling is deprecated, so use the top-level name.
import pandas as pd
df = pd.json_normalize(r.json())
df.head()

Unnamed: 0,Unnamed: 1,LCC,_repo,_version,alternative_title,author,authoryearofbirth,authoryearofdeath,contributor,covers,...,tableOfContents,title,titlepage_image,type,url,wikipedia,wp_info,wp_literary_genres,wp_publication_date,wp_subjects
0,102,{'PR'},Persuasion_105,0.2.0,,"Austen, Jane",1775,1817,,"[{'cover_type': 'archival', 'image_path': 'epu...",...,,Persuasion,,Text,http://www.gutenberg.org/ebooks/105,['https://fi.wikipedia.org/wiki/Viisasteleva_s...,{'http://www.w3.org/1999/02/22-rdf-syntax-ns#t...,,,"['Novels_about_nobility', 'John_Murray_(publis..."
1,116,{'PR'},Northanger-Abbey_121,0.2.0,,"Austen, Jane",1775,1817,,"[{'cover_type': 'generated', 'image_path': 'co...",...,,Northanger Abbey,,Text,http://www.gutenberg.org/ebooks/121,['https://fi.wikipedia.org/wiki/Northanger_Abb...,{'http://www.w3.org/1999/02/22-rdf-syntax-ns#t...,,,"['Novels_by_Jane_Austen', 'Novels_set_in_Somer..."
2,134,{'PR'},Mansfield-Park_141,0.2.0,,"Austen, Jane",1775,1817,,"[{'cover_type': 'generated', 'image_path': 'co...",...,,Mansfield Park,,Text,http://www.gutenberg.org/ebooks/141,['https://fi.wikipedia.org/wiki/Kasvattityt%C3...,{'http://www.w3.org/1999/02/22-rdf-syntax-ns#t...,,,"['Novels_by_Jane_Austen', 'British_novels_adap..."
3,151,{'PR'},Emma_158,0.2.0,,"Austen, Jane",1775,1817,,"[{'cover_type': 'generated', 'image_path': 'co...",...,,Emma,,Text,http://www.gutenberg.org/ebooks/158,['https://fi.wikipedia.org/wiki/Emma_(romaani)...,{'http://www.w3.org/1999/02/22-rdf-syntax-ns#t...,['Novel_of_manners'],,"['Novels_by_Jane_Austen', 'Novels_about_nobili..."
4,154,{'PR'},Sense-and-Sensibility_161,0.2.0,,"Austen, Jane",1775,1817,,"[{'cover_type': 'generated', 'image_path': 'co...",...,,Sense and Sensibility,,Text,http://www.gutenberg.org/ebooks/161,['https://fi.wikipedia.org/wiki/J%C3%A4rki_ja_...,{'http://www.w3.org/1999/02/22-rdf-syntax-ns#t...,,,"['Debut_novels', 'Novels_by_Jane_Austen', 'Wor..."


In [19]:
# But it's a corpus, so where is the text? Ask the fulltext endpoint for it.
# Note that this overwrites r, which held the metadata response until now.
# The timeout keeps the cell from hanging on an unresponsive server, and
# raise_for_status() reports an HTTP error here rather than in r.json() below.
r = requests.get("http://corpus-db.org/api/author/Austen, Jane/fulltext", timeout=60)
r.raise_for_status()

In [20]:
# Turn the full-text records into a DataFrame as well.
# pd.json_normalize is the supported top-level name (pandas >= 1.0);
# pd.io.json.json_normalize is deprecated.
bdf = pd.json_normalize(r.json())
bdf.head()

Unnamed: 0,id,text
0,105,by Al Haines.\n\n\n\n\n\n\n\n\n\n\nPersuasion\...
1,121,\n\n\n\n\nNORTHANGER ABBEY\n\n\nby\n\nJane Aus...
2,141,\n\n\n\n\nMANSFIELD PARK\n\n(1814)\n\n\nBy Jan...
3,158,\n\n\n\n\nEMMA\n\nBy Jane Austen\n\n\n\n\nVOLU...
4,161,\nSpecial thanks are due to Sharon Partridge f...


In [23]:
# How do we combine the two? Print the 'id' column of the metadata frame and
# of the full-text frame one after the other to compare the candidate keys.
print(*(frame['id'] for frame in (df, bdf)))

0       105.0
1       121.0
2       141.0
3       158.0
4       161.0
5       946.0
6      1212.0
7      1342.0
8     21839.0
9     25946.0
10    31100.0
11    33388.0
12    35151.0
13    35163.0
14    36777.0
15    37431.0
16    37634.0
17    42078.0
18    42671.0
19    45186.0
Name: id, dtype: object 0       105
1       121
2       141
3       158
4       161
5       946
6      1212
7      1342
8     21839
9     25946
10    31100
11    33388
12    35151
13    35163
14    36777
15    37431
16    37634
17    42078
18    42671
19    45186
Name: id, dtype: object


In [28]:
# Let's fix the metadata ids: they are strings with a trailing ".0"
# (e.g. "105.0"), while the full-text ids have no such suffix (e.g. "105").
# Keep only the part before the decimal point. Unlike slicing off the last two
# characters, this leaves an id that has no ".0" suffix untouched instead of
# silently truncating it.
# pandas string cleaning at https://pandas.pydata.org/pandas-docs/stable/text.html
df['fid'] = df['id'].str.split('.').str[0]
df['fid']

0       105
1       121
2       141
3       158
4       161
5       946
6      1212
7      1342
8     21839
9     25946
10    31100
11    33388
12    35151
13    35163
14    36777
15    37431
16    37634
17    42078
18    42671
19    45186
Name: fid, dtype: object

In [31]:
# How do we get metadata and text side by side? Join the two frames, matching
# the cleaned 'fid' key of the metadata against the 'id' of the full texts.
# pd.merge(left, right, **kwargs)
austen_corpus = pd.merge(df, bdf, left_on="fid", right_on="id")
austen_corpus.head()

Unnamed: 0,Unnamed: 1,LCC,_repo,_version,alternative_title,author,authoryearofbirth,authoryearofdeath,contributor,covers,...,type,url,wikipedia,wp_info,wp_literary_genres,wp_publication_date,wp_subjects,fid,id_y,text
0,102,{'PR'},Persuasion_105,0.2.0,,"Austen, Jane",1775,1817,,"[{'cover_type': 'archival', 'image_path': 'epu...",...,Text,http://www.gutenberg.org/ebooks/105,['https://fi.wikipedia.org/wiki/Viisasteleva_s...,{'http://www.w3.org/1999/02/22-rdf-syntax-ns#t...,,,"['Novels_about_nobility', 'John_Murray_(publis...",105,105,by Al Haines.\n\n\n\n\n\n\n\n\n\n\nPersuasion\...
1,116,{'PR'},Northanger-Abbey_121,0.2.0,,"Austen, Jane",1775,1817,,"[{'cover_type': 'generated', 'image_path': 'co...",...,Text,http://www.gutenberg.org/ebooks/121,['https://fi.wikipedia.org/wiki/Northanger_Abb...,{'http://www.w3.org/1999/02/22-rdf-syntax-ns#t...,,,"['Novels_by_Jane_Austen', 'Novels_set_in_Somer...",121,121,\n\n\n\n\nNORTHANGER ABBEY\n\n\nby\n\nJane Aus...
2,134,{'PR'},Mansfield-Park_141,0.2.0,,"Austen, Jane",1775,1817,,"[{'cover_type': 'generated', 'image_path': 'co...",...,Text,http://www.gutenberg.org/ebooks/141,['https://fi.wikipedia.org/wiki/Kasvattityt%C3...,{'http://www.w3.org/1999/02/22-rdf-syntax-ns#t...,,,"['Novels_by_Jane_Austen', 'British_novels_adap...",141,141,\n\n\n\n\nMANSFIELD PARK\n\n(1814)\n\n\nBy Jan...
3,151,{'PR'},Emma_158,0.2.0,,"Austen, Jane",1775,1817,,"[{'cover_type': 'generated', 'image_path': 'co...",...,Text,http://www.gutenberg.org/ebooks/158,['https://fi.wikipedia.org/wiki/Emma_(romaani)...,{'http://www.w3.org/1999/02/22-rdf-syntax-ns#t...,['Novel_of_manners'],,"['Novels_by_Jane_Austen', 'Novels_about_nobili...",158,158,\n\n\n\n\nEMMA\n\nBy Jane Austen\n\n\n\n\nVOLU...
4,154,{'PR'},Sense-and-Sensibility_161,0.2.0,,"Austen, Jane",1775,1817,,"[{'cover_type': 'generated', 'image_path': 'co...",...,Text,http://www.gutenberg.org/ebooks/161,['https://fi.wikipedia.org/wiki/J%C3%A4rki_ja_...,{'http://www.w3.org/1999/02/22-rdf-syntax-ns#t...,,,"['Debut_novels', 'Novels_by_Jane_Austen', 'Wor...",161,161,\nSpecial thanks are due to Sharon Partridge f...


In [40]:
# Trim newline characters from both ends of every text and keep the result in
# a separate 'textn' column, leaving the original 'text' column as it was.
austen_corpus["textn"] = (
    austen_corpus["text"]
    .str.strip(to_strip="\n")
)
austen_corpus["textn"]

0     by Al Haines.\n\n\n\n\n\n\n\n\n\n\nPersuasion\...
1     NORTHANGER ABBEY\n\n\nby\n\nJane Austen (1803)...
2     MANSFIELD PARK\n\n(1814)\n\n\nBy Jane Austen\n...
3     EMMA\n\nBy Jane Austen\n\n\n\n\nVOLUME I\n\n\n...
4     Special thanks are due to Sharon Partridge for...
5     LADY SUSAN\n\nby Jane Austen\n\n\n\n\nI\n\n\nL...
6     LOVE AND FREINDSHIP AND OTHER EARLY WORKS\n\n(...
7     PRIDE AND PREJUDICE\n\nBy Jane Austen\n\n\n\nC...
8                              Transcriber's Note:\n...
9     Online Distributed Proofreading Team at http:/...
10    THE WORKS OF JANE AUSTEN\n\n\n\nEdited by Davi...
11       Note de transcription:\n   Les erreurs clai...
12      Au lecteur\n\n  Madame de Montolieu a tradui...
13      Au lecteur\n\n  Madame de Montolieu a tradui...
14      Au lecteur\n\n  Cette version électronique r...
15    produced from scanned images of public domain ...
16      Au lecteur\n\n  Madame de Montolieu a tradui...
17    [Transcriber's Note: letters that were sup

# Try with a different author