In [1]:
from models import Author, Book, FactSource

import re
import spacy
from spacy import displacy

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Name of the spaCy pipeline package used for entity recognition in this notebook.
SPACY_MODEL_NAME = 'en_core_web_lg'
nlp = spacy.load(SPACY_MODEL_NAME)

In [4]:
def clean_text(text):
    """Normalise raw wiki text before it is handed to the NLP pipeline.

    Steps, in order:

    1. Drop every single-line span that starts and ends with a run of ``=``
       (e.g. section headings such as ``== Early life ==``).
    2. Collapse each run of horizontal whitespace (spaces, tabs, ``\\r`` ...)
       into one space, leaving newlines alone.
    3. Collapse each run of newlines, together with any spaces around or
       between them, into a single ``\\n``.

    Args:
        text: The raw page text.

    Returns:
        The cleaned text with line breaks preserved.
    """
    text = re.sub(r'=+.*?=+', '', text)
    # `\s` also matches '\n', so collapsing `\s+` here would flatten the whole
    # text onto one line and make the newline pass below a no-op. Restrict this
    # pass to whitespace that is not a newline.
    text = re.sub(r'[^\S\n]+', ' ', text)
    # After the pass above, horizontal whitespace is at most one space wide, so
    # blank and whitespace-only lines look like ' ?\n' followed by spaces/newlines.
    text = re.sub(r' ?\n[ \n]*', '\n', text)
    return text

In [8]:
def visualize_wiki_ents(author, ents=('WORK_OF_ART', 'DATE')):
    """Render the named entities found in an author's wiki page text.

    The page is fetched with ``author.wikipage.get()``, its ``text`` is passed
    through ``clean_text``, parsed with the module-level ``nlp`` pipeline and
    displayed inline with displaCy's entity visualiser.

    Args:
        author: Object exposing ``wikipage.get()`` whose result has a ``text``
            attribute.
        ents: Entity labels to highlight; defaults to the labels this notebook
            has always used (``WORK_OF_ART`` and ``DATE``).

    Returns:
        None. The visualisation is rendered as a side effect.
    """
    wiki = author.wikipage.get()
    doc = nlp(clean_text(wiki.text))
    # displaCy's `ents` option takes a list of labels; copy so any iterable works.
    displacy.render(doc, style='ent', options={'ents': list(ents)}, jupyter=True)


# Backward-compatible alias: the original (misspelled) name is still called
# elsewhere in this notebook.
visulize_wiki_ents = visualize_wiki_ents

def print_books_by_source(author, source):
    """Print the author's books that are attributed to one fact source.

    Prints a banner with the upper-cased source name, then each book of
    ``author`` whose ``fact_source`` is the ``FactSource`` row with
    ``stype == source``, then a closing rule.

    Args:
        author: Object exposing a ``books`` relation that supports
            ``select().where(...)``.
        source: Value matched against ``FactSource.stype``.
    """
    banner = f'========== {source.upper()} ==============='
    print(banner)

    fact_source = FactSource.get(FactSource.stype == source)
    books_from_source = author.books.select().where(Book.fact_source == fact_source)
    for book in books_from_source:
        print(book)

    print('==============================\n')
    
def print_dbpedia_books(author):
    """Print the author's books recorded under the 'dbpedia' fact source."""
    print_books_by_source(author, source='dbpedia')
    
def print_ner_books(author):
    """Print the author's books recorded under the 'spacy_clean_lg' fact source."""
    print_books_by_source(author, source='spacy_clean_lg')

In [9]:
# Number of authors to show in this side-by-side preview.
PREVIEW_LIMIT = 10

# For each previewed author: a heading, the books from each fact source,
# and the entity-highlighted wiki text, separated by blank lines.
for author in Author.select()[:PREVIEW_LIMIT]:
    heading = f'-== {author.name.upper()} ==-\n'
    print(heading)
    print_dbpedia_books(author)
    print_ner_books(author)
    visulize_wiki_ents(author)
    print('\n\n\n')

-== ) ==-

The Rice Sprout Song (1955)

Dream of the Red Chamber (None)
The Golden Cangue (None)
Love (None)
The Rice Sprout Song (None)
Old Man (None)
The Legend of Sleepy Hollow (None)
The Fall of the Pagoda (1963)
The Book of Change (None)
Dream of the Red Chamber (None)
Zhaoyan (None)
Love (1943)
Novellas (None)
The Rice Sprout Song (None)
Traces of Love (None)
Unending Love (1947)
Long Live (1947)
The Sorrows and Joys of Middle Age (1949)
Love (1984)
The Legend (2004)
Zhang Ailing (2005)
Science Fiction (None)
List of Chinese (None)







-== ABRAHAM GRACE MERRITT ==-

The Ship of Ishtar (1924)
The Face in the Abyss (1931)
Dwellers in the Mirage (1932)

The Science Fiction and Fantasy Hall of Fame (1999)
The Moon Pool (1918)
The Face (1919)
The Snake Mother (1919)
The Conquest of the Moon Pool (1918)
The Black Wheel (1948)
The Thirteen Best Supernatural Horror Novels (1983)
Appendix N (None)
The Moon Pool (1919)
the Moon Pool (1919)
online(The Moon Pool (1918)
The Face (1931)
The Snake Mother (1923)
Witch (None)
Through The Dragon Glass (1917)
The People of the Pit (1918)
Three Lines of Old French (1919)
Prologue (1920)
The Pool of the Stone God (1923)
The Women of the Wood (1926)
The Drone (1934)
The Drone Man (None)
The Rhythm of the Spheres (None)
The Last Poet and the Robots (1934)
The Last Poet & the Wrongness of Space (1936)
The Whelming of Cherkis (1946)
Old Gods Wake (1948)
The White Road (1949)
The Fox Woman (1949)
The Dwellers in the (1985)
The Wind Trail (1910)
Old Trinity Churchyard (None)
Song for Wood 





-== ABRAHAM STOKER ==-

The Mystery of the Sea (1902)
The Snake's Pass (1890)
The Primrose Path  (1875)
Under the Sunset (1881)
Miss Betty (1898)
The Shoulder of Shasta (1895)
The Watter's Mou' (1895)
The Man  (1905)
Snowbound: The Record of a Theatrical Touring Party (1908)
Famous Impostors (1910)
Lady Athlyne (1908)
Personal Reminiscences of Henry Irving (1906)

MA (1870)
Hamlet (1876)
The Crystal Cup (1872)
The Chain of Destiny (None)
The Duties of Clerks of Petty Sessions (1876)
In Search of Dracula by (1972)
Home Rule (None)
Dublin Journal (2012)
The Shoulder of Shasta (1895)
The Mystery of the Sea (1902)
Stars (1903)
The Man (None)
The Duties of Clerks of Petty Sessions (1879)
A Glimpse of America (1886)
A Facsimile Edition (2008)
The Question of a National Theatre The Nineteenth Century (1908)
The World's Work (None)
The Censorship of Fiction (None)
The Censorship of Stage Plays (None)
Untold Story (2016)
Victorian Fiction Research Guide Works (None)







-== ADAM CHRISTOPHER MCGECHAN ==-

The Burning Dark (2014)

Angry Robot (None)
Angry Robot (2013)
Brisk Money (2014)
Shadow's Call (None)
The Burning Dark (2014)
The Jovian Conspiracy (None)
The Machine Awakes (2015)
The Stars Below (2014)
The Dead Stars (2017)







-== ADAM ROBERTS ==-

On  (2001)
The Soddit (2003)
Land of the Headless (2007)
Splinter  (2007)
Stone  (2002)
The Snow  (2004)

Tollund (None)
The Snow (2004)
A Novel (2008)
The Thing Itself (2015)
The Black Prince (2018)
S-Bomb (2004)
The Man Who Would Be Kling (2019)
The McAtrix Derided (2004)
The Matrix) (None)
Star Wars (2005)
Science Fiction: the New Critical Idiom (2013)
The Lord of the Rings (None)
A Look Behind (None)
The History of Science Fiction (Palgrave Histories of Literature (None)
The Riddles of The Hobbit (Palgrave McMillan (2013)
Science Fiction and Fantasy (2014)







-== ADAM SAMUEL JAMES FOULDS ==-

The Quickening Maze (2009)

The Truth About These Strange Times (None)
MA (2001)
The Broken Word (2008)
The Quickening Maze (None)
The Broken Word (2009)
The Quickening Maze (None)
The Broken Word (None)
Dream Sequence List (2019)







-== ADELINE VIRGINIA STEPHEN ==-

Monday or Tuesday (1921)
Three Guineas (1938)
The Years (1937)
Flush: A Biography (1933)
Moments of Being (1985)

The Voyage Out (1915)
A Room of One's Own (1929)
Tit-Bits (1891)
Ginia (1880)
Society in those days (None)
On Not Knowing Greek (1925)
the Midnight Society (None)
Eminent Victorian (None)
At Homes (None)
Ka (None)
the Play Reading Society (None)
The Voyage Out (1915)
Love (1937)
Woolfs (1924)
Virginia Woolf Life (None)
The Mark on the Wall (None)
Publication No (1917)
Outsider's Society (None)
Young Poet (1928)
A Letter (1927)
A Writer's Diary (1953)
Woolfs (None)
Between the Acts (1941)
the Neo-Paganism (None)
Between the Acts (1941)
Between the Acts (1941)
The Son of Royal Langbirth (1904)
The Voyage Out (1915)
The Voyage Out (None)
Between the Acts (1941)
Am I a Snob (1936)
The Moment (1947)
Women and Fiction (None)
The Duchess and the Jeweller (1938)
The Duchess (None)
Three Guineas (1938)
Inner Life (2005)
Saint Julia (None)
A Room





-== ADRIENNE CECILE RICH ==-

On Lies, Secrets and Silence (1979)

A Change of World (None)
A Change of World (1951)
Snapshots of a Daughter-in-Law (1963)
Split at the Root (1982)
Wreck (1974)
The Fall of America (None)
A Wild Patience (None)
Dream of a Common Language (1977)
The Fact of a Doorframe (1981)
Compulsory Heterosexuality and Lesbian Existence (None)
MA (1979)
Sinister Wisdom (1981)
Your Life (1986)
Notes Toward a Politics of Location (1984)
An Atlas of the Difficult World (1991)
Genius Grant (None)
The Art of the Possible: Essays and Conversations (1999)
Snapshots of a Daughter-in-Law (None)
Power (None)
Compulsory Heterosexuality and Lesbian Existence (None)
National Medal of Arts (1997)
National Book Foundation Medal (2010)
Secrets and Silence (None)
Compulsory Heterosexuality and Lesbian Existence (1979)
A Change of World (None)
Selected Poems (1967)
The Will to Change (1971)
Love Poems (1976)
The Dream of a Common Language (1978)
A Wild Patience Has Taken Me (None)






-== AGATHA MARY CLARISSA MILLER ==-

Death in the Clouds (1935)
Partners in Crime (short story collection) (1929)
The Thirteen Problems (1932)
Come, Tell Me How You Live (1946)
Nemesis  (1971)
4.50 from Paddington (1957)
A Daughter's a Daughter (1952)
A Caribbean Mystery (1964)
The Murder on the Links (1923)
The Sittaford Mystery (1931)
The Witness for the Prosecution and Other Stories (1948)
N or M? (1941)
The Hollow (1946)
The Labours of Hercules (1947)
The Regatta Mystery (1939)
Three Act Tragedy (1934)
The Under Dog and Other Stories (1951)
Three Blind Mice and Other Stories (1950)
They Do It with Mirrors (1952)
Double Sin and Other Stories (1961)
The Golden Ball and Other Stories (1971)
The Scoop and Behind the Screen (1983)
Problem at Pollensa Bay and Other Stories (1991)
Poems (Christie collection) (1973)
Spider's Web  (2000)
The Road of Dreams (1925)
After the Funeral (1953)
The Secret Adversary (1922)
The Secret of Chimneys (1925)
Evil Under the Sun (1941)
Curtain  (1975)






-== AHMAD SALMAN RUSHDIE ==-

Luka and the Fire of Life (2010)
The Moor's Last Sigh (1995)
The Satanic Verses (1988)
The Ground Beneath Her Feet (1999)
The Jaguar Smile (30)
Midnight's Children (1981)
Grimus (1975)
Fury  (2001)

Midnight's Children (1981)
The Satanic Verses (1988)
Distinguished Writer in Residence (2015)
The Satanic Verses (None)
A Memoir (2012)
The Best Dreams (None)
Midnight's Children (None)
Midnight's Children (1981)
the Best of the Bookers (1993)
character.After Midnight's Children (1983)
The Satanic Verses (1988)
Last Sigh (None)
Shalimar the (2005)
A Memoir (None)
Diary (None)
The Charlie Rose Show (2006)
She Found Me (None)
Talladega Nights (None)
Midnight's Children (None)
Midnight's Children (None)
The Satanic Verses (1988)
Satanic (None)
The Observer (1989)
My Funny Valentine (None)
The Satanic Verses (2015)
Martyred in London (1989)
The Satanic Verses (1990)
International Gorillay (None)
The Satanic Verses (None)
Amnesty … (None)
The Satanic Verses (198





