In [1]:
import sys; sys.path.insert(0,'../../..')
from wikidata import *
setup_log(to_screen=True)
from lltk.model.networks import *


In [16]:
from lltk.imports import *

# Property values that CorpusGraph.graph() skips by default (its `bad_propvals` argument).
# NOTE(review): np.nan only matches by identity in a set lookup — confirm NaNs are really caught.
BAD_PROPVALS = {
    '',
    'Q0',
    None,
    np.nan,
}

# Metadata keys that CorpusGraph.graph() never turns into property nodes by default
# (its `bad_props` argument).
BAD_PROPS = {
    'query',
    'wd_author_match',
    'wd_title_match',
    'corpus',
    'ocr_accuracy',
    'dob',
}

# Optional whitelist of metadata keys; a falsy value (None) disables the whitelist check.
OK_PROPS = None

def get_node_type(node):
    """Return the part of a ``'<type>=<value>'`` node name before the first ``=``.

    A name that contains no ``=`` is returned unchanged.
    """
    node_type, _sep, _value = node.partition('=')
    return node_type


class TextCorpusGraph(BaseText):
    """Text class used by CorpusGraph (its TEXT_CLASS); adds nothing to BaseText."""

class CorpusGraph(BaseCorpus):
    """Corpus that links the texts of one or more corpora to their metadata values.

    The graph built by :meth:`graph` is bipartite in spirit: every text becomes a
    ``'<col_id>=<address>'`` node and every retained metadata item becomes a
    ``'<property>=<value>'`` node, with an edge between a text and each of its
    property nodes.  Texts that share a metadata value are therefore two hops apart.

    All graphs returned by this class are passed through ``filter_graph`` (defined
    outside this file); what exactly it removes is not visible here.
    """
    ID='corpus_graph'
    NAME='CorpusGraph'
    TEXT_CLASS = TextCorpusGraph

    def __init__(self,*args,corpora=None,**kwargs):
        """Create the corpus and optionally add the texts of ``corpora``.

        Args:
            corpora: corpus reference, or iterable of references, handed to
                :meth:`add_corpora`.  ``None``/empty adds nothing.
            *args, **kwargs: forwarded to ``BaseCorpus.__init__``.
        """
        super().__init__(*args,**kwargs)
        # corpus id -> corpus object, filled by add_corpus()
        self._corpusd={}
        if corpora: self.add_corpora(corpora)

    def add_corpora(self,corpora):
        """Add every corpus in ``corpora``; a bare string counts as a single corpus."""
        # Iterating a string would try to load one corpus per character.
        if isinstance(corpora,str): corpora=[corpora]
        for corp in corpora: self.add_corpus(corp)

    def add_corpus(self,corpus):
        """Load ``corpus`` via ``Corpus(...)``, register it and add all of its texts.

        Containers (list, tuple, set, frozenset, dict) are expanded through
        :meth:`add_corpora`.
        """
        if isinstance(corpus,(list,tuple,set,frozenset,dict)):
            self.add_corpora(corpus)
            return
        corpusobj = Corpus(corpus)
        # Register the corpus so corpora() / metadata() can see it.
        key = getattr(corpusobj,'id',None)
        self._corpusd[key if key is not None else str(corpus)] = corpusobj
        for t in corpusobj.texts(): self.add_text(t)

    def corpora(self):
        """Return the corpus objects registered through :meth:`add_corpus`."""
        return list(self._corpusd.values())

    def metadata(self,fillna='',**kwargs):
        """Concatenate the metadata frames of all registered corpora.

        Args:
            fillna: value used to fill missing cells after concatenation.
            **kwargs: forwarded to each corpus's ``metadata()``.

        Returns:
            A DataFrame; empty when no corpus has been registered.
        """
        frames=[c.metadata(**kwargs) for c in self.corpora()]
        return pd.concat(frames).fillna(fillna) if frames else pd.DataFrame()

    @staticmethod
    def _as_type_set(node_types):
        """Normalise a node-type argument (None, str or iterable) to a fresh set."""
        if not node_types: return set()
        if isinstance(node_types,str): return {node_types}
        return set(node_types)

    def _build_graph(self,texts=None,ok_props=OK_PROPS,bad_props=BAD_PROPS,
                     bad_propvals=BAD_PROPVALS,col_id=COL_ADDR):
        """Build a fresh text/property graph from the metadata of ``self.texts(texts)``."""
        g = nx.Graph()
        bad_props = bad_props or ()
        # `x in {np.nan}` only matches NaN by identity, so NaN values coming out of
        # metadata would slip through; detect them explicitly when NaN is listed as bad.
        skip_nan = any(isinstance(b,float) and b!=b for b in (bad_propvals or ()))
        for t in self.texts(texts):
            # Copy so that adding '_id_source' below never touches the text's own metadata.
            tmeta = dict(t.metadata(wikidata=False))
            tnode = f'{col_id}={t.meta.get(col_id)}'
            if t.source is not None and t.source.id:
                tmeta['_id_source']=t.source.id

            if not g.has_node(tnode):
                g.add_node(tnode, **tmeta)

            for propname,propval in tmeta.items():
                if propname == self.col_id: continue
                if propname in bad_props: continue
                try:
                    if propval in bad_propvals: continue
                except TypeError:
                    # Unhashable values (lists, dicts) cannot be looked up in the set;
                    # they are kept, as before.
                    pass
                if skip_nan and isinstance(propval,float) and propval != propval: continue
                if ok_props and propname not in ok_props: continue

                propnode=f'{propname}={propval}'
                # The id column itself would otherwise link the text node to itself.
                if propnode == tnode: continue
                g.add_edge(tnode, propnode)  # creates propnode if missing
        return g

    def graph(self,
            node_types=None,
            texts=None,
            force=True,
            ok_props=OK_PROPS,
            bad_props=BAD_PROPS,
            bad_propvals=BAD_PROPVALS,
            incl_text_nodes=True,
            col_id=COL_ADDR,
            min_degree=None,
            min_weight=None,
            remove_isolates=True,
            **kwargs):
        """Return the (optionally filtered) text/property graph.

        Args:
            node_types: a property name or iterable of names to keep; falsy keeps all.
            texts: forwarded to ``self.texts()`` to choose which texts are included.
            force: rebuild the graph even if one is cached on ``self._g``.
            ok_props: optional whitelist of metadata keys.
            bad_props: metadata keys never turned into nodes.
            bad_propvals: metadata values never turned into nodes.
            incl_text_nodes: keep text nodes when filtering by ``node_types``.
            col_id: metadata key whose value identifies a text node.
            min_degree, min_weight, remove_isolates, **kwargs: forwarded to
                ``filter_graph``.

        Returns:
            A graph that is independent of the cached one, so pruning it (here or by
            the caller) never corrupts ``self._g``.
        """
        if force or getattr(self,'_g',None) is None:
            self._g = self._build_graph(
                texts=texts,
                ok_props=ok_props,
                bad_props=bad_props,
                bad_propvals=bad_propvals,
                col_id=col_id,
            )

        # Work on a copy: removing nodes from the cached graph would make every later
        # graph(force=False) call silently return the pruned version.
        g = self._g.copy()
        keep_types = self._as_type_set(node_types)
        if keep_types:
            if incl_text_nodes: keep_types.add(col_id)
            g.remove_nodes_from([
                node for node in g.nodes()
                if get_node_type(node) not in keep_types
            ])
        return filter_graph(g,min_degree=min_degree,min_weight=min_weight,remove_isolates=remove_isolates,**kwargs)

    def top_nodes(self,g=None,topn=25,**kwargs):
        """Return the ``topn`` highest-degree nodes as a Series (node -> degree).

        Nodes whose name starts with ``'_id'`` or ``'_addr'`` are excluded.
        ``g`` defaults to ``self.graph(**kwargs)``.
        """
        if g is None: g=self.graph(**kwargs)
        degrees = {
            node:deg
            for node,deg in g.degree()
            if not node.startswith(('_id','_addr'))
        }
        return pd.Series(degrees).sort_values(ascending=False).head(topn)

    def connect(self,texts=None):
        """Placeholder; not implemented."""
        pass

    def neighbors(self,node,g=None,**kwargs):
        """Return an iterator over the neighbours of ``node`` in ``g``.

        ``g`` defaults to ``self.graph(**kwargs)``.
        """
        if g is None: g=self.graph(**kwargs)
        return g.neighbors(node)

    def neighbors_graph(self,node,remove_seed=True,**kwargs):
        """Build the graph of ``node``'s neighbours and their own neighbours.

        Args:
            node: seed node name.
            remove_seed: drop the seed node itself from the result.
            **kwargs: forwarded to :meth:`neighbors` for the first hop and to
                ``filter_graph`` for the result.

        The second hop is looked up in the default ``self.graph()``; that graph is
        built once here instead of once per neighbour.
        """
        seed_neighbors = list(self.neighbors(node,**kwargs))
        gsub = nx.Graph()
        if seed_neighbors:
            hop_graph = self.graph()
            for neighb in seed_neighbors:
                for neighbs_neighb in hop_graph.neighbors(neighb):
                    gsub.add_edge(neighb,neighbs_neighb)
        if remove_seed and gsub.has_node(node): gsub.remove_node(node)
        return filter_graph(gsub,**kwargs)

    def neighbors_df(self,node,fillna='',**kwargs):
        """Return the metadata of the texts found in ``neighbors_graph(node)``.

        Args:
            node: seed node name.
            fillna: value used to fill missing cells.
            **kwargs: forwarded to :meth:`neighbors_graph`; a ``col_id`` entry also
                decides which node prefix marks a text node (default ``COL_ADDR``).

        Returns:
            A DataFrame with one row per text, indexed by ``self.col_id`` when that
            column exists.
        """
        neighbs_graph = self.neighbors_graph(node,**kwargs)
        # Text nodes are named '<col_id>=<address>' by graph(), not '<corpus id>=...'.
        col_id = kwargs.get('col_id',COL_ADDR)
        odf=pd.DataFrame([
            self.text(idx.split('=',1)[-1]).meta
            for idx in neighbs_graph.nodes()
            if get_node_type(idx) == col_id
        ]).fillna(fillna)
        if self.col_id in set(odf.columns): odf=odf.set_index(self.col_id)
        # .loc[unique labels] would return every duplicate row again; mask them instead.
        odf = odf[~odf.index.duplicated(keep='first')]
        return odf

    def nodes(self,node_types=None,g=None,data=False,**kwargs):
        """List node names (or ``(name, attributes)`` pairs when ``data`` is true).

        Args:
            node_types: a property name or iterable of names to keep; falsy keeps all.
            g: graph to read; defaults to ``self.graph(**kwargs)``.
            data: also return each node's attribute dict.
        """
        if g is None: g=self.graph(**kwargs)
        keep_types = self._as_type_set(node_types)
        return [
            (node,g.nodes[node]) if data else node
            for node in g.nodes()
            if not keep_types or get_node_type(node) in keep_types
        ]

    def subgraph(self,nodes=None,g=None,add_neighbors=True,**kwargs):
        """Return the part of ``g`` around ``nodes``.

        Args:
            nodes: nodes to keep; defaults to ``self.nodes(g=g, **kwargs)``.
            g: graph to cut from; defaults to a freshly built ``self.graph(**kwargs)``.
            add_neighbors: when true, keep each node's edges to all of its neighbours
                (nodes without neighbours are left out); otherwise keep only edges
                between the given nodes.
            **kwargs: also forwarded to ``filter_graph`` for the result.
        """
        # Merging into a dict lets a caller-supplied `force` override the default
        # instead of raising "got multiple values for keyword argument".
        if g is None: g=self.graph(**{'force':True,**kwargs})
        if nodes is None: nodes=self.nodes(g=g,**kwargs)

        if add_neighbors:
            gsub = nx.Graph()
            for node in nodes:
                for neighb in g.neighbors(node):
                    # Carry node attributes over, as the g.subgraph() branch does.
                    for endpoint in (neighb,node):
                        if not gsub.has_node(endpoint):
                            gsub.add_node(endpoint,**g.nodes[endpoint])
                    gsub.add_edge(node,neighb,**g.edges[node,neighb])
        else:
            # g.subgraph() is a read-only view; copy it so it can be filtered.
            gsub = g.subgraph(nodes).copy()
        return filter_graph(gsub,**kwargs)

    def edges(self,**kwargs):
        """Placeholder; not implemented."""
        pass
        



In [22]:
# Load the 'markmark' corpus on its own ...
C = Corpus('markmark')
# ... and build a CorpusGraph seeded with the same corpus id.
# `corpora` is a one-element set here; CorpusGraph.add_corpora simply iterates over it.
CG = CorpusGraph(corpora={'markmark'})

In [25]:
C.meta

Unnamed: 0_level_0,author,corpus,dob,dod,gender,name_first,name_last,nation,num_words,ocr_accuracy,title,year,id,_addr_wikidata,name_middle,notes
_addr,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
_markmark/Caldwell_Erskine.Tobacco_Road,"Caldwell, Erskine",MarkMark,1903.0,1987.0,M,Erskine,Caldwell,American,69662,0,Tobacco Road,1932,Caldwell_Erskine.Tobacco_Road,_wikidata/Q1169793,,
_markmark/Sinclair_Upton.The_Jungle,"Sinclair, Upton",MarkMark,1878.0,1968.0,M,Upton,Sinclair,American,156031,0,The Jungle,1906,Sinclair_Upton.The_Jungle,_wikidata/Q260205,Beall,
_markmark/Hemingway_Ernest.In_Our_Time,"Hemingway, Ernest Miller",MarkMark,1899.0,1961.0,M,Ernest,Hemingway,American,43243,0,In Our Time,1925,Hemingway_Ernest.In_Our_Time,_wikidata/Q1164998,Miller,
_markmark/Hemingway_Ernest.A_Farewell_to_Arms,"Hemingway, Ernest Miller",MarkMark,1899.0,1961.0,M,Ernest,Hemingway,American,103460,0,A Farewell to Arms,1929,Hemingway_Ernest.A_Farewell_to_Arms,_wikidata/Q235795,Miller,
_markmark/Hemingway_Ernest.For_Whom_the_Bell_Tolls,"Hemingway, Ernest Miller",MarkMark,1899.0,1961.0,M,Ernest,Hemingway,American,191216,0,For Whom the Bell Tolls,1940,Hemingway_Ernest.For_Whom_the_Bell_Tolls,_wikidata/Q200920,Miller,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
_markmark/Sukenick_Ronald.Up,"Sukenick, Ronald",MarkMark,1932.0,2004.0,M,Ronald,Sukenick,American,114213,0,Up,1968,Sukenick_Ronald.Up,_wikidata/Q0,,
_markmark/Bennett_Arnold.The_Old_Wives_Tale,"Bennett, Enoch Arnold",MarkMark,1867.0,1931.0,M,Enoch,Bennett,English,222095,0,The Old Wives' Tale,1908,Bennett_Arnold.The_Old_Wives_Tale,_wikidata/Q7754941,Arnold,
_markmark/Robbins_Tom.Skinny_Legs_and_All,"Robbins, Tom",MarkMark,1936.0,,M,Tom,Robbins,American,179211,0,Skinny Legs and All,1990,Robbins_Tom.Skinny_Legs_and_All,_wikidata/Q7535579,,
_markmark/Donleavy_J.P..The_Ginger_Man,"Donleavy, James Patrick",MarkMark,1926.0,,M,James,Donleavy,Irish American,116047,0,The Ginger Man,1955,Donleavy_J.P..The_Ginger_Man,_wikidata/Q7736458,Patrick,


In [29]:
# Scratch check with a temporary corpus.
# NOTE: this rebinds `C`, which another cell uses for the 'markmark' corpus.
C = Corpus()
C.text(id='testing')
# The id is changed after the text was created; the output below still shows the
# address `_tmp_corpus/testing`.
C.id = 'newwww'
C.metadata(force=True)

[36m[20:57:19][0m [34m[1mlltk.corpus.corpus.Corpus()[0m[36m:1400:[0m Corpus begun: [BaseCorpus](tmp_corpus)


Unnamed: 0_level_0,id
_addr,Unnamed: 1_level_1
_tmp_corpus/testing,testing


In [5]:
t.wikidata.meta

{'_addr': '_wikidata/Q190192',
 'author|P50': 'Frank Herbert|Q7934',
 'award_received|P166': ['Nebula Award for Best Novel|Q266012',
  'Hugo Award for Best Novel|Q255032',
  'Seiun Award for Best Translated Novel|Q27496509'],
 'characters|P674': ['Shaddam Corrino IV|Q509741',
  'Paul Atreides|Q939956',
  'Lady Jessica|Q2724114',
  'Leto I Atreides|Q1078956',
  'Vladimir Harkonnen|Q1051419',
  'Duncan Idaho|Q911563',
  'Gurney Halleck|Q384546',
  'Feyd-Rautha|Q2468873',
  'Chani|Q2724142',
  'Stilgar|Q2280774',
  'Thufir Hawat|Q937394',
  'Glossu Rabban|Q3109248',
  'Wellington Yueh|Q933414',
  'Piter De Vries|Q665717',
  'Alia Atreides|Q2706099',
  'Princess Irulan|Q1407875',
  'Gaius Helen Mohiam|Q1469721',
  'Margot Fenring|Q3290625',
  'Liet-Kynes|Q2583490',
  'Fremen|Q1003839',
  'Hasimir Fenring|Q2279377'],
 'commons_category|P373': 'Dune universe',
 'country_of_origin|P495': 'United States of America|Q30',
 'cover_art_by|P736': 'John Schoenherr|Q6256932',
 'derivative_work|P4969'

In [18]:
# Rebuild the graph and keep only `gender` property nodes; text nodes stay because
# graph()'s incl_text_nodes defaults to True.
g=CG.graph(node_types={'gender'},force=True)
list(g.nodes())

['_addr=_corpus_graph/_markmark/Caldwell_Erskine.Tobacco_Road',
 'gender=M',
 '_addr=_corpus_graph/_markmark/Sinclair_Upton.The_Jungle',
 '_addr=_corpus_graph/_markmark/Hemingway_Ernest.In_Our_Time',
 '_addr=_corpus_graph/_markmark/Hemingway_Ernest.A_Farewell_to_Arms',
 '_addr=_corpus_graph/_markmark/Hemingway_Ernest.For_Whom_the_Bell_Tolls',
 '_addr=_corpus_graph/_markmark/Hemingway_Ernest.The_Old_Man_and_the_Sea',
 '_addr=_corpus_graph/_markmark/Hemingway_Ernest.The_Sun_Also_Rises',
 '_addr=_corpus_graph/_markmark/Hubbard_L._Ron.Fear',
 '_addr=_corpus_graph/_markmark/Hubbard_L._Ron.The_Invaders_Plan',
 '_addr=_corpus_graph/_markmark/Hubbard_L._Ron.Battlefield_Earth',
 '_addr=_corpus_graph/_markmark/Cronin_A._J..The_Keys_of_the_Kingdom',
 '_addr=_corpus_graph/_markmark/Ellison_Ralph.Invisible_Man',
 '_addr=_corpus_graph/_markmark/Hutchinson_A._S._M..If_Winter_Comes',
 '_addr=_corpus_graph/_markmark/Wallace_David_Foster.Infinite_Jest',
 '_addr=_corpus_graph/_markmark/Faulkner_William.L

In [6]:
# NOTE(review): `qnode` is not defined in any cell shown here — it comes from hidden kernel state.
odf=CG.neighbors_df(qnode)
# Not the last expression of the cell, so this value is computed but not displayed.
len(odf)
odf

Unnamed: 0_level_0,_addr,author,babelio_work_id,bibliotheque_nationale_de_france_id,bookbrainz_work_id,canadiana_name_authority_id,commons_category,copyright_status,country_of_origin,dedicated_to,...,motto_text,narrative_motif,narrator,number_of_parts_of_this_work,quora_topic_id,"references_work,_tradition_or_theory",set_in_period,uses,musicbrainz_release_group_id,sheetname
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
_markmark/Conrad_Joseph.The_Secret_Agent,_corpus_graph/_markmark/Conrad_Joseph.The_Secr...,"Conrad, Joseph",3480.0,11972020d,f932da14-0224-471e-bda2-040e550712dc,ncf12125263,The Secret Agent (Conrad),"[public_domain, public_domain]",United Kingdom of Great Britain and Ireland,H.G. Wells,...,,,,,,,,,,
_markmark/James_Henry.The_Portrait_of_a_Lady,_corpus_graph/_markmark/James_Henry.The_Portra...,"James, Henry",,11967604t,,,The Portrait of a Lady,"[public_domain, public_domain]",England,,...,,,,,,,,,,
_markmark/Ballard_J.G..Crash,_corpus_graph/_markmark/Ballard_J.G..Crash,"Ballard, James Graham",,,,,,,United Kingdom,,...,,,,,,,,,,
_markmark/Woolf_Virginia.Mrs._Dalloway,_corpus_graph/_markmark/Woolf_Virginia.Mrs._Da...,"Woolf, Virginia",,119677628,,,,"[public_domain, public_domain]",United Kingdom,,...,,,,,,,,,,
_markmark/Barth_John.The_Sot-Weed_Factor,_corpus_graph/_markmark/Barth_John.The_Sot-Wee...,"Barth, John Simmons",,,,,,,United States of America,,...,,,,,,,,,,
_markmark/Bowen_Elizabeth.The_Death_of_the_Heart,_corpus_graph/_markmark/Bowen_Elizabeth.The_De...,"Bowen, Elizabeth",,,,,,,United States of America,,...,,,,,,,,,,
_markmark/Winsor_Kathleen.Forever_Amber,_corpus_graph/_markmark/Winsor_Kathleen.Foreve...,"Winsor, Kathleen",,,,,,,United States of America,,...,,,,,,,,,,
_markmark/Murdoch_Iris.Under_the_Net,_corpus_graph/_markmark/Murdoch_Iris.Under_the...,"Murdoch, Jean Iris",,,,,,,United Kingdom,,...,,,,,,,,,,
_markmark/Lawrence_D.H..Sons_and_Lovers,_corpus_graph/_markmark/Lawrence_D.H..Sons_and...,"Lawrence, David Herbert",,123192079,,,,"[public_domain, public_domain]",United Kingdom,,...,,,,,,,,,,
_txtlab/EN_1782_BurneyFanny_Cecilia_Novel,_corpus_graph/_txtlab/EN_1782_BurneyFanny_Ceci...,"Burney,Fanny",,16658823v,,,,"[public_domain, public_domain]",,,...,,,,,,,,,,


In [8]:
# Fails (see the TypeError below): neighbors() is called without the node whose
# neighbours are wanted.
list(g.neighbors())

TypeError: neighbors() missing 1 required positional argument: 'n'

In [None]:
def 

In [28]:
# Print every node connected to one `query=...` property node of `g`.
# NOTE(review): 'query' is listed in BAD_PROPS, so CorpusGraph.graph() with its default
# arguments would not create this node — `g` presumably comes from an earlier version
# of the code (the output below uses `_id=` text nodes); confirm before relying on it.
q='query=Belon, A Relation of the Country of Jansenia'
for node in g.neighbors(q):
    print(node)


_id=_markmark/Hutchinson_A._S._M..If_Winter_Comes
_id=_markmark/Morrison_Toni.Beloved
_id=_markmark/Tolkien_J.R.R..The_Lord_of_the_Rings
_id=_markmark/Green_Henry.Loving
_id=_markmark/Smith_Lillian.Strange_Fruit
_id=_markmark/le_Carré_John.The_Spy_Who_Came_In_from_the_Cold
_id=_markmark/Katz_Steve.Creamy_and_Delicious
_id=_markmark/Conrad_Joseph.Heart_of_Darkness
_id=_markmark/Grey_Zane.The_U.P._Trail
_id=_markmark/Grey_Zane.The_Man_of_the_Forest
_id=_markmark/Huxley_Aldous.Point_Counter_Point
_id=_markmark/Allen_Hervey.Anthony_Adverse
_id=_markmark/Wright_Harold_Bell.The_Eyes_of_the_World
_id=_markmark/Atherton_Gertrude.Black_Oxen
_id=_markmark/Joyce_James.Dubliners
_id=_markmark/de_Lint_Charles.Mulengro
_id=_markmark/de_Lint_Charles.The_Little_Country
_id=_markmark/de_Lint_Charles.Trader
_id=_markmark/de_Lint_Charles.Someplace_to_Be_Flying
_id=_markmark/de_Lint_Charles.Greenmantle
_id=_markmark/de_Lint_Charles.Memory_and_Dream
_id=_markmark/Pasternak_Boris.Doctor_Zhivago
_id=_markmar

In [22]:
from lltk.model.charnet import draw_nx
draw_nx(g)

KeyboardInterrupt: 

In [10]:
CM.texts()[0].meta

{'_addr': '_corpus_map/_markmark/Caldwell_Erskine.Tobacco_Road',
 'author': 'Caldwell, Erskine',
 'country_of_origin': 'United States of America',
 'derivative_work': 'Tobacco Road',
 'encyclopaedia_britannica_online_id': 'topic/Tobacco-Road',
 'form_of_creative_work': 'novel',
 'freebase_id': '/m/03hgtg',
 'genre': 'novel',
 'id': '_markmark/Caldwell_Erskine.Tobacco_Road',
 'instance_of': 'literary work',
 'language_of_work_or_name': 'English',
 'main_subject': 'Great Depression in the United States',
 'narrative_location': 'Georgia',
 'publication_date': '+1932-00-00T00:00:00Z',
 'qid': 'Q1169793',
 'query': 'Caldwell, Tobacco Road',
 'title': 'Tobacco Road',
 'wd_author': 'Erskine Caldwell',
 'wd_author_match': 100,
 'wd_title': 'Tobacco Road',
 'wd_title_match': 100,
 'corpus': 'MarkMark',
 'dob': 1903,
 'dod': 1987,
 'gender': 'M',
 'name_first': 'Erskine',
 'name_last': 'Caldwell',
 'nation': 'American',
 'num_words': 69662,
 'ocr_accuracy': '0.818839539',
 'year': 1932,
 '_id_wi

{'_addr': '_corpus_map/_markmark/Farrell_James_T..The_Studs_Lonigan_Trilogy',
 'id': '_markmark/Farrell_James_T..The_Studs_Lonigan_Trilogy',
 'qid': 'Q0',
 'query': 'Belon, A Relation of the Country of Jansenia',
 'wd_author': '',
 'wd_author_match': 0,
 'wd_title': '',
 'wd_title_match': 0,
 '_id_wikidata': 'Q0',
 'author': 'Farrell, James Thomas',
 'corpus': 'MarkMark',
 'dob': 1904,
 'dod': 1979,
 'gender': 'M',
 'name_first': 'James',
 'name_last': 'Farrell',
 'name_middle': 'Thomas',
 'nation': 'American',
 'num_words': 401729,
 'ocr_accuracy': '0.767196294',
 'title': 'The Studs Lonigan Trilogy',
 'year': 1932}

In [13]:
t.metadata()

[36m[11:33:14][0m [34m[1mwikidata.metadata()[0m[36m:48:[0m Cached: /Users/ryan/lltk_data/corpora/wikidata/texts/Q1169793/meta.json


{'author': 'Erskine Caldwell',
 'instance_of': 'literary work',
 'publication_date': '+1932-00-00T00:00:00Z',
 'country_of_origin': 'United States of America',
 'main_subject': 'Great Depression in the United States',
 'title': 'Tobacco Road',
 'language_of_work_or_name': 'English',
 'narrative_location': 'Georgia',
 'freebase_id': '/m/03hgtg',
 'derivative_work': 'Tobacco Road',
 'form_of_creative_work': 'novel',
 'encyclopaedia_britannica_online_id': 'topic/Tobacco-Road',
 'genre': 'novel',
 'qid': 'Q1169793',
 'wd_title': 'Tobacco Road',
 'wd_author': 'Erskine Caldwell',
 'wd_author_match': 100,
 'wd_title_match': 100,
 'query': 'Caldwell, Tobacco Road',
 'id': 'Q1169793',
 '_addr': '_wikidata/Q1169793'}

In [15]:
t.source.init_meta_json()

{'_addr': '_markmark/Caldwell_Erskine.Tobacco_Road',
 'author': 'Caldwell, Erskine',
 'corpus': 'MarkMark',
 'dob': 1903,
 'dod': 1987,
 'gender': 'M',
 'name_first': 'Erskine',
 'name_last': 'Caldwell',
 'nation': 'American',
 'num_words': 69662,
 'ocr_accuracy': '0.818839539',
 'title': 'Tobacco Road',
 'year': 1932,
 'id': 'Caldwell_Erskine.Tobacco_Road',
 '_id_wikidata': 'Q1169793'}

In [8]:
Wiki.text(**{'id': '_markmark/Capote_Truman.In_Cold_Blood'}).meta

[36m[11:26:30][0m [34m[1mlltk.corpus.corpus.init()[0m[36m:281:[0m Initializing from metadata: [Wikidata](wikidata)


{'author': 'Truman Capote',
 'coordinate_location': {'latitude': 37.985,
  'longitude': -100.999795,
  'altitude': None,
  'precision': None,
  'globe': 'http://www.wikidata.org/entity/Q2'},
 'genre': ['non-fiction_novel', 'nonfiction', 'noir_fiction'],
 'freebase_id': '/m/01d4gg',
 'title': 'In Cold Blood',
 'publication_date': '+1965-00-00T00:00:00Z',
 'country_of_origin': 'United States of America',
 'nnl_item_id': ['001855341', '002666947'],
 'country': 'United States of America',
 'bibliotheque_nationale_de_france_id': '11943257x',
 'encyclopaedia_britannica_online_id': 'topic/In-Cold-Blood-novel-by-Capote',
 'language_of_work_or_name': 'English',
 'oclc_work_id': '4518176135',
 'open_library_id': 'OL1992284W',
 'narrative_location': 'Kansas',
 'biblioteca_nacional_de_espana_id': 'XX1913130',
 'gnd_id': '4451944-8',
 'viaf_id': '6571147270566635700007',
 'getty_iconography_authority_id': '901000850',
 'located_in_the_administrative_territorial_entity': 'Kansas',
 'goodreads_work_i

In [12]:
list(os.walk('.'))

[('.',
  ['.ipynb_checkpoints'],
  ['test_wikidata.ipynb',
   'test.bak.csv',
   'test_textconnections.ipynb',
   'test.csv',
   'test_wikidata2.ipynb',
   'wikidata.py']),
 ('./.ipynb_checkpoints', [], [])]

In [5]:
t.wikidata.metadata()

[36m[10:40:12][0m [34m[1mwikidata.metadata()[0m[36m:42:[0m Cached: /Users/ryan/lltk_data/corpora/wikidata/texts/_markmark/James_Henry.The_Bostonians/meta.json


{'author': 'Henry James',
 'publisher': 'Macmillan Publishers',
 'bibliotheque_nationale_de_france_id': '119599952',
 'freebase_id': '/m/098qrx',
 'narrative_location': ['Boston', 'Massachusetts', 'New_York_City'],
 'country_of_origin': 'United Kingdom',
 'publication_date': '+1886-00-00T00:00:00Z',
 'last_line': 'It is to be feared that with the union, so far from brilliant, into which she was about to enter, these were not the last she was destined to shed.',
 'encyclopaedia_britannica_online_id': 'topic/The-Bostonians-novel-by-James',
 'language_of_work_or_name': 'English',
 'first_line': '“Olive will come down in about ten minutes; she told me to tell you that.”',
 'project_gutenberg_ebook_id': ['19717', '19718'],
 'image': 'HenryJamesPhotograph.png',
 'title': 'The Bostonians',
 'copyright_status': ['public_domain', 'public_domain'],
 'instance_of': 'literary work',
 'musicbrainz_work_id': '539ec42a-1cb0-4828-b841-ce33a565d482',
 'derivative_work': 'The Bostonians',
 'form_of_crea

In [6]:
Wiki.metadata()

Unnamed: 0_level_0,instance_of,author,oclc_control_number,publisher,genre,image,project_gutenberg_ebook_id,country_of_origin,publication_date,language_of_work_or_name,...,wd_author_match,wd_title_match,query,_addr,narrative_location,last_line,first_line,title,musicbrainz_work_id,derivative_work
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
_markmark/Anderson_Sherwood.Winesburg_Ohio,"['literary_work', 'short_story_collection']",Sherwood Anderson,607825.0,B. W. Huebsch,short story cycle,"Winesburg, Ohio 1st.jpg",416,United States of America,+1919-00-00T00:00:00Z,American English,...,100,100,"Anderson, Winesburg",_wikidata/_markmark/Anderson_Sherwood.Winesbur...,,,,,,
_markmark/James_Henry.The_Bostonians,literary work,Henry James,,Macmillan Publishers,novel,HenryJamesPhotograph.png,"[19717, 19718]",United Kingdom,+1886-00-00T00:00:00Z,English,...,100,100,"James, The Bostonians",_wikidata/_markmark/James_Henry.The_Bostonians,"[Boston, Massachusetts, New_York_City]","It is to be feared that with the union, so far...",“Olive will come down in about ten minutes; sh...,The Bostonians,539ec42a-1cb0-4828-b841-ce33a565d482,The Bostonians


[BaseText](_markmark/McCarthy_Cormac.Suttree)

In [15]:
t = Wiki.texts()[0].source
t

[BaseText](_markmark/Gaddis_William.JR)

In [18]:
t.metadata()

{'_addr': '_markmark/Gaddis_William.JR',
 '_llp_': 'markmark|Gaddis,_William.JR',
 'author': 'Gaddis, William',
 'corpus': 'MarkMark',
 'dob': '1922.0',
 'dod': '1998.0',
 'gender': 'M',
 'name_first': 'William',
 'name_last': 'Gaddis',
 'nation': 'American',
 'num_words': 387248,
 'ocr_accuracy': '0.7541239722349999',
 'title': 'JR',
 'year': 1975,
 'name_middle': 'Thomas',
 'id': 'Gaddis_William.JR'}

In [19]:
t.path_meta

[36m[10:19:14][0m [34m[1mlltk.text.text.__getattr__()[0m[36m:108:[0m Error getting attribute "path_meta": can only concatenate str (not "NoneType") to str


In [22]:
C.t.get_path('meta')

TypeError: expected str, bytes or os.PathLike object, not NoneType

In [4]:
read_df(C.path_metadata)

Unnamed: 0,id,author,title,year,idref,pub,type,name,nation,medium,...,viaf_id,gnd_id,characters,bibliotheque_nationale_de_france_id,idref_id,oclc_work_id,kinematoscope_literary_work_id,virtue,vice,wfwefew
0,_chadwyck/Eighteenth-Century_Fiction/burney.01,Frances Burney,Evelina (1778),1778.0,Z200000830,"Printed for T. Lowndes, [etc.]",BOOK,Vol. 1,British,Fiction,...,,,,,,,,,,
1,_chadwyck/Eighteenth-Century_Fiction/richards.01,Samuel Richardson,"Clarissa, or, the History of a Young Lady",1748.0,Z200044053,Printed for S. Richardson: [etc.],BOOK,Vol. 1,British,Fiction,...,181856293.0,4138842-2,Clarissa Harlowe,11940494z,27342077.0,8914116000.0,229.0,,,
2,testing,,,,,,,,,,...,,,,,,,,1.0,,
3,testing2,,,,,,,,,,...,,,,,,,,1.0,,
4,testing3,,,,,,,,,,...,,,,,,,,1.0,,
5,testing4,,,,,,,,,,...,,,,,,,,1.0,,
6,testing126512,,,,,,,,,,...,,,,,,,,1.0,1.0,
7,testing949127,,,,,,,,,,...,,,,,,,,1.0,1.0,0.0
8,testing609238,,,,,,,,,,...,,,,,,,,1.0,1.0,0.0


In [10]:
read_df(get_backup_fn(C.path_metadata))

Unnamed: 0,id,_addr
0,_chadwyck/Eighteenth-Century_Fiction/burney.01,_epistolary/_chadwyck/Eighteenth-Century_Ficti...
1,_chadwyck/Eighteenth-Century_Fiction/richards.01,_epistolary/_chadwyck/Eighteenth-Century_Ficti...
2,testing,_epistolary/testing
3,testing2,_epistolary/testing2
4,testing3,_epistolary/testing3


In [4]:
C.metadata().columns

Index(['author', 'title', 'year', 'idref', 'pub', 'type', 'name', 'nation',
       'medium', 'subcorpus',
       ...
       'dpla_subject_term', 'google_doodle', 'kbpedia_id', 'kallias_id',
       'omegawiki_defined_meaning', 'museum_of_modern_art_work_id', 'volume',
       'issue', 'page(s)_(p304)', 'noosfere_story_id'],
      dtype='object', length=211)

In [30]:
!ls

test.bak.txt               test_wikidata.ipynb
test.txt                   test_wikidata2.ipynb
test_textconnections.ipynb wikidata.py


In [6]:
C.metadata()

Unnamed: 0_level_0,author,title,year,idref,pub,type,name,nation,medium,subcorpus,...,dpla_subject_term,google_doodle,kbpedia_id,kallias_id,omegawiki_defined_meaning,museum_of_modern_art_work_id,volume,issue,page(s)_(p304),noosfere_story_id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Eighteenth-Century_Fiction/paltock.01,"Paltock, Robert, 1697-1767",Peter Wilkins (1751),1751,Z200043982,"Printed for J. Robinson, ... and R. Dodsley, [...",BOOK,Vol. 1,British,Fiction,Eighteenth-Century_Fiction,...,,,,,,,,,,
Eighteenth-Century_Fiction/brookefm.02,"Brooke, Frances, 1724?-1789",Lady Julia Mandeville (1763),1763,Z200000703,Printed for R. and J. Dodsley [etc.],BOOK,Vol. 1,British,Fiction,Eighteenth-Century_Fiction,...,,,,,,,,,,
Eighteenth-Century_Fiction/mackenzi.01,"Mackenzie, Henry, 1745-1831",Julia de Roubigné (1777),1777,Z200030400,"Printed for W. Strahan, and T. Cadell",BOOK,Vol. 1,British,Fiction,Eighteenth-Century_Fiction,...,,,,,,,,,,
Eighteenth-Century_Fiction/cleland.01,"Cleland, John, 1709-1789",Memoirs of a Woman of Pleasure (1749),1749,Z200000927,Printed for G. Fenton [etc.],BOOK,Vol. 1,British,Fiction,Eighteenth-Century_Fiction,...,,,,,,,,,,
Eighteenth-Century_Fiction/haywood.07,"Haywood, Eliza Fowler, 1693?-1756",The Fortunate Foundlings (1744),1744,Z200029933,Printed and published by T. Gardner [etc.],BOOK,Title Page,British,Fiction,Eighteenth-Century_Fiction,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Early_English_Prose_Fiction/ee54010.01,"Holland, Samuel, gent.",Don Zara Del Fogo (1656),1656,Z200027421,Printed by T. W. for Tho. Vere [etc.],BOOK,Title Page,British,Fiction,Early_English_Prose_Fiction,...,,,,,,,,,,
Early_English_Prose_Fiction/ee17010.04,"Orrery, Roger Boyle, Earl of, 1621-1679","Parthenissa, Part 3 (1655)",1655,Z200025922,Printed for Humphrey Moseley [etc.],BOOK,Title Page,British,Fiction,Early_English_Prose_Fiction,...,,,,,,,,,,
Early_English_Prose_Fiction/ee73020.01,"Oldys, Alexander",The Female Gallant (1692),1692,Z200027810,Printed for Samuel Briscoe [etc.],BOOK,Title Page,British,Fiction,Early_English_Prose_Fiction,...,,,,,,,,,,
Early_English_Prose_Fiction/ee24020.03,"Newcastle, Margaret Cavendish, Duchess of, 162...",The Description of a New World (1666),1666,Z200026251,Printed by A. Maxwell,BOOK,Title Page,British,Fiction,Early_English_Prose_Fiction,...,,,,,,,,,,
