Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
lingulist committed Nov 12, 2018
1 parent 594f00f commit 410f166
Show file tree
Hide file tree
Showing 8 changed files with 7,422 additions and 0 deletions.
4 changes: 4 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
include *.md LICENSE
graft src
global-exclude *.py[co]

2 changes: 2 additions & 0 deletions pip-requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
clldutils
segments
29 changes: 29 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#import distribute_setup
#distribute_setup.use_setuptools()

from setuptools import setup, find_packages,Extension
import codecs
# setup package name etc as a default
pkgname = 'poepy'

# Read the long description inside a context manager so the file handle is
# closed deterministically instead of being left open until garbage
# collection.
with codecs.open('README.md', 'r', 'utf-8') as readme:
    long_description = readme.read()


setup(
    name=pkgname,
    description="A Python library for handling annotated rhymes.",
    version='0.1.0',
    packages=find_packages(where='src'),
    package_dir={'': 'src'},
    zip_safe=False,
    license="GPL",
    include_package_data=True,
    # Every package imported at module level by src/poepy/poepy.py is listed
    # explicitly, so that ``import poepy`` works after a plain install and
    # does not rely on transitive dependencies.
    install_requires=[
        'lingpy',
        'clldutils',
        'segments',
        'networkx',
        'tqdm',
        'tabulate',
    ],
    url='https://github.com/lingpy/poepy',
    long_description=long_description,
    long_description_content_type='text/markdown',
    #entry_points={
    #    'console_scripts': ['sinopy=sinopy.cli:main'],
    #},
    author='Johann-Mattis List',
    author_email='list@shh.mpg.de',
    keywords='Chinese linguistics, historical linguistics, computer-assisted language comparison'
)
1 change: 1 addition & 0 deletions src/poepy/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .poepy import *
18 changes: 18 additions & 0 deletions src/poepy/conf/poems.rc
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Basic namespaces
counterpart str counterpart
doculect str language,doculect,dialect,taxon,taxa
concept str gloss,concept,concepts
iso str iso,isocode
tokens lambda x:x.split() tokens,tokenized_counterpart,ipatokens
segments lambda x:x.split() segments

alignment lambda x: x.split() alignment
line_order int line_order

# specific names for alignments
conceptid int conceptid

# Namespace for rhyme annotation (whitespace-separated integer IDs)
rhymeids lambda x: [int(s) for s in x.split()] crossids


7,287 changes: 7,287 additions & 0 deletions src/poepy/data/Wang1980.tsv

Large diffs are not rendered by default.

15 changes: 15 additions & 0 deletions src/poepy/data/references.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@

@Book{Wang1980,
Title = {{S}hījīng {Y}ùndú},
Address = {Shànghǎi 上海},
Author = {Wáng Lì 王力},
Publisher = {Shànghǎi Gǔjī 上海古籍},
Year = {1980},

Owner = {mattis},
Shortauthor = {Wáng},
Timestamp = {2016.01.09},
Usera = {Rhyme readings in the Book of Odes},
Userb = {詩經韻讀}
}

66 changes: 66 additions & 0 deletions src/poepy/poepy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from clldutils.path import Path, remove, path_component
from segments import Tokenizer
from lingpy import *
import networkx as nx
from tqdm import tqdm
from itertools import combinations
from tabulate import tabulate

def poepy_path(*comps):
    """Return a POSIX-style path below the directory containing this module.

    :param comps: Path components appended, in order, to the directory of
        this file.
    :return: The joined path as a string with forward slashes.
    """
    package_dir = Path(__file__).parent
    target = package_dir.joinpath(*comps)
    return target.as_posix()


class Poems(Alignments):
    """Collection of rhyme-annotated poems built on lingpy's ``Alignments``.

    Poems are passed to the parent class as columns, stanzas as rows and the
    line content as segments; the rhyme annotation column serves as the
    reference (``ref``) column.
    """

    def __init__(self, infile, ref='rhymeids', line='line', poem='poem',
                 stanza='stanza', alignment='alignment', fuzzy=True,
                 conf=poepy_path('conf', 'poems.rc'), **keywords):
        """Load *infile* and initialise the underlying ``Alignments`` object.

        :param infile: Input data handed unchanged to ``Alignments.__init__``.
        :param ref: Name of the column holding the rhyme identifiers.
        :param line: Name of the column passed to the parent as ``segments``.
        :param poem: Name of the column passed to the parent as ``col``.
        :param stanza: Name of the column passed to the parent as ``row``.
        :param alignment: Name of the alignment column.
        :param fuzzy: Passed to the parent as ``fuzzy``.
        :param conf: Path of the configuration file; defaults to the
            ``conf/poems.rc`` file located next to this module.
        :param keywords: Additional options forwarded to
            ``Alignments.__init__``. ``transcription`` defaults to
            ``'line_in_source'`` when not supplied.
        """
        # Extra keywords used to be accepted and then silently dropped.
        # They are forwarded now; the explicitly named parameters always take
        # precedence, so a stray duplicate key cannot raise a TypeError.
        options = dict(keywords)
        options.setdefault('transcription', 'line_in_source')
        options.update(
            col=poem, row=stanza, conf=conf, segments=line, ref=ref,
            alignment=alignment, fuzzy=fuzzy)
        Alignments.__init__(self, infile, **options)

    def stats(self):
        """Print summary counts (poems, stanzas, lines, rhymes, words)."""
        rhymes = self.msa[self._ref]
        print('Poems:       {0}'.format(len(self.cols)))
        print('Stanzas:     {0}'.format(len(self.rows)))
        print('Lines:       {0}'.format(len(self)))
        print('Rhyme words: {0}'.format(
            sum(len(rhymes[key]['ID']) for key in rhymes)))
        print('Rhymes:      {0}'.format(len(rhymes)))
        print('Words:       {0}'.format(
            sum(len(self[idx, 'line']) for idx in self)))

    def get_rhyme_network(self, ref='rhymeids'):
        """Build the rhyme network and store it as ``self.G``.

        Every distinct rhyme sequence (its segments joined by spaces) becomes
        a node carrying ``weight`` (number of occurrences) and ``occurrences``
        (the row identifiers as strings). Each pair of sequences sharing a
        rhyme set is linked by an edge carrying ``weight`` (number of
        co-occurrences) and ``stanza`` (one stanza value per co-occurrence).

        :param ref: Key in ``self.msa`` selecting the rhyme sets to use.
        """
        graph = nx.Graph()

        for msa in tqdm(self.msa[ref].values()):
            # Materialise the pairs once; they are iterated twice below.
            entries = list(zip(msa['ID'], msa['seqs']))

            for idx, seq in entries:
                node = ' '.join(seq)
                # ``Graph.nodes[...]`` is the supported accessor; the older
                # ``Graph.node`` attribute was removed in networkx 2.4.
                if node in graph:
                    attrs = graph.nodes[node]
                    attrs['weight'] += 1
                    attrs['occurrences'].append(str(idx))
                else:
                    graph.add_node(node, weight=1, occurrences=[str(idx)])

            for (idxA, seqA), (idxB, seqB) in combinations(entries, r=2):
                nodeA, nodeB = ' '.join(seqA), ' '.join(seqB)
                # Use the stanza of the pair itself. The previous code read
                # the loop variable left over from the node loop above, i.e.
                # always the last entry of the rhyme set.
                stanza = self[idxA, 'stanza']
                if graph.has_edge(nodeA, nodeB):
                    attrs = graph[nodeA][nodeB]
                    attrs['weight'] += 1
                    attrs['stanza'].append(stanza)
                else:
                    graph.add_edge(nodeA, nodeB, weight=1, stanza=[stanza])
        self.G = graph

    def get_connected_components(self):
        """Store the connected components of ``self.G`` in ``self.comps``.

        ``self.comps`` maps a 1-based component index to the list of nodes
        in that component.

        :raises ValueError: If :meth:`get_rhyme_network` has not been called.
        """
        if not hasattr(self, 'G'):
            raise ValueError('compute the rhyme network first')
        self.comps = {
            number: list(component)
            for number, component in enumerate(
                nx.connected_components(self.G), start=1)
        }

#def pprint(self, stanzas):
# table = []
# for stanza in stanzas:

0 comments on commit 410f166

Please sign in to comment.