Skip to content

Commit

Permalink
csv export fix
Browse files Browse the repository at this point in the history
  • Loading branch information
thePortus committed Nov 28, 2016
1 parent 44d66cd commit ced49e6
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 15 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ os: linux
dist: precise
# setting environment variables
env:
- - PACKAGE_VERSION=0.2.1
+ - PACKAGE_VERSION=0.2.2
# command to install dependencies
install:
- pip install -r requirements/ci-testing.txt
Expand Down
5 changes: 4 additions & 1 deletion arakhne/corpus/corpus_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def load(self, path):
dialect=self.settings['dialect']
)
# Build corpus list of metadata column names
+ self.corpus.settings['meta_fields'] = []
for fieldname in csv_reader.fieldnames:
if fieldname != self.settings['text_col']:
self.corpus.settings['meta_fields'].append(fieldname)
Expand All @@ -85,16 +86,18 @@ def save(self, path):
path = self.mk_path(path)
self.test_save(path)
print('Saving to', path)
# Open file with specified settings
with open(
path,
'w+',
encoding=self.settings['encoding'],
newline=self.settings['newline']
) as csv_file:
# Build known field names from metadata fields and text col names
fieldnames = []
print(self.corpus)
fieldnames.extend(self.corpus.settings['meta_fields'])
fieldnames.append(self.corpus.settings['text_col'])
# Prepare the csv dict writer with specified settings
csv_writer = csv.DictWriter(
csv_file,
delimiter=self.settings['delimiter'],
Expand Down
11 changes: 7 additions & 4 deletions arakhne/doc/base_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,14 @@ def stringify(self):
return str(self.data)

def rm_lines(self):
+ rexr = re.compile(r'\n+')
+ # substituting single endlines for matching endline blocks
+ clean_text = rexr.sub(' ', self.data)
return self.__class__(
- self.data
- .replace('-\n ', '').replace('- \n', '').replace('-\n', '')
- .replace(' - ', '').replace('- ', '').replace(' -', '')
- .replace('\n', ' '),
+ clean_text
+ .replace('-\n ', '').replace('- \n', '').replace('-\n', '')
+ .replace(' - ', '').replace('- ', '').replace(' -', '')
+ .replace('\n', ' '),
self.metadata
)

Expand Down
14 changes: 7 additions & 7 deletions arakhne/fixtures/test_files/test_out.csv
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
- book,chapter,section,title,book,chapter,section,title,text
- 7,1,2,7.1.2,7,1,2,7.1.2,"[2]
+ book,chapter,section,title,text
+ 7,1,2,7.1.2,"[2]
Cuius rei verisimilis causa adferebatur, quod Gallis omnibus cognitum esset neque ulla multitudine in unum locum coacta resisti posse Romanis, nec, si diversa bella complures eodem tempore intulissent civitates, satis auxili aut spati aut copiarum habiturum exercitum populi Romani ad omnia persequenda; non esse autem alicui civitati sortem incommodi recusandam, si tali mora reliquae possent se vindicare in libertatem."
- 1,2,6,1.2.6,1,2,6,1.2.6,"[6]
+ 1,2,6,1.2.6,"[6]
Pro multitudine autem hominum et pro gloria belli atque fortitudinis angustos se fines habere arbitrabantur, qui in longitudinem milia passuum CCXL, in latitudinem CLXXX patebant."
- 1,2,3,1.2.3,1,2,3,1.2.3,"[3]
+ 1,2,3,1.2.3,"[3]
Id hoc facilius iis persuasit, quod undique loci natura Helvetii continentur: una ex parte flumine Rheno latissimo atque altissimo, qui agrum Helvetium a Germanis dividit; altera ex parte monte Iura altissimo, qui est inter Sequanos et Helvetios; tertia lacu Lemanno et flumine Rhodano, qui provinciam nostram ab Helvetiis dividit."
- 1,2,5,1.2.5,1,2,5,1.2.5,"[5]
+ 1,2,5,1.2.5,"[5]
qua ex parte homines bellandi cupidi magno dolore adficiebantur."
- 1,2,4,1.2.4,1,2,4,1.2.4,"[4]
+ 1,2,4,1.2.4,"[4]
His rebus fiebat ut et minus late vagarentur et minus facile finitimis bellum inferre possent;"
- 1,2,2,1.2.2,1,2,2,1.2.2,"[2]
+ 1,2,2,1.2.2,"[2]
perfacile esse, cum virtute omnibus praestarent, totius Galliae imperio potiri."
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
'name': "arakhne",
'packages': ["arakhne"],
'install_requires': ["nltk", "cltk"],
- 'version': "0.2.1",
- 'description': "CLTK Wrapper Functions for Greek/Latin Text Analysis",
+ 'version': "0.2.2",
+ 'description': "Arakhne Text Loom for Corpus-Based Classical-Language Analytics",
'author': "David J. Thomas",
'author_email': "dave.a.base@gmail.com",
'url': "https://github.com/thePortus/arakhne",
Expand Down

0 comments on commit ced49e6

Please sign in to comment.