-
Notifications
You must be signed in to change notification settings - Fork 0
/
cli.py
executable file
·70 lines (54 loc) · 3.07 KB
/
cli.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/usr/bin/python
# -*- coding: utf-8 -*-
#this file provides CLI hooks for the functions in the OPUS library. For running many languages, look at main.py
import opus, click
# Accept -h in addition to click's default --help flag.
CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help'])


# context_settings has to be passed by keyword: the first positional parameter
# of click.group() is the command *name*, so passing the dict positionally
# never registered -h and made the dict the group's name instead.
@click.group(context_settings=CONTEXT_SETTINGS)
def cli():
    """CLI hooks for the functions in the OPUS library."""
    # Container group only; the subcommands registered on it do the work.
    pass
#makeDirectoryStructure
@cli.command()
@click.option(
    '--slowstoragedir',
    required=True,
    type=click.Path(),
    help="Directory on the largest storage medium available",
)
def makeDirectoryStructure(slowstoragedir):
    """build the directory structure for holding the downloads and processed files"""
    # NOTE: click prints the docstring above as this command's --help text.
    # Thin wrapper: forward the --slowstoragedir value unchanged to the library.
    opus.makeDirectoryStructure(slowstoragedir)
#downloadLanguage
@cli.command()
@click.option(
    '--language',
    required=True,
    type=str,
    help="2-letter designation for the language",
)
@click.option(
    '--downloadpath',
    required=True,
    type=click.Path(),
    help="Directory where the downloaded, archived files should go",
)
@click.option(
    '--expandpath',
    required=True,
    type=click.Path(),
    help="Directory where the download should be expanded to",
)
def downloadLanguage(language, downloadpath, expandpath):
    """download the tar file and extract it for a specific language"""
    # NOTE: click prints the docstring above as this command's --help text.
    # All three option values are handed to the library positionally, in the
    # same order as they are declared here.
    opus.downloadLanguage(language, downloadpath, expandpath)
#extractText
@cli.command()
@click.option('--inputfile', type=click.Path(), help="Path of the compressed input file", required=True)
@click.option('--outputfile', type=click.Path(), help="Path for the yielded text file", required=True)
def extractText(inputfile, outputfile, remap=None):
    """unzip, parse the XML, and extract the sentences from a compressed movie file from OPUS"""
    # NOTE: click prints the docstring above as this command's --help text.
    # `remap` has no corresponding click option, so when this runs as a CLI
    # command it is always None; it is forwarded to the library as-is.
    opus.extractText(inputfile, outputfile, remap)
#processLanguage
@cli.command()
@click.option(
    '--language',
    required=True,
    type=str,
    help="2-letter designation for the language",
)
@click.option(
    '--expandpath',
    required=True,
    type=click.Path(),
    help="Directory of the expanded corpus",
)
@click.option(
    '--outputdir',
    required=True,
    type=click.Path(),
    help="Directory where processed text files should go",
)
def processLanguage(language, expandpath, outputdir):
    """Run extract text on a large number of .gz files"""
    # NOTE: click prints the docstring above as this command's --help text.
    # Pure pass-through: the batch work itself happens in opus.processLanguage.
    opus.processLanguage(language, expandpath, outputdir)
#combineLanguage
@cli.command()
@click.option('--combinedpath', type=click.Path(), help="Directory of the cleaned, individual text files", required=True)
@click.option('--outputfile', type=click.Path(), help="Path of the combined file", required=True)
def combineLanguage(combinedpath, outputfile):
    """combine the cleaned, individual text files in a directory into a single file"""
    # The docstring above doubles as the --help description; this was the only
    # command without one, so it showed up undocumented in the command list.
    opus.combineLanguage(combinedpath, outputfile)
#augmentOPUSfile
@cli.command()
@click.option('--inputfile', type=click.Path(), help="Path of the input OPUS file", required=True)
@click.option('--mergefile', type=click.Path(), help="Path of the file to merge against", required=True)
@click.option('--outputfile', type=click.Path(), help="Path for the yielded text file", required=True)
def augmentOPUSfile(inputfile, mergefile, outputfile):
    """augment an OPUS file with additional annotations, e.g. adding a column
    with segmented Sampa from Lexique to the French data"""
    # NOTE: click prints the docstring above as this command's --help text, so
    # the previously dangling, unfinished clause has been dropped from it.
    opus.augmentOPUSfile(inputfile, mergefile, outputfile)
# Dispatch to the click group only when executed as a script, not on import.
if __name__ == "__main__":
    cli()