forked from OpenAPC/openapc-de
-
Notifications
You must be signed in to change notification settings - Fork 0
/
deal_wiley_extract.py
executable file
·52 lines (40 loc) · 1.7 KB
/
deal_wiley_extract.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
import argparse
import openapc_toolkit as oat
# Help texts for the command line arguments, keyed by argument name.
ARG_HELP_STRINGS = {
    "enriched_file": "A fully enriched file to extract hybrid Wiley articles from",
}

# Placeholder rows made of empty strings only. main() copies them into the
# output wherever a row has been routed to the other file: 18 fields for the
# core file, 19 for the agreement file (which has an extra "agreement" column).
EMPTY_LINE_CORE = [""] * 18
EMPTY_LINE_TA = [""] * 19

# Wiley and imprints
PUBLISHER_LIST = [
    "Wiley-Blackwell",
    "EMBO",
    "American Geophysical Union (AGU)",
    "International Union of Crystallography (IUCr)",
]

# Value appended as the last field of every extracted row.
AGREEMENT_NAME = "DEAL Wiley Germany"

# 19 flags, handed to oat.OpenAPCUnicodeWriter as its second argument.
# Only positions 1 and 2 are False.
# NOTE(review): presumably one quoting flag per output column - confirm
# against openapc_toolkit.
QUOTE_MASK = [True, False, False] + [True] * 16
def main():
    """Split an enriched file into a core file and a Wiley agreement file.

    Reads the CSV file named by the ``enriched_file`` command line argument
    and writes two files into the current working directory. Both contain
    the header followed by exactly one row per input row:

    * ``out_orig.csv`` - non-matching rows are copied unchanged, matching
      rows are replaced by a copy of ``EMPTY_LINE_CORE``.
    * ``out_deal_wiley.csv`` - matching rows are copied with
      ``AGREEMENT_NAME`` appended (the header gains an ``agreement``
      column), non-matching rows are replaced by a copy of
      ``EMPTY_LINE_TA``.

    A row matches when its field at index 4 equals ``"TRUE"`` and its field
    at index 5 is one of ``PUBLISHER_LIST``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("enriched_file", help=ARG_HELP_STRINGS["enriched_file"])
    args = parser.parse_args()

    header, content = oat.get_csv_file_content(args.enriched_file, enc="utf-8", force_header=True)
    header_line = header[0]

    core_content = [list(header_line)]
    ta_content = [list(header_line) + ["agreement"]]
    # Echo both header rows to stdout before processing.
    print(core_content)
    print(ta_content)

    for line in content:
        # Index 5 is compared against publisher names; index 4 is presumably
        # the hybrid flag column (the help text speaks of "hybrid Wiley
        # articles") - verify against the input file layout.
        if line[4] == "TRUE" and line[5] in PUBLISHER_LIST:
            # Fresh list copies so rows never share the module-level
            # placeholder objects.
            core_content.append(list(EMPTY_LINE_CORE))
            ta_content.append(line + [AGREEMENT_NAME])
        else:
            core_content.append(line)
            ta_content.append(list(EMPTY_LINE_TA))

    # The input is read as UTF-8, so write the output as UTF-8 explicitly.
    # Without an encoding argument, open() falls back to the locale's
    # preferred encoding, which can raise UnicodeEncodeError or yield files
    # in a different encoding than the input.
    with open("out_orig.csv", "w", encoding="utf-8") as out:
        writer = oat.OpenAPCUnicodeWriter(out, QUOTE_MASK, True, True)
        writer.write_rows(core_content)
    with open("out_deal_wiley.csv", "w", encoding="utf-8") as out:
        writer = oat.OpenAPCUnicodeWriter(out, QUOTE_MASK, True, True)
        writer.write_rows(ta_content)
# Run the extraction only when executed as a script, not when imported.
if __name__ == "__main__":
    main()