-
Notifications
You must be signed in to change notification settings - Fork 1
/
writeCSV_both.py
57 lines (43 loc) · 1.49 KB
/
writeCSV_both.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#This script finds the duplicate rows in relatesMaster.csv and writes them
#out to a new csv (relatesBOTH.csv). These are the relationships that exist in
#both the LJ and CH datasets.
#Note: before running the script, remove the "CH" or "LJ" designations from
#relatesMaster.csv, since rows are compared in full when looking for duplicates.
import csv
def find_duplicate_rows(rows):
    """Return every row that repeats an earlier row, in input order.

    Rows are compared on all of their fields. A row that occurs k times
    contributes k - 1 entries to the result (each occurrence after the
    first), which matches how the script has always counted duplicates.

    Rows with fewer than two fields (for example the empty list that
    ``csv.reader`` yields for a blank line) are ignored: they cannot supply
    both a source and a target column, and previously caused an IndexError
    when such a row appeared more than once.

    Args:
        rows: iterable of rows, each a sequence of field values
            (typically a ``csv.reader``).

    Returns:
        A list of the repeated rows, in the order they were encountered.
    """
    seen = set()
    duplicate_rows = []
    for row in rows:
        if len(row) < 2:
            continue
        # Tuples are hashable, so membership is O(1) instead of a list scan.
        key = tuple(row)
        if key in seen:
            duplicate_rows.append(row)
        else:
            seen.add(key)
    return duplicate_rows


def write_duplicates(source_path='relatesMaster.csv',
                     target_path='relatesBOTH.csv'):
    """Write the source/target pair of every duplicated row to a new CSV.

    Args:
        source_path: CSV file to scan for repeated rows.
        target_path: CSV file to create; it receives the first two columns
            (source, target) of each repeated row.

    Returns:
        The list of repeated rows found in ``source_path``.
    """
    # newline='' lets the csv module handle line endings itself, as the
    # csv documentation recommends for both reading and writing.
    with open(source_path, 'r', newline='') as f:
        duplicate_rows = find_duplicate_rows(csv.reader(f))

    with open(target_path, 'w', newline='') as g:
        csv.writer(g).writerows(row[:2] for row in duplicate_rows)

    return duplicate_rows


# Only run the file processing when executed as a script, so the helpers
# above can be imported without touching the filesystem.
if __name__ == '__main__':
    duplicate_rows = write_duplicates()
    print(len(duplicate_rows))
#The script below was used to ensure that there were no duplicates within
#relatesLJ.csv or relatesCH.csv, because they were combined manually to
#make relatesMaster.csv. This ensures that the script above only finds
#relationships that overlap BETWEEN the LJ and CH datasets.
##import csv
##
##duplicate_rows = []
##unique_rows = []
##
##with open ('relatesLJ.csv', 'r') as f:
## reader = csv.reader(f)
##
##
## for row in reader:
##
## if row not in unique_rows:
## unique_rows.append(row)
## else:
## duplicate_rows.append(row)
##
##
##print (duplicate_rows)