/
pydwarf.diff.py
152 lines (138 loc) · 7.16 KB
/
pydwarf.diff.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import os
import pydwarf
import difflib
import raws
class diffrecord:
    '''Class for keeping track of differences found by difflib.SequenceMatcher

    One record describes a single opcode: the operation name plus the
    half-open index ranges [afrom, auntil) in sequence a and [bfrom, buntil)
    in sequence b that the operation applies to.

    Attributes:
        a, b: the two compared sequences (the record keeps references, not copies).
        bpath: identifier of the source that sequence b came from.
        op: one of 'equal', 'delete', 'insert', 'replace'.
        afrom, auntil, bfrom, buntil: range boundaries as given.
        alower, aupper, blower, bupper: the elements found at those boundaries,
            clamped to the last element when a boundary is past the end, or
            None when the sequence is empty.
        ignore: flag initialised to False.
        conflicts: initialised to None; holds a list of other records once set.
    '''

    # Two-character markers used by __str__ for each operation name.
    _opsymbols = {'equal': '..', 'delete': '--', 'insert': '++', 'replace': '//'}

    def __init__(self, a, b, bpath, op, afrom, auntil, bfrom, buntil):
        self.a = a
        self.b = b
        self.bpath = bpath
        self.op = op
        self.afrom = afrom
        self.auntil = auntil
        self.bfrom = bfrom
        self.buntil = buntil
        # An opcode may start exactly at the end of a sequence (for example an
        # insertion after the last element has afrom == auntil == len(a)), so
        # every boundary lookup is clamped instead of indexing directly.
        self.alower = self._boundary(a, afrom)
        self.aupper = self._boundary(a, auntil)
        self.blower = self._boundary(b, bfrom)
        self.bupper = self._boundary(b, buntil)
        self.ignore = False
        self.conflicts = None

    def _boundary(self, sequence, index):
        '''Return sequence[index], falling back to the last element when index
        is past the end and to None when the sequence is empty.'''
        if index < len(sequence):
            return sequence[index]
        return sequence[-1] if len(sequence) else None

    def atokens(self):
        '''Yield the elements of a that fall inside [afrom, auntil).'''
        # Slicing stops at the end of the sequence on its own.
        for token in self.a[self.afrom:self.auntil]: yield token

    def btokens(self):
        '''Yield the elements of b that fall inside [bfrom, buntil).'''
        for token in self.b[self.bfrom:self.buntil]: yield token

    def __str__(self):
        '''Return the operation marker followed by the range in a, e.g. "++ 2:2".'''
        return '%s %d:%d' % (self._opsymbols[self.op], self.afrom, self.auntil)
# NOTE: continuation lines of the triple-quoted strings below are kept
# flush-left on purpose so that the text of the strings is unchanged.
@pydwarf.urist(
    name = 'pineapple.diff',
    title = 'Diff-based Mod Merging',
    version = '1.1.0',
    author = 'Sophie Kirschner',
    description = '''Merges and applies changes made to some modded raws via diff checking.
Should be reasonably smart about automatic conflict resolution but if it complains
then I recommend giving things a manual checkover afterwards. Also, the token-based
diff'ing approach should work much better than any line-based diff. Using this tool
to apply mods made to other versions of Dwarf Fortress probably won't work so well.''',
    arguments = {
        'paths': '''Must be be one or more file paths in a list, where each
path points to a directory with the same structure as a normal
Dwarf Fortress install; the raw files inside the directories will
be added to the modded install if they did not already exist and,
if they did exist, then this script will attempt to merge them.'''
    },
    compatibility = '.*'
)
def diff(df, paths):
    '''Merge the raws found under each directory in paths into df.

    For every raw file loaded from a mod directory: if df has no file of the
    same name, a copy is added; otherwise the file's tokens are diffed against
    the tokens df held for that file when it was first seen and the resulting
    insert/delete/replace operations are applied to df's tokens. When
    replacements coming from different mod files touch overlapping ranges and
    their new content differs, all variants are inserted, wrapped in
    "<<<<<<diff potential conflict!" markers, for manual resolution.

    Arguments:
        df: the raws being modified; must support `name in df.files`,
            `df[name].tokens()` and `df.add(file)`.
        paths: iterable of mod directory paths.

    Returns:
        pydwarf.failure(...) as soon as a path is not a directory, otherwise
        pydwarf.success(...) with a message giving the number of conflicts.
    '''

    # Get all the files in the mods
    newfiles = []
    for path in paths:
        if not os.path.isdir(path):
            return pydwarf.failure('Failed to load raws from path %s.' % path)
        newfiles.append([
            rfile for rfile in raws.dir(root=path).files.values()
            if isinstance(rfile, raws.rawfile)
        ])

    operations = {}
    conflicts = 0
    currentfiletokensdict = {}

    for newfilelist in newfiles:
        for newfile in newfilelist:
            pydwarf.log.info('Handling diff for file %s...' % newfile.path)
            filekey = str(newfile)

            # Get list of tokens for current file (And don't do it for the same file twice)
            currentfiletokens = None
            if filekey in currentfiletokensdict:
                currentfiletokens = currentfiletokensdict[filekey]
            elif filekey in df.files:
                currentfiletokens = list(df[filekey].tokens())
                currentfiletokensdict[filekey] = currentfiletokens

            # Do a diff
            if currentfiletokens:
                newfiletokens = list(newfile.tokens())
                matcher = difflib.SequenceMatcher(
                    None, currentfiletokens, newfiletokens
                )
                if filekey not in operations:
                    operations[filekey] = {
                        'insert': [], 'delete': [], 'replace': [], 'equal': []
                    }
                for item in matcher.get_opcodes():
                    op = item[0]
                    # difflib tags unchanged runs as 'equal'; they need no record.
                    if op != 'equal':
                        operations[filekey][op].append(diffrecord(currentfiletokens, newfiletokens, newfile.path, *item))

            # File doesn't exist yet, don't bother with a diff
            else:
                pydwarf.log.debug('File didn\'t exist yet, adding...')
                df.add(newfile.copy())

    for fileheader, fileops in operations.items():
        replacements = fileops['replace']

        # Do some handling for potentially conflicting replacements
        for i, irecord in enumerate(replacements):
            if irecord.ignore:
                continue
            for jrecord in replacements[i+1:]:
                # Replacements from different mod files overlap?
                # Paths are strings, so compare by value rather than identity.
                if (jrecord.bpath != irecord.bpath) and (irecord.afrom <= jrecord.auntil and jrecord.afrom <= irecord.auntil):
                    # The later record is folded into the earlier one.
                    jrecord.ignore = True
                    if not raws.helpers.tokensequal(irecord.btokens(), jrecord.btokens()):
                        # Replacements aren't identical (this means there's a conflict)
                        if not irecord.conflicts: irecord.conflicts = []
                        irecord.conflicts.append(jrecord)

        # Make replacements (insertions)
        for record in replacements:
            if record.ignore:
                continue
            if record.conflicts is None:
                tokens = record.btokens()
            else:
                # Handle conflicts: emit every variant, each wrapped in
                # "<<<diff from ...;" / ">>>" markers, and wrap the whole
                # group in "<<<<<<" / ">>>>>>" markers.
                pydwarf.log.error('Encountered potentially conflicting changes in %s, block replaced by %d input files.' % (fileheader, len(record.conflicts)+1))
                tokens = []
                lasttoken = None
                for conflict in record.conflicts + [record]:
                    conflict.blower.prefix = '\n<<<diff from %s;%s' % (conflict.bpath, conflict.blower.prefix if conflict.blower.prefix else '')
                    for token in conflict.btokens():
                        lasttoken = token
                        tokens.append(token)
                    lasttoken.suffix = '%s\n>>>\n' % (lasttoken.suffix if lasttoken.suffix else '')
                tokens[0].prefix = '\n<<<<<<diff potential conflict! block modified by %d files %s;\n%s' % (len(record.conflicts)+1, ', '.join([r.bpath for r in record.conflicts] + [record.bpath]), tokens[0].prefix if tokens[0].prefix else '')
                lasttoken.suffix = '%s\n>>>>>>\n\n' % (lasttoken.suffix if lasttoken.suffix else '')
                conflicts += 1
            record.alower.add(tokens=raws.helpers.copytokens(tokens))

        # Make insertions
        # NOTE(review): here `tokens=` is passed to copytokens while the
        # replacement path above passes it to add - confirm both helpers
        # accept either calling form.
        for record in fileops['insert']:
            record.alower.add(raws.helpers.copytokens(tokens=record.btokens()))

        # Make deletions
        for record in fileops['delete']:
            for token in record.atokens():
                token.remove()

        # Make replacements (deletions); this runs for ignored records too.
        for record in replacements:
            for token in record.atokens():
                token.remove()

    if conflicts == 0:
        return pydwarf.success('Merged %d mods without conflicts.' % len(paths))
    else:
        return pydwarf.success('Merged %d mods with %d conflicts. Recommended you search in outputted raws for text like "<<<<<<diff potential conflict!" and resolve manually.' % (len(paths), conflicts))