In [1]:
# Does not need to be executed if
# ~/.ipython/profile_default/ipython_config.py
# exists and contains:
# c.InteractiveShell.ast_node_interactivity = 'all'

from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one
# (per IPython's documentation of the ast_node_interactivity option).
InteractiveShell.ast_node_interactivity = 'all'

In [2]:
# 1.0 Read files
# Names of the two input files handed to shorten_lines further down in this cell
file_1, file_2 = 'file_1_1.txt', 'file_1_2.txt'

def shorten_lines(file):
    """Read ``file`` and abbreviate each of its lines to a short tag.

    Each line is split on ``": "``; the text before the separator is replaced
    by its code from the table below and the stripped text after it is
    appended, e.g. ``"A line to delete: 1"`` becomes ``"D1"``.

    Raises KeyError when the text before ``": "`` is not in the table.
    """
    codes = {
        "A changed line": 'C',
        "A line to delete": 'D',
        "A line to insert": 'I',
        "A line to change": 'TC',
        "A line that stays": 'S'}

    with open(file) as handle:
        # Keep the raw split result so the lookup happens before the
        # access to the second piece, as a missing key should surface first.
        pieces_per_line = [raw.split(": ") for raw in handle.readlines()]
        return [codes[pieces[0]] + pieces[1].strip() for pieces in pieces_per_line]


# Abbreviate both files, then show the two tag lists one per line
file_1_shortened = shorten_lines(file_1)
file_2_shortened = shorten_lines(file_2)
print(file_1_shortened, file_2_shortened, sep="\n")

['D1', 'D2', 'S1', 'S2', 'TC1', 'S3', 'S4', 'S5', 'S6', 'D3', 'S7', 'S8', 'TC2', 'TC3', 'TC4', 'TC5', 'S9']
['S1', 'I1', 'S2', 'C1', 'S3', 'S4', 'I2', 'I3', 'S5', 'S6', 'S7', 'S8', 'C2', 'C3', 'C4', 'S9']


In [3]:
# Working Space
import re

def valid_file_lines(file_lines):
    """Check that one side of a diff command is a well-formed line reference.

    A reference is either a single line number (``"5"``) or an ascending
    range written ``"start,end"`` (``"13,16"``).

    Args:
        file_lines: the text found on one side of the command letter.

    Returns:
        True for a single run of ASCII digits, or for two such runs separated
        by exactly one comma where the second number is strictly greater than
        the first; False otherwise (including for the empty string).
    """
    # The anchored pattern rejects empty strings, signs, stray characters and
    # additional commas, so later int() conversions of an accepted reference
    # (e.g. in get_command_positions) cannot raise ValueError.
    match = re.fullmatch(r"([0-9]+)(?:,([0-9]+))?", file_lines)
    if match is None:
        return False

    start, end = match.groups()
    # A range must be strictly ascending: "3,3" and "4,2" are both rejected.
    if end is not None and int(end) <= int(start):
        return False
    return True


def check_input_lines(lines):
    """Decide whether ``lines`` can be the commands of a diff between two files.

    Args:
        lines: list of command lines as returned by ``readlines()``; a line
            may end with a newline character.

    Returns:
        True when the list is non-empty, every line is a well-formed ``a``,
        ``d`` or ``c`` command, and the commands appear in ascending order
        with the same number of unchanged lines between consecutive commands
        in both files; False otherwise.
    """
    # 0. An empty command list is rejected
    if not lines:
        return False

    # End position of the previous command in each file (0 before the first)
    file_1_prev = 0
    file_2_prev = 0

    for index, line in enumerate(lines):

        # 1. No spaces are allowed anywhere in a command line
        if ' ' in line:
            return False

        # 2. Exactly one letter must be present and it must be a valid command.
        #    NOTE: "[A-z]" also matches the characters [ \ ] ^ _ ` ; a line
        #    containing any of them fails this check and is rejected.
        letters = re.findall("[A-z]", line)
        if len(letters) != 1 or letters[0] not in ('a', 'd', 'c'):
            return False

        # 3. Split into the references for the first file, the command and
        #    the references for the second file
        command = letters[0]
        first_file_lines, second_file_lines = line.split(command)
        second_file_lines = second_file_lines.strip('\n')

        # 4. Both references must be valid line numbers / ranges
        if not valid_file_lines(first_file_lines) or not valid_file_lines(second_file_lines):
            return False

        # 5.1 An add command has the format [1]+[a]+[2.1,2.2]:
        #     no range is allowed on the first-file side
        if command == 'a' and ',' in first_file_lines:
            return False

        # 5.2 A delete command has the format [1.1,1.2]+[d]+[2]:
        #     no range is allowed on the second-file side
        if command == 'd' and ',' in second_file_lines:
            return False

        # 6. The number of unchanged lines since the previous command must be
        #    the same in both files
        file_1_command_start, file_1_command_end = get_command_positions(first_file_lines, command, 1)
        file_2_command_start, file_2_command_end = get_command_positions(second_file_lines, command, 2)

        file_1_gap = file_1_command_start - file_1_prev
        file_2_gap = file_2_command_start - file_2_prev
        if file_1_gap != file_2_gap:
            return False

        # 7. Commands must be in ascending order. The first command may start
        #    right at the beginning of the files (gap of 0); every later
        #    command must start strictly after the end of the previous one.
        #    The position in the list, not the line's text, identifies the
        #    first command, so a repeated line is not mistaken for it.
        minimum_gap = 0 if index == 0 else 1
        if file_1_gap < minimum_gap:
            return False

        file_1_prev = file_1_command_end
        file_2_prev = file_2_command_end

    return True
    
def get_command_positions(line_commands, command, file):
    """Convert one side of a diff command into a ``(start, end)`` pair.

    Args:
        line_commands: ``"n"`` or ``"n,m"`` line reference for one file.
        command: the command letter (``"a"``, ``"d"`` or ``"c"``).
        file: 1 when the reference belongs to the first file, 2 for the second.

    Returns:
        ``(start, end)`` where ``end`` is the last referenced number and
        ``start`` is the first referenced number, lowered by one when the
        command is ``"c"``, or ``"a"`` on file 2, or ``"d"`` on file 1.
    """
    pieces = line_commands.split(',')
    if len(pieces) > 1:
        # A range: first and second numbers give the two ends
        begin, finish = int(pieces[0]), int(pieces[1])
    else:
        # A single number serves as both ends
        begin = finish = int(line_commands)

    # The start is shifted down by one only for these command/file pairings
    shift_start = (
        command == "c"
        or (command == "a" and file == 2)
        or (command == "d" and file == 1)
    )
    if shift_start:
        begin -= 1
    return begin, finish
     
class DiffCommandsError(Exception):
    """Raised by DiffCommands when a file fails the diff-command validation."""

class DiffCommands:
    """Commands of a diff, loaded from a file and validated on construction."""

    def __init__(self, filename):
        """Read ``filename`` and keep its lines in ``self.lines``.

        Raises DiffCommandsError when check_input_lines rejects the content.
        """
        with open(filename, 'r') as handle:
            self.lines = handle.readlines()

        commands_ok = check_input_lines(self.lines)
        if not commands_ok:
            raise DiffCommandsError('Cannot possibly be the commands for the diff of two files')

    def __str__(self):
        """Return all commands as one string without surrounding newlines."""
        text = ''.join(self.lines)
        # Dropping the newlines at both ends removes the terminator of the last line
        return text.strip('\n')
    

class OriginalNewFiles:
    """The original and the new version of a text file, kept as lists of lines.

    The output methods replay the commands held in a ``DiffCommands``-like
    object (anything with a ``lines`` attribute) against the two files.
    """

    def __init__(self, original_filename, new_filename):
        """Read both files completely; lines keep their newline terminator."""
        with open(original_filename, 'r') as f1, open(new_filename, 'r') as f2:
            # Lines of the original file, as returned by readlines()
            self.first_file = f1.readlines()
            # Lines of the new file, as returned by readlines()
            self.second_file = f2.readlines()

    def print_first_file(self):
        """Print the original file without leading or trailing newlines."""
        print(''.join(self.first_file).strip('\n'))

    def print_second_file(self):
        """Print the new file without leading or trailing newlines."""
        print(''.join(self.second_file).strip('\n'))

    def is_a_possible_diff(self, diff_file):
        """Placeholder: always returns True without inspecting ``diff_file``."""
        return True

    def output_diff(self, diff_file):
        """Print every command of ``diff_file`` followed by the lines it affects.

        Lines taken from the original file are prefixed with ``<`` and lines
        taken from the new file with ``>``; a change command separates the
        two groups with ``---``.
        """
        for line in diff_file.lines:
            print(line.strip('\n'))
            command = re.findall("[A-z]", line)[0]
            first_file_lines, second_file_lines = line.split(command)
            second_file_lines = second_file_lines.strip('\n')

            file_1_command_start, file_1_command_end = get_command_positions(first_file_lines, command, 1)
            file_2_command_start, file_2_command_end = get_command_positions(second_file_lines, command, 2)

            # Delete and change commands list the affected original lines
            if command in ('d', 'c'):
                for i in range(file_1_command_start, file_1_command_end):
                    print("<", self.first_file[i].strip('\n'))

            if command == 'c':
                print("---")

            # Add and change commands list the affected new lines
            if command in ('a', 'c'):
                for i in range(file_2_command_start, file_2_command_end):
                    print(">", self.second_file[i].strip('\n'))

    def output_unmodified(self, diff_file, command_skipped, file_to_output):
        """Print the lines of one file that lie between the diff commands.

        Args:
            diff_file: object whose ``lines`` attribute holds the commands.
            command_skipped: command letter whose lines are ignored entirely.
            file_to_output: 1 to print from the original file, 2 for the new one.

        Every command that is not skipped is rendered as a line with ``...``.
        """
        prev = 0
        file = (self.first_file, self.second_file)[file_to_output - 1]

        for line in diff_file.lines:
            command = re.findall("[A-z]", line)[0]
            if command == command_skipped:
                continue
            # The second-file side may still end with a newline here; int()
            # inside get_command_positions tolerates surrounding whitespace.
            file_lines = line.split(command)[file_to_output - 1]
            command_start, command_end = get_command_positions(file_lines, command, file_to_output)
            for i in range(prev, command_start):
                print(file[i].strip('\n'))
            prev = command_end
            print("...")

        # Whatever follows the last command is unmodified as well
        for i in range(prev, len(file)):
            print(file[i].strip('\n'))

    def output_unmodified_from_original(self, diff_file):
        """Print the original file's unmodified lines, ignoring add commands."""
        self.output_unmodified(diff_file, 'a', 1)

    def output_unmodified_from_new(self, diff_file):
        """Print the new file's unmodified lines, ignoring delete commands."""
        self.output_unmodified(diff_file, 'd', 2)

    @staticmethod
    def get_all_diff_commands():
        """Placeholder: print a test message and return a fixed list.

        Declared as a static method because it takes no ``self``; without the
        decorator a call on an instance raises TypeError. Calls through the
        class keep working as before.
        """
        print("Testing get_all_diff_commands")
        return ["Diff_Files"]

In [4]:
# Load the three diff command files; each constructor call validates its file
# and raises DiffCommandsError when the commands are rejected.
# Original note: "Wrong5, Wrong6, Wrong7" — presumably refers to the disabled
# wrong_*.txt cases below; TODO confirm what is still outstanding for them.
diff_1 = DiffCommands('diff_1.txt')
diff_2 = DiffCommands('diff_2.txt')
diff_3 = DiffCommands('diff_3.txt')
# Disabled checks (presumably invalid command files expected to raise
# DiffCommandsError — TODO confirm and re-enable one at a time):
# DiffCommands('wrong_4.txt')
# DiffCommands('wrong_5.txt')
# DiffCommands('wrong_6.txt')
# DiffCommands('wrong_7.txt')

In [5]:
# Show the loaded commands through DiffCommands.__str__
print(diff_1)

1,2d0
3a2
5c4
7a7,8
10d10
13,16c13,15


In [6]:
# Load the original and the new file that the diff commands are replayed against
pair_of_files = OriginalNewFiles('file_1_1.txt', 'file_1_2.txt')

In [7]:
# Print every command of diff_1 together with the "<" / ">" lines it affects
pair_of_files.output_diff(diff_1)

1,2d0
< A line to delete: 1
< A line to delete: 2
3a2
> A line to insert: 1
5c4
< A line to change: 1
---
> A changed line: 1
7a7,8
> A line to insert: 2
> A line to insert: 3
10d10
< A line to delete: 3
13,16c13,15
< A line to change: 2
< A line to change: 3
< A line to change: 4
< A line to change: 5
---
> A changed line: 2
> A changed line: 3
> A changed line: 4


In [8]:
# Lines of the original file between the commands; "..." stands for each delete/change command
pair_of_files.output_unmodified_from_original(diff_1)

...
A line that stays: 1
A line that stays: 2
...
A line that stays: 3
A line that stays: 4
A line that stays: 5
A line that stays: 6
...
A line that stays: 7
A line that stays: 8
...
A line that stays: 9


In [9]:
# Lines of the new file between the commands; "..." stands for each add/change command
pair_of_files.output_unmodified_from_new(diff_1)

A line that stays: 1
...
A line that stays: 2
...
A line that stays: 3
A line that stays: 4
...
A line that stays: 5
A line that stays: 6
A line that stays: 7
A line that stays: 8
...
A line that stays: 9


In [32]:
# File names of the original/new pair (same values as assigned in the earlier cell);
# nothing is read from disk in this cell.
file_1, file_2 = 'file_1_1.txt', 'file_1_2.txt'

def shorten_lines(lines):
    """Abbreviate already-loaded lines to a two-letter code plus their number.

    Each line is split on ``": "``; the text before the separator is replaced
    by its code from the table below and the stripped text after it is
    appended, e.g. ``"A line that stays: 3"`` becomes ``"ST3"``.

    Note: this definition replaces the earlier ``shorten_lines`` in the
    notebook, which took a file name instead of a list of lines.

    Raises KeyError when the text before ``": "`` is not in the table.
    """
    codes = {
        "A changed line": 'CH',
        "A line to delete": 'DL',
        "A line to insert": 'IN',
        "A line to change": 'TC',
        "A line that stays": 'ST'}

    pieces_per_line = (raw.split(": ") for raw in lines)
    return [codes[pieces[0]] + pieces[1].strip() for pieces in pieces_per_line]

def display_grid(file_1, file_2):
    """Print the labels of ``file_1`` as rows and those of ``file_2`` as columns.

    Each ``file_1`` label goes on its own line followed by `` |``; then one
    ``.---.`` cell per ``file_2`` label is printed, and finally the
    ``file_2`` labels on a single line that is not terminated by a newline.
    """
    for row_label in file_1:
        print(row_label + " |")

    # One ruler segment per column
    print("     " + ".---." * len(file_2))

    # Column labels share one line; no newline is emitted after the last one
    column_labels = "".join(label + "  " for label in file_2)
    print("      " + column_labels, end="")

# Abbreviate the lines already held by pair_of_files, list both results
# followed by a blank line, then draw the grid
file_1_shortened = shorten_lines(pair_of_files.first_file)
file_2_shortened = shorten_lines(pair_of_files.second_file)
print(file_1_shortened, file_2_shortened, "", sep="\n")
display_grid(file_1_shortened, file_2_shortened)


['DL1', 'DL2', 'ST1', 'ST2', 'TC1', 'ST3', 'ST4', 'ST5', 'ST6', 'DL3', 'ST7', 'ST8', 'TC2', 'TC3', 'TC4', 'TC5', 'ST9']
['ST1', 'IN1', 'ST2', 'CH1', 'ST3', 'ST4', 'IN2', 'IN3', 'ST5', 'ST6', 'ST7', 'ST8', 'CH2', 'CH3', 'CH4', 'ST9']

DL1 |
DL2 |
ST1 |
ST2 |
TC1 |
ST3 |
ST4 |
ST5 |
ST6 |
DL3 |
ST7 |
ST8 |
TC2 |
TC3 |
TC4 |
TC5 |
ST9 |
     .---..---..---..---..---..---..---..---..---..---..---..---..---..---..---..---.
      ST1  IN1  ST2  CH1  ST3  ST4  IN2  IN3  ST5  ST6  ST7  ST8  CH2  CH3  CH4  ST9  