In [20]:
import difflib
import re

def preprocess_code(code):
    """Strip comments and blank lines from source text before comparison.

    Removed in a single left-to-right pass, so whichever construct starts
    first wins:
      * triple-quoted blocks (both quote styles),
      * C-style ``/* ... */`` blocks,
      * ``#`` comments running to the end of the line.

    Doing this in one pass matters: stripping ``#`` comments first would cut
    the closing quotes off a docstring such as ``\"\"\"the # of items\"\"\"``
    and the block pattern would then swallow real code up to the next
    triple quote.

    After comment removal, trailing whitespace is trimmed from every line and
    empty lines are dropped, so ``x = 1  # note`` and ``x = 1`` normalise to
    the same text.

    Limitations (regex based, not a tokenizer): a ``#`` inside an ordinary
    string literal is treated as a comment, and every triple-quoted string is
    removed, not only docstrings.

    Args:
        code: Source text to normalise.

    Returns:
        The remaining non-empty lines joined with ``'\\n'`` (no trailing
        newline).
    """
    comment_pattern = (
        r'"""(.*?)"""'      # triple double-quoted block
        r"|'''(.*?)'''"     # triple single-quoted block
        r'|/\*(.*?)\*/'     # C-style block comment
        r'|#[^\n]*'         # '#' comment up to (not including) the newline
    )
    # DOTALL lets the block alternatives span several lines; the '#'
    # alternative uses [^\n] so it still stops at the end of its line.
    code = re.sub(comment_pattern, '', code, flags=re.DOTALL)

    # Trim trailing whitespace left behind by removed comments, then drop
    # lines that ended up empty (including leading ones).
    trimmed_lines = (line.rstrip() for line in code.splitlines())
    return '\n'.join(line for line in trimmed_lines if line)

def read_code_from_file(file_path):
    """Read a file as text and return its content, or ``None`` on failure.

    Any failure is reported with a printed message and signalled by a
    ``None`` return instead of an exception.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file content as a string, or ``None`` if the file does not exist
        or cannot be read (e.g. the path is a directory, permission is
        denied, or the bytes cannot be decoded as text).
    """
    try:
        with open(file_path, 'r') as file:
            return file.read()
    except FileNotFoundError:
        print(f"Error: File not found at {file_path}")
        return None
    except (OSError, UnicodeDecodeError) as error:
        # Directory paths, permission problems and undecodable content would
        # otherwise surface as a traceback instead of the None sentinel.
        print(f"Error: Could not read {file_path}: {error}")
        return None

def calculate_similarity_percentage(code1, code2):
    """Return the percentage of lines shared by two pieces of source code.

    Both inputs are passed through ``preprocess_code`` and split into lines.
    The lines are compared with ``difflib.Differ``; entries of the diff that
    start with a space are the lines present in both inputs. The score is
    that count divided by the line count of the longer input, times 100.

    Args:
        code1: Text of the first source file.
        code2: Text of the second source file.

    Returns:
        A float in the range 0-100. When neither input has any lines left
        after preprocessing the result is 100.0 (two empty sequences are
        treated as identical, as ``difflib.SequenceMatcher.ratio`` does)
        rather than raising ``ZeroDivisionError``.
    """
    lines1 = preprocess_code(code1).splitlines()
    lines2 = preprocess_code(code2).splitlines()

    # Normalise by the longer file so extra lines on either side lower the score.
    total_lines = max(len(lines1), len(lines2))
    if total_lines == 0:
        # Nothing to compare on either side; avoid dividing by zero.
        return 100.0

    diff = difflib.Differ().compare(lines1, lines2)
    # Differ marks lines common to both inputs with a leading space; removed,
    # added and hint lines start with '-', '+' and '?'.
    common_count = sum(1 for line in diff if line.startswith(' '))

    return (common_count / total_lines) * 100

if __name__ == "__main__":
    # Prompt for the two files that should be compared.
    file_path1 = input("Enter the file path for the first code snippet:\n")
    file_path2 = input("Enter the file path for the second code snippet:\n")

    # Load both files in order; each read returns None when the file is missing.
    code_snippet1, code_snippet2 = (
        read_code_from_file(path) for path in (file_path1, file_path2)
    )

    # Report a score only when both reads succeeded.
    if not (code_snippet1 is None or code_snippet2 is None):
        similarity_percentage = calculate_similarity_percentage(code_snippet1, code_snippet2)
        print(f"\nSimilarity percentage (excluding comments and extra empty lines): {similarity_percentage:.2f}%")


Enter the file path for the first code snippet:
/content/abc1.py
Enter the file path for the second code snippet:
/content/abc3.py

Similarity percentage (excluding comments and extra empty lines): 87.50%


In [21]:
import difflib
import re

def preprocess_code(code):
    """Strip comments and blank lines from source text before comparison.

    Removed in a single left-to-right pass, so whichever construct starts
    first wins:
      * triple-quoted blocks (both quote styles),
      * C-style ``/* ... */`` blocks,
      * ``//`` and ``#`` comments running to the end of the line, unless the
        marker is immediately preceded by a double quote.

    Doing this in one pass matters: stripping line comments first would cut
    the closing quotes off a docstring such as ``\"\"\"the # of items\"\"\"``
    and the block pattern would then swallow real code up to the next
    triple quote.

    After comment removal, trailing whitespace is trimmed from every line and
    empty lines are dropped, so ``x = 1  # note`` and ``x = 1`` normalise to
    the same text.

    Limitations (regex based, not a tokenizer): ``//`` is always treated as a
    comment marker, so Python floor division (``a // b``) and URLs are
    truncated; a ``#`` or ``//`` inside a string literal is only protected
    when it directly follows the opening double quote; every triple-quoted
    string is removed, not only docstrings.

    Args:
        code: Source text to normalise.

    Returns:
        The remaining non-empty lines joined with ``'\\n'`` (no trailing
        newline).
    """
    comment_pattern = (
        r'"""(.*?)"""'                # triple double-quoted block
        r"|'''(.*?)'''"               # triple single-quoted block
        r'|/\*(.*?)\*/'               # C-style block comment
        r'|(?<!")(?://|#)[^\n]*'      # '//' or '#' comment up to the newline
    )
    # DOTALL lets the block alternatives span several lines; the line-comment
    # alternative uses [^\n] so it still stops at the end of its line.
    code = re.sub(comment_pattern, '', code, flags=re.DOTALL)

    # Trim trailing whitespace left behind by removed comments, then drop
    # lines that ended up empty (including leading ones).
    trimmed_lines = (line.rstrip() for line in code.splitlines())
    return '\n'.join(line for line in trimmed_lines if line)

def read_code_from_file(file_path):
    """Read a file as text and return its content, or ``None`` on failure.

    Any failure is reported with a printed message and signalled by a
    ``None`` return instead of an exception.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file content as a string, or ``None`` if the file does not exist
        or cannot be read (e.g. the path is a directory, permission is
        denied, or the bytes cannot be decoded as text).
    """
    try:
        with open(file_path, 'r') as file:
            return file.read()
    except FileNotFoundError:
        print(f"Error: File not found at {file_path}")
        return None
    except (OSError, UnicodeDecodeError) as error:
        # Directory paths, permission problems and undecodable content would
        # otherwise surface as a traceback instead of the None sentinel.
        print(f"Error: Could not read {file_path}: {error}")
        return None

def calculate_similarity_percentage(code1, code2):
    """Return the percentage of lines shared by two pieces of source code.

    Both inputs are passed through ``preprocess_code`` and split into lines.
    The lines are compared with ``difflib.Differ``; entries of the diff that
    start with a space are the lines present in both inputs. The score is
    that count divided by the line count of the longer input, times 100.

    Args:
        code1: Text of the first source file.
        code2: Text of the second source file.

    Returns:
        A float in the range 0-100. When neither input has any lines left
        after preprocessing the result is 100.0 (two empty sequences are
        treated as identical, as ``difflib.SequenceMatcher.ratio`` does)
        rather than raising ``ZeroDivisionError``.
    """
    lines1 = preprocess_code(code1).splitlines()
    lines2 = preprocess_code(code2).splitlines()

    # Normalise by the longer file so extra lines on either side lower the score.
    total_lines = max(len(lines1), len(lines2))
    if total_lines == 0:
        # Nothing to compare on either side; avoid dividing by zero.
        return 100.0

    diff = difflib.Differ().compare(lines1, lines2)
    # Differ marks lines common to both inputs with a leading space; removed,
    # added and hint lines start with '-', '+' and '?'.
    common_count = sum(1 for line in diff if line.startswith(' '))

    return (common_count / total_lines) * 100

if __name__ == "__main__":
    # Prompt for the two files that should be compared.
    file_path1 = input("Enter the file path for the first code snippet:\n")
    file_path2 = input("Enter the file path for the second code snippet:\n")

    # Load both files in order; each read returns None when the file is missing.
    code_snippet1, code_snippet2 = (
        read_code_from_file(path) for path in (file_path1, file_path2)
    )

    # Report a score only when both reads succeeded.
    if not (code_snippet1 is None or code_snippet2 is None):
        similarity_percentage = calculate_similarity_percentage(code_snippet1, code_snippet2)
        print(f"\nSimilarity percentage (excluding comments and extra empty lines): {similarity_percentage:.2f}%")


Enter the file path for the first code snippet:
/content/abc1.py
Enter the file path for the second code snippet:
/content/abc3.py

Similarity percentage (excluding comments and extra empty lines): 87.50%
