In [3]:
import hashlib
import ast
import astunparse

def hash_code(code):
    """
    Returns the SHA-256 hex digest of a code snippet.

    The snippet is turned into bytes with ``str.encode()`` (UTF-8 by
    default) before it is fed to the hash, so ``code`` must be a string.
    """
    digest = hashlib.sha256()
    digest.update(code.encode())
    return digest.hexdigest()

def generate_ast(code):
    """
    Parses a code snippet into an abstract syntax tree (AST).

    Returns the tree built by ``ast.parse``, or ``None`` when parsing the
    snippet raises a ``SyntaxError``.
    """
    try:
        parsed = ast.parse(code)
    except SyntaxError:
        # Unparseable snippets are reported to the caller as "no tree".
        parsed = None
    return parsed

def compare_asts(ast1, ast2):
    """
    Reports whether two abstract syntax trees (ASTs) are identical.

    Both trees are rendered with ``ast.dump`` and the resulting strings are
    compared, so the result is a plain ``True``/``False`` rather than a
    graded similarity score.
    """
    first_dump = ast.dump(ast1)
    second_dump = ast.dump(ast2)
    return first_dump == second_dump

def read_code_from_file(file_path):
    """
    Reads code from a file and returns its full contents as a string.

    The file is always decoded as UTF-8 instead of the platform's locale
    encoding, so the same file yields the same text (and therefore the same
    hash) on every machine.

    Raises:
        OSError: if the file cannot be opened or read.
        UnicodeDecodeError: if the file content is not valid UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return file.read()

def calculate_similarity_percentage(hash1, hash2, ast_similarity):
    """
    Calculates the overall similarity percentage.

    The hash score is the share of positions at which ``hash1`` and
    ``hash2`` hold the same character, measured against the longer of the
    two strings. When ``ast_similarity`` is truthy it is averaged with the
    hash score; a falsy value (``0`` or ``None``) is treated as "no AST
    score" and the hash score is returned on its own.

    NOTE(review): when the inputs are cryptographic digests (the caller in
    this file passes SHA-256 hex digests), position-wise matches between
    two different digests are essentially chance, so the hash score is only
    meaningful as "identical" vs "not identical".

    Args:
        hash1: First hash string.
        hash2: Second hash string.
        ast_similarity: AST similarity as a percentage (0-100), or a falsy
            value when no AST score is available.

    Returns:
        The similarity as a percentage.
    """
    longest = max(len(hash1), len(hash2))
    if longest == 0:
        # Two empty strings have nothing that differs; treating them as
        # identical also avoids dividing by zero below.
        hash_score = 100.0
    else:
        matches = sum(a == b for a, b in zip(hash1, hash2))
        hash_score = (matches / longest) * 100

    if ast_similarity:
        # Take the average of the hash score and the AST score.
        return (hash_score + ast_similarity) / 2

    return hash_score

def detect_duplicate_code():
    """
    Takes two file paths from the user, reads code from files,
    and detects if they are duplicates. Also calculates the similarity percentage.

    All results are printed to standard output; nothing is returned. If
    either file cannot be read, an error message is printed and the
    function returns early instead of ending in a traceback.
    """
    print("Enter the file path for the first code snippet:")
    file_path1 = input()

    print("Enter the file path for the second code snippet:")
    file_path2 = input()

    # Read code from files. The paths are typed in by hand, so a missing or
    # unreadable file is an expected situation rather than a program error.
    try:
        code_snippet1 = read_code_from_file(file_path1)
        code_snippet2 = read_code_from_file(file_path2)
    except (OSError, UnicodeDecodeError) as error:
        print(f"Could not read file: {error}")
        return

    # Hash the code snippets
    hash1 = hash_code(code_snippet1)
    hash2 = hash_code(code_snippet2)

    # Generate ASTs (None means the snippet could not be parsed)
    ast1 = generate_ast(code_snippet1)
    ast2 = generate_ast(code_snippet2)

    # Compare the ASTs; 0 doubles as "no AST score" for the overall figure
    ast_similarity = 0
    if ast1 is not None and ast2 is not None:
        # compare_asts returns a bool, so this yields either 0 or 100.
        ast_similarity = compare_asts(ast1, ast2) * 100
        print(f"AST Similarity Percentage: {ast_similarity:.2f}%")
    else:
        print("AST comparison skipped: at least one snippet could not be parsed.")

    # Compare the hashes
    if hash1 == hash2:
        print("Duplicate code detected!")
    else:
        print("Code snippets are different.")

    # Calculate and display overall similarity percentage
    similarity_percentage = calculate_similarity_percentage(hash1, hash2, ast_similarity)
    print(f"Overall Similarity Percentage: {similarity_percentage:.2f}%")

# Run the interactive duplicate check only when this file is executed as a
# script; importing it as a module just makes the functions available.
if __name__ == "__main__":
    detect_duplicate_code()


Enter the file path for the first code snippet:
/content/abc1.py
Enter the file path for the second code snippet:
/content/abc2.py
AST Similarity Percentage: 100.00%
Duplicate code detected!
Overall Similarity Percentage: 100.00%
