In [2]:

import hashlib



def read_code_from_file(file_path, encoding="utf-8"):
    """
    Reads the full text of a source file.

    Args:
        file_path: Path of the file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8
            so the result does not depend on the platform's locale encoding.

    Returns:
        The file contents as a single string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the contents are not valid in ``encoding``.
    """
    with open(file_path, 'r', encoding=encoding) as file:
        return file.read()

def calculate_similarity_percentage(hash1, hash2):
    """
    Calculates the percentage of positions at which two strings agree.

    Characters are compared pairwise by index. The number of matches is
    divided by the length of the longer string, so any surplus characters
    in the longer string count as mismatches.

    Note: when the arguments are hexadecimal digests of a cryptographic
    hash, unrelated inputs still agree at roughly 1 position in 16 by
    chance, so the result is not a measure of how similar the hashed
    texts are.

    Args:
        hash1: First string (typically a hex digest).
        hash2: Second string (typically a hex digest).

    Returns:
        A float between 0.0 and 100.0. Two empty strings are treated as
        identical and yield 100.0.
    """
    total_chars = max(len(hash1), len(hash2))
    if total_chars == 0:
        # Both inputs are empty: they are identical, and dividing by the
        # length would raise ZeroDivisionError.
        return 100.0

    similarity_count = sum(a == b for a, b in zip(hash1, hash2))
    return (similarity_count / total_chars) * 100

def hash_code(code):
    """
    Hashes a code snippet using the SHA-256 algorithm.

    Defined in this cell so that detect_duplicate_code() does not depend on
    a helper that only exists after a later cell has been executed.

    Args:
        code: Source text to hash.

    Returns:
        The SHA-256 digest of the encoded text as a hexadecimal string.
    """
    return hashlib.sha256(code.encode()).hexdigest()

def detect_duplicate_code():
    """
    Interactively checks whether two files contain identical text.

    Prompts on standard input for two file paths, reads both files with
    read_code_from_file(), hashes each with hash_code(), and prints whether
    the digests are equal. It then prints the score returned by
    calculate_similarity_percentage() for the two digests.

    Returns:
        None. All results are printed.
    """
    print("Enter the file path for the first code snippet:")
    file_path1 = input()

    print("Enter the file path for the second code snippet:")
    file_path2 = input()

    # Read code from files
    code_snippet1 = read_code_from_file(file_path1)
    code_snippet2 = read_code_from_file(file_path2)

    # Hash the code snippets
    hash1 = hash_code(code_snippet1)
    hash2 = hash_code(code_snippet2)

    # Equal digests mean the two files have exactly the same text.
    if hash1 == hash2:
        print("Duplicate code detected!")
    else:
        print("Code snippets are different.")

    # Calculate and display similarity percentage (computed on the digests,
    # not on the file contents).
    similarity_percentage = calculate_similarity_percentage(hash1, hash2)
    print(f"Similarity Percentage: {similarity_percentage:.2f}%")

# Entry point: start the interactive duplicate check when this code runs as
# the main module (which is the case when the cell is executed in a notebook).
if __name__ == "__main__":
    detect_duplicate_code()


In [None]:
import ast
import difflib

def get_ast(code):
    """
    Parses Python source text into an ``ast`` tree.

    Raises:
        SyntaxError: If ``code`` is not valid Python source.
    """
    return ast.parse(code)

def calculate_similarity_percentage(code1, code2):
    """
    Estimates how much of two Python snippets' syntax trees is shared.

    Both snippets are parsed, each tree is dumped one node/field per line,
    and the two line sequences are aligned. The score is the number of
    aligned identical lines divided by the line count of the longer dump.
    Because the comparison works on the AST dump, formatting and comments
    do not affect the result.

    Note: this function shares its name with the hash-based function defined
    in an earlier cell and replaces it once this cell has been executed.

    Args:
        code1: Source text of the first snippet.
        code2: Source text of the second snippet.

    Returns:
        A float between 0.0 and 100.0.

    Raises:
        SyntaxError: If either snippet is not valid Python source.
    """
    # indent= (Python 3.9+) puts every node and field on its own line.
    # Without it ast.dump() returns a single line, so a line-based
    # comparison could only ever report 0% or 100%.
    lines1 = ast.dump(get_ast(code1), indent=2).splitlines()
    lines2 = ast.dump(get_ast(code2), indent=2).splitlines()

    # autojunk=False: AST dumps repeat lines such as "ctx=Load())" very
    # often, and the default heuristic would ignore them in long dumps.
    matcher = difflib.SequenceMatcher(None, lines1, lines2, autojunk=False)
    common_lines = sum(block.size for block in matcher.get_matching_blocks())

    # A dump always has at least one line, so this is never zero.
    total_lines = max(len(lines1), len(lines2))
    return (common_lines / total_lines) * 100

if __name__ == "__main__":
    # Interactive driver for the AST comparison: prompt for two paths, read
    # both files in full, then print the score to two decimal places.
    # Get file paths from the user
    file_path1 = input("Enter the file path for the first code snippet:\n")
    file_path2 = input("Enter the file path for the second code snippet:\n")

    # Read code from files
    # (opened with the platform's default text encoding)
    with open(file_path1, 'r') as file:
        code_snippet1 = file.read()

    with open(file_path2, 'r') as file:
        code_snippet2 = file.read()

    # Both snippets are parsed with ast.parse(), so a file that is not valid
    # Python source raises SyntaxError here.
    similarity_percentage = calculate_similarity_percentage(code_snippet1, code_snippet2)
    print(f"\nSimilarity percentage (using AST): {similarity_percentage:.2f}%")


In [3]:
import hashlib
import ast
import difflib

# Code 1: SHA-256 Hashing
def hash_code(code):
    """
    Returns the SHA-256 digest of a code snippet as a hexadecimal string.

    The snippet is converted to bytes with ``str.encode()`` before hashing.
    """
    digest = hashlib.sha256()
    digest.update(code.encode())
    return digest.hexdigest()

def read_code_from_file(file_path, encoding="utf-8"):
    """
    Reads the full text of a source file.

    Args:
        file_path: Path of the file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8
            so the result does not depend on the platform's locale encoding.

    Returns:
        The file contents as a single string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the contents are not valid in ``encoding``.
    """
    with open(file_path, 'r', encoding=encoding) as file:
        return file.read()

def calculate_similarity_percentage_hashing(hash1, hash2):
    """
    Calculates the percentage of positions at which two digests agree.

    Characters are compared pairwise by index. The number of matches is
    divided by the length of the longer string, so any surplus characters
    in the longer string count as mismatches.

    Note: for hexadecimal digests of a cryptographic hash such as SHA-256,
    unrelated inputs still agree at roughly 1 position in 16 by chance, so
    the result is not a measure of how similar the hashed texts are. Only
    100% (identical digests) is meaningful.

    Args:
        hash1: First digest string.
        hash2: Second digest string.

    Returns:
        A float between 0.0 and 100.0. Two empty strings are treated as
        identical and yield 100.0.
    """
    total_chars = max(len(hash1), len(hash2))
    if total_chars == 0:
        # Both inputs are empty: they are identical, and dividing by the
        # length would raise ZeroDivisionError.
        return 100.0

    similarity_count = sum(a == b for a, b in zip(hash1, hash2))
    return (similarity_count / total_chars) * 100

# Code 2: AST-based Comparison
def get_ast(code):
    """
    Parses Python source text into an ``ast`` tree.

    Raises:
        SyntaxError: If ``code`` is not valid Python source.
    """
    return ast.parse(code)

def calculate_similarity_percentage_ast(code1, code2):
    """
    Estimates how much of two Python snippets' syntax trees is shared.

    Both snippets are parsed, each tree is dumped one node/field per line,
    and the two line sequences are aligned. The score is the number of
    aligned identical lines divided by the line count of the longer dump.
    Because the comparison works on the AST dump, formatting and comments
    do not affect the result.

    Args:
        code1: Source text of the first snippet.
        code2: Source text of the second snippet.

    Returns:
        A float between 0.0 and 100.0.

    Raises:
        SyntaxError: If either snippet is not valid Python source.
    """
    # indent= (Python 3.9+) puts every node and field on its own line.
    # Without it ast.dump() returns a single line, so a line-based
    # comparison could only ever report 0% or 100%.
    lines1 = ast.dump(get_ast(code1), indent=2).splitlines()
    lines2 = ast.dump(get_ast(code2), indent=2).splitlines()

    # autojunk=False: AST dumps repeat lines such as "ctx=Load())" very
    # often, and the default heuristic would ignore them in long dumps.
    matcher = difflib.SequenceMatcher(None, lines1, lines2, autojunk=False)
    common_lines = sum(block.size for block in matcher.get_matching_blocks())

    # A dump always has at least one line, so this is never zero.
    total_lines = max(len(lines1), len(lines2))
    return (common_lines / total_lines) * 100

if __name__ == "__main__":
    # Interactive entry point: ask which comparison to run, prompt for the
    # two file paths, and print the result of the chosen method.
    print("Choose the method to detect code similarity:")
    print("1. SHA-256 Hashing")
    print("2. AST-based Comparison")

    # Ignore stray whitespace around the typed menu choice.
    choice = input().strip()

    if choice == "1":
        # SHA-256 Hashing
        print("Enter the file path for the first code snippet:")
        file_path1 = input()

        print("Enter the file path for the second code snippet:")
        file_path2 = input()

        # Read code from files
        code_snippet1 = read_code_from_file(file_path1)
        code_snippet2 = read_code_from_file(file_path2)

        # Hash the code snippets
        hash1 = hash_code(code_snippet1)
        hash2 = hash_code(code_snippet2)

        # Equal digests mean the two files have exactly the same text.
        if hash1 == hash2:
            print("Duplicate code detected!")
        else:
            print("Code snippets are different.")

        # Calculate and display similarity percentage (computed on the
        # digests, not on the file contents).
        similarity_percentage = calculate_similarity_percentage_hashing(hash1, hash2)
        print(f"Similarity Percentage (using SHA-256 Hashing): {similarity_percentage:.2f}%")

    elif choice == "2":
        # AST-based Comparison
        file_path1 = input("Enter the file path for the first code snippet:\n")
        file_path2 = input("Enter the file path for the second code snippet:\n")

        # Read code from files through the same helper the hashing branch
        # uses, instead of repeating the open/read logic here.
        code_snippet1 = read_code_from_file(file_path1)
        code_snippet2 = read_code_from_file(file_path2)

        # Both snippets are parsed as Python, so a file that is not valid
        # Python source raises SyntaxError here.
        similarity_percentage = calculate_similarity_percentage_ast(code_snippet1, code_snippet2)
        print(f"\nSimilarity percentage (using AST-based Comparison): {similarity_percentage:.2f}%")

    else:
        print("Invalid choice. Please choose 1 or 2.")


Choose the method to detect code similarity:
1. SHA-256 Hashing
2. AST-based Comparison
1
Enter the file path for the first code snippet:
/content/abc1.py
Enter the file path for the second code snippet:
/content/abc3.py
Code snippets are different.
Similarity Percentage (using SHA-256 Hashing): 6.25%
