In [1]:
import zipfile
import hashlib
import csv
import os
import io

In [2]:
# Function to compute the SHA-256 hash of file contents
def compute_hash(file, algorithm="sha256"):
    hash_func = hashlib.new(algorithm)
    hash_func.update(file.read())
    return hash_func.hexdigest()

In [34]:
# Function to extract hashes from a .zip archive, handling nested .jar files, with password support
def extract_hashes_from_nested_archive(archive_data, depth_count, password=None):
    nested_file_hashes = {}
    # If archive_data is a BytesIO object, use .getvalue() to extract raw bytes

    print(archive_data)
    if isinstance(archive_data, io.BytesIO):
        archive_data = archive_data.getvalue()

    # Open the .zip (or .jar) archive from raw bytes
    #print(archive_data)
    with zipfile.ZipFile(io.BytesIO(archive_data)) as archive:
        if password:
            archive.setpassword(password)

        for file_name in archive.namelist():
            # Skip directories
            print(file_name)
            if file_name.endswith("/"):
                continue

            # Check if the file itself is a .jar (or another nested archive)            
            if file_name.endswith(".jar"):
                if depth_count == 1:
                    continue

                with archive.open(file_name) as nested_jar_file:
                    # Recursively extract hashes from the nested .jar
                    nested_file_hashes[file_name] = compute_hash(nested_jar_file)
                    nested_data = io.BytesIO(nested_jar_file.read())
                    nested_hashes = extract_hashes_from_nested_archive(nested_data, depth_count-1, password=None)

                    for nested_name, nested_hash in nested_hashes.items():
                        #nested_file_hashes[f"{file_name}"] = file_name
                        nested_file_hashes[f"{nested_name}"] = [nested_hash, file_name]
                        #print(nested_file_hashes[f"{nested_name}"])

            else:
                # For regular files, compute the hash
                with archive.open(file_name) as file_obj:
                    #file_content = io.BytesIO(file.read())
                    nested_file_hashes[file_name] = compute_hash(file_obj)
                    #print(f"{file_name} {nested_file_hashes[file_name]}")

    return nested_file_hashes

In [28]:
# Load your main ZIP file in binary mode
inputPaths = []

for i in range(2):
    inputDir = input("jar 파일 경로와 이름을 입력하세요. : ")
    inputPaths.append(inputDir)
    
password = b"b002cd031989a8118aab022d74616efd5718f399"

jar 파일 경로와 이름을 입력하세요. : \\192.168.2.21\Package\빌드 형상 관리\MARS Platform V2.5\v2.5.5.13\v2.5.5.13.23\mars_server_v2.5.5.13.23.slsp
jar 파일 경로와 이름을 입력하세요. : \\192.168.2.21\Package\빌드 형상 관리\MARS Platform V2.5\v2.5.5.13\v2.5.5.13.25\mars_server_v2.5.5.13.25.slsp


In [35]:
depth_count = 0

with open(inputPaths[0], "rb") as zip_file:
    zip_data = zip_file.read()

# Extract hashes from the ZIP file, including files within any nested .jar
file_hashes1 = extract_hashes_from_nested_archive(io.BytesIO(zip_data), depth_count, password=password)
#file_hashes2 = extract_hashes_from_nested_archive(io.BytesIO(zip_data), password=password)

b'archive_data'
BALTHAZAR/
BALTHAZAR/bin/
BALTHAZAR/bin/updateCacheDB.sh
BALTHAZAR/bin/start.sh
BALTHAZAR/bin/unrar_linux
BALTHAZAR/bin/unegg_mac
BALTHAZAR/bin/stop.sh
BALTHAZAR/bin/FileTypeDetector/
BALTHAZAR/bin/FileTypeDetector/HexaInspector.RTF.rule
BALTHAZAR/bin/FileTypeDetector/HexaInspector.ini
BALTHAZAR/bin/FileTypeDetector/FileTypeDetector-1.0.33
BALTHAZAR/bin/FileTypeDetector/HexaInspector.PPAM.rule
BALTHAZAR/bin/FileTypeDetector/HexaInspector.signature.rule
BALTHAZAR/bin/unegg_linux
BALTHAZAR/bin/update.sh
BALTHAZAR/lib/
BALTHAZAR/lib/cdr-5.0.1.1.jar
b'archive_data'


BadZipFile: File is not a zip file

In [None]:
depth_count = 0

with open(inputPaths[1], "rb") as zip_file:
    zip_data = zip_file.read()

file_hashes2 = extract_hashes_from_nested_archive(io.BytesIO(zip_data), depth_count, password=password)

In [277]:
file_hashes = []
file_hashes.append(file_hashes1)
file_hashes.append(file_hashes2)
count = 0

for i in inputPaths:
    resultPath, fileName = os.path.split(i)
        
    with open(f"{resultPath}\\{fileName}.csv", "w", newline="") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['filename', 'hash'])

        for key, values in file_hashes[count].items():
            writer.writerow([key, values])
            
    count = count + 1
    csvfile.close()

In [278]:
outputCsv = input("결과 csv 저장할 경로와 파일명을 입력하세요.")

결과 csv 저장할 경로와 파일명을 입력하세요.D:\00_Script\자동화\CDR_v5.0.1\compare.csv


In [296]:
# Compare the hashes of files in both .jar files
with open(outputCsv, "w", newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['file_name', inputPaths[0], inputPaths[1]])
    
    for file_name in file_hashes1.keys():
        if file_name in file_hashes2.keys():
            if file_hashes1[file_name][0] != file_hashes2[file_name][0]:
                if type(file_hashes1[file_name][0]) != type(file_hashes2[file_name][0]):
                    if isinstance(file_hashes1[file_name][0],(str)) == True:
                        if file_hashes1[file_name][0] != file_hashes2[file_name][0][0]:
                            writer.writerow([file_name, file_hashes1[file_name][0], file_hashes2[file_name][0][0]])
                            #print([file_name, file_hashes1[file_name][0], file_hashes2[file_name][0][0]])
                        
                    if isinstance(file_hashes1[file_name][0],(list)) == True:
                        if file_hashes1[file_name][0][0] != file_hashes2[file_name][0]:
                            writer.writerow([file_name, file_hashes1[file_name][0][0], file_hashes2[file_name][0]])
                            #print([file_name, file_hashes1[file_name][0][0], file_hashes2[file_name][0]])
                
                if isinstance(file_hashes1[file_name][0],(str)) and isinstance(file_hashes2[file_name][0],(str)) == True:
                    writer.writerow([file_name, file_hashes1[file_name], file_hashes2[file_name]])
                    #print([file_name, file_hashes1[file_name], file_hashes2[file_name]])
                    
                if isinstance(file_hashes1[file_name][0],(list)) and isinstance(file_hashes2[file_name][0],(list)) == True:
                    if file_hashes1[file_name][0][0] != file_hashes2[file_name][0][0]:
                        writer.writerow([file_name, file_hashes1[file_name][0][0], file_hashes2[file_name][0][0]])
                # Print file name and hash values if they differ
                
        else:
            # Print file name and hash value if it's only in the first jar
            if isinstance(file_hashes1[file_name][0],(str)) == True:
                writer.writerow([file_name, file_hashes1[file_name]])
                print([file_name, file_hashes1[file_name]])
            
            if isinstance(file_hashes1[file_name][0],(list)) == True:
                writer.writerow([file_name, file_hashes1[file_name][0][0]])
                print([file_name, file_hashes1[file_name][0][0]])

    # Check for files in the second jar that are not in the first jar
    for file_name in file_hashes2.keys():
        if file_name not in file_hashes1.keys():
            if isinstance(file_hashes2[file_name][0],(str)) == True:
                writer.writerow([file_name, "-", file_hashes2[file_name]])
                print([file_name, "-", file_hashes2[file_name]])
            
            if isinstance(file_hashes2[file_name][0],(list)) == True:
                writer.writerow([file_name, "-", file_hashes2[file_name][0][0]])
                print([file_name, "-", file_hashes2[file_name][0][0]])
            # Print file name and hash value if it's only in the second jar


['com/seculetter/sukcheong/core/rtf/RtfNodeManager.class', '-', 'f44977cba963d886eed65bcb89b6245e868f39c9707b8cb51d85c9995709fd16']
['com/seculetter/sukcheong/core/OleObjectBin$1.class', '-', '4922110b077330cb8625604ec21ea4c41f21717d7929594ba37ab0c89be49072']


In [231]:
result = isinstance("hi", (int, float, str))
print(result)

if isinstance(file_hashes1[file_name][0],(str)) == True:
    print(1)

True
1


In [18]:
inputPaths

['\\\\192.168.2.21\\Package\\빌드 형상 관리\\MASR CDR\\v5.0.1\\cdr-5.0.1.1.jar',
 '\\\\192.168.2.21\\Package\\빌드 형상 관리\\MASR CDR\\v5.0.1\\cdr-5.0.1.2.jar']