# MD5 Collision Attack Lab

In [1]:
# Importando as bibliotecas necessárias
import os
import hashlib
import subprocess

In [2]:
# Helper to run system commands and capture their output
def run_command(command, check=False):
    """Run a shell command and return its standard output, stripped.

    Args:
        command: Command line handed to the system shell (``shell=True``), so
            shell syntax such as ``>`` redirection works. The string is
            interpreted by the shell as-is; only pass trusted text.
        check: When True, raise ``subprocess.CalledProcessError`` if the
            command exits with a non-zero status. Defaults to False so that
            existing callers keep working, including those that run tools
            which use a non-zero exit for a normal result (``diff`` exits
            with 1 when the files differ).

    Returns:
        The captured stdout as text with surrounding whitespace removed.
        stderr is captured but not returned.

    Raises:
        subprocess.CalledProcessError: only when ``check`` is True and the
            command fails.
    """
    result = subprocess.run(
        command, shell=True, capture_output=True, text=True, check=check
    )
    return result.stdout.strip()

# Tarefa 1: Gerando Dois Arquivos Diferentes com o Mesmo Hash MD5

Passo 1: Criar um Arquivo de Prefixo

In [3]:
# Prefix file that is passed to md5collgen via -p in the next step.
prefix_file = "prefix.txt"
# Explicit UTF-8: the text contains a non-ASCII character, and open() would
# otherwise pick the platform's locale encoding, changing the bytes on disk.
with open(prefix_file, "w", encoding="utf-8") as f:
    f.write("Teste de colisão MD5")

print(f"Arquivo de prefixo '{prefix_file}' criado.")

Arquivo de prefixo 'prefix.txt' criado.


Passo 2: Gerar Arquivos com Colisão de Hash MD5

In [4]:
out1 = "out1.bin"
out2 = "out2.bin"

# Remove leftovers from an earlier run so the existence check below reflects
# this invocation only.
for stale_output in (out1, out2):
    if os.path.exists(stale_output):
        os.remove(stale_output)

command = f"md5collgen -p {prefix_file} -o {out1} {out2}"
collision_output = run_command(command)

# run_command does not report a failing exit status by default, so confirm
# that both files were actually produced before announcing success.
missing_outputs = [path for path in (out1, out2) if not os.path.isfile(path)]
if missing_outputs:
    raise RuntimeError(f"md5collgen não gerou: {', '.join(missing_outputs)}")

print("Arquivos binários gerados com colisão de hash MD5.")

Arquivos binários gerados com colisão de hash MD5.


Passo 3: Verificar as Diferenças e Hashes

In [5]:
# Compare the two generated files byte-wise, then hash each one with md5sum.
diff_command = "diff " + out1 + " " + out2
diff_output = run_command(diff_command)

hash_out1, hash_out2 = (
    run_command(f"md5sum {target}") for target in (out1, out2)
)

print("Diferenças entre os arquivos:\n" + diff_output)
for target, digest_line in ((out1, hash_out1), (out2, hash_out2)):
    print(f"Hash MD5 do {target}: {digest_line}")

Diferenças entre os arquivos:
Binary files out1.bin and out2.bin differ
Hash MD5 do out1.bin: 7c317f92e1fe928b4e8e4edcb3031e72  out1.bin
Hash MD5 do out2.bin: 7c317f92e1fe928b4e8e4edcb3031e72  out2.bin


# Tarefa 2: Entendendo a Propriedade de Extensão por Sufixo do MD5

Passo 1: Concatenar Arquivos e Testar o Comportamento

In [6]:
# Text appended, unchanged, to both colliding files.
suffix_file = "suffix.txt"
with open(suffix_file, mode="w") as f:
    f.write("Sufixo Comum")

final1, final2 = "final1.bin", "final2.bin"

# Same suffix after each colliding file.
for collided, extended in ((out1, final1), (out2, final2)):
    run_command(f"cat {collided} {suffix_file} > {extended}")

hash_final1, hash_final2 = (
    run_command(f"md5sum {extended}") for extended in (final1, final2)
)

for extended, digest_line in ((final1, hash_final1), (final2, hash_final2)):
    print(f"Hash MD5 do {extended}: {digest_line}")

Hash MD5 do final1.bin: 3d14e0bb9c99ad99019bf3373a0a8928  final1.bin
Hash MD5 do final2.bin: 3d14e0bb9c99ad99019bf3373a0a8928  final2.bin


# Tarefa 3: Gerando Dois Executáveis com o Mesmo Hash MD5

Passo 1: Criar e Compilar o Código C

In [7]:
# C source for the lab program: a 200-byte array filled with 0x41 that main()
# prints one byte at a time with "%x", followed by a newline.
program_code = "".join([
    "\n#include <stdio.h>\n",
    "\n",
    "unsigned char xyz[200] = {\n",
    # 20 rows of 10 initializers each -> 200 array elements
    ("0x41, " * 9 + "0x41,\n") * 20,
    "};\n",
    "\n",
    "int main()\n",
    "{\n",
    "    int i;\n",
    "    for (i = 0; i < 200; i++){\n",
    '        printf("%x", xyz[i]);\n',
    "    }\n",
    # "\\n" keeps a literal backslash-n in the C source
    '    printf("\\n");\n',
    "}\n",
])

In [8]:
# Write the C source to disk so it can be compiled in the next cell
program_file = "program.c"
with open(program_file, mode="w") as f:
    # end="" so nothing is appended beyond the source text itself
    print(program_code, end="", file=f)
print("Código C salvo em '{}'.".format(program_file))

Código C salvo em 'program.c'.


In [9]:
# Compile the C source. run_command does not report a failing exit status by
# default, so drop any binary left by an earlier run and confirm that gcc
# produced a fresh one before announcing success.
if os.path.exists("program"):
    os.remove("program")
run_command(f"gcc -o program {program_file}")
if not os.path.isfile("program"):
    raise RuntimeError(f"gcc não gerou o executável 'program' a partir de '{program_file}'.")
print("Código compilado para 'program'.")

Código compilado para 'program'.


Passo 2: Modificar o Executável para Criar Colisão de Hash

In [10]:
# Split the binary into a prefix and a suffix around the collision region

# Number of leading bytes kept as the prefix; per the original note the
# collision region starts here and the prefix includes 32 bytes of the array.
prefix_end = 0x3040
# 128 bytes reserved for the collision region
collision_region_size = 0x80
# tail -c +N counts from 1, hence the +1 to start right after the region
collision_end = prefix_end + collision_region_size + 1

prefix, suffix = "prefix.bin", "suffix.bin"

# Carve the two parts out of the compiled program
for split_command in (
    f"head -c {prefix_end} program > {prefix}",
    f"tail -c +{collision_end} program > {suffix}",
):
    run_command(split_command)

print("Arquivo dividido em '{}' e '{}'.".format(prefix, suffix))

Arquivo dividido em 'prefix.bin' e 'suffix.bin'.


In [11]:
# Generate the two colliding files, both starting with the binary prefix
output1 = "collision1.bin"
output2 = "collision2.bin"

# Remove leftovers from an earlier run so the existence check below reflects
# this invocation only.
for stale_output in (output1, output2):
    if os.path.exists(stale_output):
        os.remove(stale_output)

collision_command = f"md5collgen -p {prefix} -o {output1} {output2}"
run_command(collision_command)

# run_command does not report a failing exit status by default, so confirm
# that both files exist before announcing success.
missing_outputs = [path for path in (output1, output2) if not os.path.isfile(path)]
if missing_outputs:
    raise RuntimeError(f"md5collgen não gerou: {', '.join(missing_outputs)}")

print(f"Colisões geradas: '{output1}' e '{output2}'.")

Colisões geradas: 'collision1.bin' e 'collision2.bin'.


In [12]:
# Rebuild two different binaries: each colliding file followed by the same suffix
program1, program2 = "program1", "program2"

for collided_prefix, rebuilt in ((output1, program1), (output2, program2)):
    run_command(f"cat {collided_prefix} {suffix} > {rebuilt}")

# Files produced by shell redirection need the execute bit before being run
for rebuilt in (program1, program2):
    run_command(f"chmod +x {rebuilt}")

print("Programas '{}' e '{}' criados.".format(program1, program2))

Programas 'program1' e 'program2' criados.


In [13]:
# Run both rebuilt programs, show what each prints, then diff the two files
output_program1, output_program2 = (
    run_command(f"./{binary}") for binary in (program1, program2)
)

for binary, printed in ((program1, output_program1), (program2, output_program2)):
    print(f"Saída de {binary}: {printed}")

diff_command = "diff " + program1 + " " + program2
diff_output = run_command(diff_command)

print("Diferenças entre os arquivos:\n" + diff_output)

Saída de program1: 4141414141414141414141414141414141414141414141414141414141414141e98a71d23b63c44a37841cfd21edf47f73661ab0435a338236b67735acc7b656a9fb9c74a6f7fbeb5b3cbcce432a94c1dec3f350d2358ad18412908829f99f384eb2e977dcb391fbcd34c48c94014cf1132a0641c93d0adc5070975f68c7d39d6a489785c11b39fdc33b8a68dcec29cc36874458b75e5b30826f16da441414141414141414141414141414141414141414141414141414141414141414141414141414141
Saída de program2: 4141414141414141414141414141414141414141414141414141414141414141e98a71d23b63c44a37841cfd21edf47f73661a30435a338236b67735acc7b656a9fb9c74a6f7fbeb5b3cbccec32a94c1dec3f350d2358ad18412108829f99f384eb2e977dcb391fbcd34c48c94014cf113220641c93d0adc5070975f68c7d39d6a489785c11b39fdc33b8ae8ccec29cc36874458b75e5b3026f16da441414141414141414141414141414141414141414141414141414141414141414141414141414141


Passo 4: Verificar o Hash MD5 dos Executáveis

In [23]:
# Hash both executables with md5sum and show the two results
hash_program1, hash_program2 = (
    run_command(f"md5sum {binary}") for binary in (program1, program2)
)

for binary, digest_line in ((program1, hash_program1), (program2, hash_program2)):
    print(f"Hash MD5 de {binary}: {digest_line}")

Diferenças entre os arquivos:
Binary files program1 and program2 differ
Hash MD5 de program1: cae030aee2fe395d63a70685ff2490c9  program1
Hash MD5 de program2: cae030aee2fe395d63a70685ff2490c9  program2


# Tarefa 4: Fazendo os Programas se Comportarem de Forma Diferente

Passo 1: Modificar o Código C para Diferentes Comportamentos

In [15]:
# Build the modified C source: two 200-byte arrays X and Y, both filled with
# 0x41; main() calls benign_code() when they are equal and malicious_code()
# otherwise.
program_code_modified = "".join([
    "\n#include <stdio.h>\n",
    "\n",
    "unsigned char X[200] = {\n",
    # 20 rows of 10 initializers each -> 200 array elements
    ("0x41, " * 9 + "0x41,\n") * 20,
    "};\n",
    "\n",
    "unsigned char Y[200] = {\n",
    ("0x41, " * 9 + "0x41,\n") * 20,
    "};\n",
    "\n",
    "void benign_code() {\n",
    # "\\n" keeps a literal backslash-n in the C source
    '    printf("Executando código benigno.\\n");\n',
    "}\n",
    "\n",
    "void malicious_code() {\n",
    '    printf("Executando código malicioso!\\n");\n',
    "}\n",
    "\n",
    "int main()\n",
    "{\n",
    "    int i;\n",
    "    int equal = 1;\n",
    "    for (i = 0; i < 200; i++) {\n",
    "        if (X[i] != Y[i]) {\n",
    "            equal = 0;\n",
    "            break;\n",
    "        }\n",
    "    }\n",
    "    if (equal) {\n",
    "        benign_code();\n",
    "    } else {\n",
    "        malicious_code();\n",
    "    }\n",
    "    return 0;\n",
    "}\n",
])

# Save the modified C source to a file
program_file_modified = "program_modified.c"
# Explicit UTF-8: the source embeds accented text in its printf strings, and
# open() would otherwise use the platform's locale encoding.
with open(program_file_modified, "w", encoding="utf-8") as f:
    f.write(program_code_modified)

print(f"Código C modificado salvo em '{program_file_modified}'.")

Código C modificado salvo em 'program_modified.c'.


Passo 2: Compilar o Código C Modificado

In [16]:
# Compile the modified C source. run_command does not report a failing exit
# status by default, so drop any binary left by an earlier run and confirm
# that gcc produced a fresh one before announcing success.
if os.path.exists("program_modified"):
    os.remove("program_modified")
compile_output_modified = run_command(f"gcc -o program_modified {program_file_modified}")
if not os.path.isfile("program_modified"):
    raise RuntimeError(
        f"gcc não gerou o executável 'program_modified' a partir de '{program_file_modified}'."
    )
print("Código compilado para 'program_modified'.")


Código compilado para 'program_modified'.


Passo 3: Modificar o Binário para Criar Diferentes Comportamentos

In [17]:
# Split the modified binary into a prefix and a suffix around the collision region

# Offsets as established for this binary
prefix_end_modified = 0x3040   # Start of the collision region; the prefix includes 32 bytes of the array
collision_region_size = 0x80  # 128 bytes (collision region)
# Derived from this cell's own prefix_end_modified so the cell does not depend
# on the prefix_end variable of the earlier task. The +1 is because
# tail -c +N counts bytes starting at 1.
collision_end_modified = prefix_end_modified + collision_region_size + 1  # First suffix byte (1-based)

prefix_modified = "prefix_modified.bin"
suffix_modified = "suffix_modified.bin"

# Carve the two parts out of the compiled program
run_command(f"head -c {prefix_end_modified} program_modified > {prefix_modified}")
run_command(f"tail -c +{collision_end_modified} program_modified > {suffix_modified}")

print(f"Arquivo dividido em '{prefix_modified}' e '{suffix_modified}'.")

Arquivo dividido em 'prefix_modified.bin' e 'suffix_modified.bin'.


In [18]:
# Generate the two colliding files, both starting with the modified binary's prefix
output1_modified = "collision1_modified.bin"
output2_modified = "collision2_modified.bin"

# Remove leftovers from an earlier run so the existence check below reflects
# this invocation only.
for stale_output in (output1_modified, output2_modified):
    if os.path.exists(stale_output):
        os.remove(stale_output)

collision_command_modified = f"md5collgen -p {prefix_modified} -o {output1_modified} {output2_modified}"
run_command(collision_command_modified)

# run_command does not report a failing exit status by default, so confirm
# that both files exist before announcing success.
missing_outputs = [
    path for path in (output1_modified, output2_modified) if not os.path.isfile(path)
]
if missing_outputs:
    raise RuntimeError(f"md5collgen não gerou: {', '.join(missing_outputs)}")

print(f"Colisões geradas: '{output1_modified}' e '{output2_modified}'.")

Colisões geradas: 'collision1_modified.bin' e 'collision2_modified.bin'.


In [19]:
# Patch array Y so that it matches X: build a suffix in which the collision
# bytes taken from the first colliding file are copied into Y's position.
suffix_a = "suffix_a.bin"
suffix_b = "suffix_b.bin"
suffix_c = "suffix_c.bin"

AAA_region = 0x20 + 0x40 # last bytes of X plus the first 32 'A' bytes of Y
# 1-based offset (for tail -c +N) of the first byte after the copied region
sufix_tail = collision_end_modified + AAA_region + collision_region_size

# Select the pieces of the suffix. The file names come from the variables set
# in the earlier cells instead of being repeated as literals, so a rename
# there cannot silently desynchronize this cell.
# suffix_b: the trailing collision_region_size bytes of the first colliding file
run_command(f"tail -c {collision_region_size} {output1_modified} > {suffix_b}")
# suffix_a: the bytes between the collision region and the spot to patch
run_command(f"head -c {AAA_region} {suffix_modified} > {suffix_a}")
# suffix_c: the rest of the original binary after the patched region
run_command(f"tail -c +{sufix_tail} program_modified > {suffix_c}")

# Join the pieces of the suffix

suffix1_modified = "suffix1_modified.bin"

run_command(f"cat {suffix_a} {suffix_b} {suffix_c} > {suffix1_modified}")

print(f"Sufixo 1 gerado: `{suffix1_modified}'.")


Sufixo 1 gerado: `suffix1_modified.bin'.


In [20]:
# Rebuild two different binaries: each colliding file followed by the same patched suffix
program1_modified, program2_modified = "program1_modified", "program2_modified"

for collided_prefix, rebuilt in (
    (output1_modified, program1_modified),
    (output2_modified, program2_modified),
):
    run_command(f"cat {collided_prefix} {suffix1_modified} > {rebuilt}")

# Files produced by shell redirection need the execute bit before being run
for rebuilt in (program1_modified, program2_modified):
    run_command(f"chmod +x {rebuilt}")

print("Programas '{}' e '{}' criados.".format(program1_modified, program2_modified))

Programas 'program1_modified' e 'program2_modified' criados.


Passo 4: Testar o Comportamento dos Programas

In [21]:
# Run both rebuilt programs and show what each one prints
output_program1_modified, output_program2_modified = (
    run_command(f"./{binary}") for binary in (program1_modified, program2_modified)
)

for binary, printed in (
    (program1_modified, output_program1_modified),
    (program2_modified, output_program2_modified),
):
    print(f"Saída de {binary}: {printed}")

Saída de program1_modified: Executando código benigno.
Saída de program2_modified: Executando código malicioso!


Passo 5: Verificar o Hash MD5 dos Executáveis

In [22]:
# Verificar os hashes MD5 dos dois executáveis
hash_program1_modified = run_command(f"md5sum {program1_modified}")
hash_program2_modified = run_command(f"md5sum {program2_modified}")

print(f"Hash MD5 de {program1_modified}: {hash_program1_modified}")
print(f"Hash MD5 de {program2_modified}: {hash_program2_modified}")

Hash MD5 de program1_modified: 257ac5464189357288a48e2a6a7947fa  program1_modified
Hash MD5 de program2_modified: 257ac5464189357288a48e2a6a7947fa  program2_modified
