# **Hit 2**

**Versión de GPU**

In [None]:
%%writefile hola_mundo.cu
#include <cstdio>
#include <cuda_runtime.h>

// Prints one greeting per launched thread, tagged with its flat global id,
// its block index and its index inside the block. Only the .x components of
// the built-in index variables are read, so a 1-D launch is expected.
__global__ void hello_kernel(){
    const int tid = threadIdx.x;
    const int bid = blockIdx.x;
    // Flat id: every thread of a lower-numbered block is counted first.
    const int gid = bid * static_cast<int>(blockDim.x) + tid;

    printf("Hola mundo desde el hilo global %d (block %d, thread %d)\n", gid, bid, tid);
}

// Launches hello_kernel on a 2 x 4 grid and reports any CUDA error.
// Returns 0 on success and 1 if the launch or the execution failed.
int main(){
    // Launch configuration: 2 blocks of 4 threads each.
    const int threads_per_block = 4;
    const int blocks = 2;

    hello_kernel<<<blocks, threads_per_block>>>();

    // Check the launch itself first.
    const cudaError_t launch_status = cudaGetLastError();
    if (launch_status != cudaSuccess) {
        fprintf(stderr, "Error en el lanzamiento del kernel: %s\n", cudaGetErrorString(launch_status));
        return 1;
    }

    // Then wait for the GPU to finish and check the result of the execution.
    const cudaError_t sync_status = cudaDeviceSynchronize();
    if (sync_status != cudaSuccess) {
        fprintf(stderr, "Error despues de sincronizar: %s\n", cudaGetErrorString(sync_status));
        return 1;
    }

    return 0;
}

Overwriting hola_mundo.cu


In [None]:
!nvcc hola_mundo.cu -o hola_mundo -arch=sm_75

In [None]:
!./hola_mundo

Hola mundo desde el hilo global 0 (block 0, thread 0)
Hola mundo desde el hilo global 1 (block 0, thread 1)
Hola mundo desde el hilo global 2 (block 0, thread 2)
Hola mundo desde el hilo global 3 (block 0, thread 3)
Hola mundo desde el hilo global 4 (block 1, thread 0)
Hola mundo desde el hilo global 5 (block 1, thread 1)
Hola mundo desde el hilo global 6 (block 1, thread 2)
Hola mundo desde el hilo global 7 (block 1, thread 3)


**Versión de CPU**

In [None]:
# CPU counterpart of the GPU hello-world: walks the same 2 x 4 (block, thread)
# index space sequentially and prints the same greeting for every pair.
blocks = 2
threads_per_block = 4

print("Version CPU del hola mundo")
for global_id in range(blocks * threads_per_block):
    # Recover the (block, thread) pair from the flat id.
    block, thread = divmod(global_id, threads_per_block)
    print(f"Hola mundo desde el hilo global {global_id} (block {block}, thread {thread})")

Version CPU del hola mundo
Hola mundo desde el hilo global 0 (block 0, thread 0)
Hola mundo desde el hilo global 1 (block 0, thread 1)
Hola mundo desde el hilo global 2 (block 0, thread 2)
Hola mundo desde el hilo global 3 (block 0, thread 3)
Hola mundo desde el hilo global 4 (block 1, thread 0)
Hola mundo desde el hilo global 5 (block 1, thread 1)
Hola mundo desde el hilo global 6 (block 1, thread 2)
Hola mundo desde el hilo global 7 (block 1, thread 3)


# **Hit 3**

In [None]:
%%writefile ejemplo.cu

#include <iostream>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/generate.h>
#include <thrust/sort.h>
#include <thrust/copy.h>
#include <thrust/random.h>

// Fills a host vector with seeded pseudo-random ints, sorts a device copy with
// Thrust and prints the first M values before and after sorting.
int main() {
    // 1M elements for a quick test; switch to 32<<20 for a more realistic (but costlier) run.
    const int N = 1 << 20;
    // Number of leading elements shown before and after the sort.
    const int M = 8;

    thrust::host_vector<int> h_vec(N);
    thrust::default_random_engine rng(1337);
    thrust::uniform_int_distribution<int> dist;
    thrust::generate(h_vec.begin(), h_vec.end(), [&] { return dist(rng); });

    // Prints `heading`, then M and the first M elements of h_vec on one line.
    const auto show_head = [&](const char* heading) {
        std::cout << heading << M << "): ";
        for (int k = 0; k != M; ++k) std::cout << h_vec[k] << " ";
        std::cout << "\n";
    };

    show_head("Antes (primeros ");

    // Copy everything to a device vector, sort it, and copy back only the
    // first M sorted values over the start of h_vec.
    thrust::device_vector<int> d_vec = h_vec;
    thrust::sort(d_vec.begin(), d_vec.end());
    thrust::copy(d_vec.begin(), d_vec.begin() + M, h_vec.begin());

    show_head("Despues (primeros ");

    return 0;
}

Writing ejemplo.cu


In [None]:
!nvcc ejemplo.cu -o ejemplo -arch=sm_75 -O2

In [None]:
!./ejemplo

Antes (primeros 8): 64538326 1478294467 18110393 180984444 336668598 1321185480 1122440121 294666980 
Despues (primeros 8): 1486 1554 1617 3454 5015 5354 6282 8234 


# **Hit 4**

In [None]:
%%writefile md5_gpu.cu

#include <cstdio>
#include <cstdlib>
#include <cstdint>
#include <cstring>
#include <cuda_runtime.h>

// Left-rotation amounts used by md5_transform_device: four values per group
// of 16 steps, looked up as dev_shifts[group*4 + (step % 4)].
__constant__ uint32_t dev_shifts[16] = { 7,12,17,22, 5,9,14,20, 4,11,16,23, 6,10,15,21 };
// Per-step additive constants used by md5_transform_device, indexed directly
// by the step number (0..63).
__constant__ uint32_t dev_sines[64] = {
    0xd76aa478,0xe8c7b756,0x242070db,0xc1bdceee,0xf57c0faf,0x4787c62a,0xa8304613,0xfd469501,
    0x698098d8,0x8b44f7af,0xffff5bb1,0x895cd7be,0x6b901122,0xfd987193,0xa679438e,0x49b40821,
    0xf61e2562,0xc040b340,0x265e5a51,0xe9b6c7aa,0xd62f105d,0x02441453,0xd8a1e681,0xe7d3fbc8,
    0x21e1cde6,0xc33707d6,0xf4d50d87,0x455a14ed,0xa9e3e905,0xfcefa3f8,0x676f02d9,0x8d2a4c8a,
    0xfffa3942,0x8771f681,0x6d9d6122,0xfde5380c,0xa4beea44,0x4bdecfa9,0xf6bb4b60,0xbebfbc70,
    0x289b7ec6,0xeaa127fa,0xd4ef3085,0x04881d05,0xd9d4d039,0xe6db99e5,0x1fa27cf8,0xc4ac5665,
    0xf4292244,0x432aff97,0xab9423a7,0xfc93a039,0x655b59c3,0x8f0ccc92,0xffeff47d,0x85845dd1,
    0x6fa87e4f,0xfe2ce6e0,0xa3014314,0x4e0811a1,0xf7537e82,0xbd3af235,0x2ad7d2bb,0xeb86d391
};

// Rotates the 32-bit value x left by c bit positions (device only).
// The expression shifts right by (32 - c), so c is expected to be in 1..31.
__device__ __forceinline__ uint32_t dev_left_rotate(uint32_t x, uint32_t c) {
    const uint32_t high_part = x << c;
    const uint32_t low_part  = x >> (32 - c);
    return high_part | low_part;
}

// MD5 compression step: mixes one 64-byte block into the running state.
//   chunk : the 64 input bytes
//   h     : four 32-bit state words, updated in place
__device__ void md5_transform_device(const uint8_t* chunk, uint32_t* h) {

    // Decode the block as sixteen little-endian 32-bit words.
    uint32_t words[16];
    for (int w = 0; w < 16; ++w) {
        const uint8_t* p = chunk + 4 * w;
        words[w] = (uint32_t)p[0]
                 | ((uint32_t)p[1] << 8)
                 | ((uint32_t)p[2] << 16)
                 | ((uint32_t)p[3] << 24);
    }

    uint32_t a = h[0], b = h[1], c = h[2], d = h[3];

    for (int step = 0; step < 64; ++step) {
        // Steps are grouped in four rounds of 16; each round has its own
        // mixing function, message-word order and set of rotation amounts.
        const int round = step >> 4;
        uint32_t mix;
        uint32_t word_idx;
        switch (round) {
            case 0:
                mix = (b & c) | ((~b) & d);
                word_idx = step;
                break;
            case 1:
                mix = (d & b) | ((~d) & c);
                word_idx = (5*step + 1) & 15;
                break;
            case 2:
                mix = b ^ c ^ d;
                word_idx = (3*step + 5) & 15;
                break;
            default:
                mix = c ^ (b | (~d));
                word_idx = (7*step) & 15;
                break;
        }

        const uint32_t rot = dev_shifts[4 * round + (step & 3)];
        const uint32_t sum = a + mix + dev_sines[step] + words[word_idx];
        // The new b is built from the b of this step, before the registers shift.
        const uint32_t next_b = b + dev_left_rotate(sum, rot);

        a = d;
        d = c;
        c = b;
        b = next_b;
    }

    // Fold the result of this block into the running state.
    h[0] += a;
    h[1] += b;
    h[2] += c;
    h[3] += d;
}

// Hashes an already padded message with a single thread.
//   d_msg   : padded message; processed in 64-byte blocks
//   d_len   : length of d_msg in bytes (only d_len / 64 whole blocks are read)
//   d_out_h : receives the four 32-bit state words of the digest
// Every thread other than thread 0 of block 0 exits immediately.
__global__ void md5_kernel(const uint8_t* d_msg, size_t d_len, uint32_t* d_out_h) {
    const bool is_first_thread = (threadIdx.x == 0) && (blockIdx.x == 0);
    if (!is_first_thread) return;

    // Initial state words.
    uint32_t state[4] = { 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476 };

    // Feed the message one 64-byte block at a time.
    const uint8_t* const end = d_msg + (d_len / 64) * 64;
    for (const uint8_t* block = d_msg; block != end; block += 64) {
        md5_transform_device(block, state);
    }

    // Publish the final state to device memory.
    for (int k = 0; k < 4; ++k) {
        d_out_h[k] = state[k];
    }
}

// ---------------- Host helpers (padding) ----------------
// Returns a newly malloc'ed, padded copy of the message: the original bytes,
// one 0x80 byte, zero fill, and the message length in bits stored
// little-endian in the last 8 bytes. The total size is a multiple of 64 and
// is written to *out_len. Returns NULL (leaving *out_len untouched) when the
// allocation fails. The caller releases the buffer with free().
unsigned char* md5_pad_message_host(const unsigned char* initial_msg, size_t initial_len, size_t* out_len) {
    // Smallest multiple of 64 that holds the message, the 0x80 marker and the
    // 8-byte length field.
    const size_t padded_len = ((initial_len + 8) / 64 + 1) * 64;

    unsigned char* buf = (unsigned char*)malloc(padded_len);
    if (buf == NULL) return NULL;

    memcpy(buf, initial_msg, initial_len);
    buf[initial_len] = 0x80;
    memset(buf + initial_len + 1, 0, padded_len - initial_len - 1);

    // Length in bits, least-significant byte first, in the final 8 bytes.
    uint64_t bit_count = (uint64_t)initial_len * 8;
    unsigned char* tail = buf + padded_len - 8;
    for (int k = 0; k < 8; ++k) {
        tail[k] = (unsigned char)(bit_count & 0xFF);
        bit_count >>= 8;
    }

    *out_len = padded_len;
    return buf;
}

// ---------------- Main (host) ----------------
// Usage: <program> "text to hash"
// Pads argv[1] on the host, hashes it on the GPU with md5_kernel and prints
// the digest as 32 hex characters. Returns 0 on success, 1 on any failure.
int main(int argc, char* argv[]) {
    if (argc < 2) {
        printf("Uso: %s \"texto a hashear\"\n", argv[0]);
        return 1;
    }
    const char* input = argv[1];
    const size_t input_len = strlen(input);

    // Padding is done on the host.
    size_t padded_len;
    unsigned char* padded = md5_pad_message_host((const unsigned char*)input, input_len, &padded_len);
    if (padded == NULL) {
        fprintf(stderr, "Fallo malloc padding\n");
        return 1;
    }

    uint8_t* d_msg = nullptr;
    uint32_t* d_out = nullptr;

    // Common failure path: print `fmt` with the CUDA error text, release
    // whatever has been acquired so far and yield exit code 1.
    auto fail = [&](const char* fmt, cudaError_t status) -> int {
        fprintf(stderr, fmt, cudaGetErrorString(status));
        if (d_msg) cudaFree(d_msg);
        if (d_out) cudaFree(d_out);
        free(padded);
        return 1;
    };

    // Device buffers: the padded message and the 4-word result.
    cudaError_t err = cudaMalloc((void**)&d_msg, padded_len);
    if (err != cudaSuccess) return fail("cudaMalloc msg: %s\n", err);

    err = cudaMemcpy(d_msg, padded, padded_len, cudaMemcpyHostToDevice);
    if (err != cudaSuccess) return fail("cudaMemcpy msg: %s\n", err);

    err = cudaMalloc((void**)&d_out, 4 * sizeof(uint32_t));
    if (err != cudaSuccess) return fail("cudaMalloc out: %s\n", err);

    // Launch with 1 block of 1 thread.
    md5_kernel<<<1,1>>>(d_msg, padded_len, d_out);
    err = cudaGetLastError();
    if (err != cudaSuccess) return fail("Launch error: %s\n", err);

    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) return fail("Sync error: %s\n", err);

    uint32_t h_out[4];
    err = cudaMemcpy(h_out, d_out, 4 * sizeof(uint32_t), cudaMemcpyDeviceToHost);
    if (err != cudaSuccess) return fail("Memcpy out: %s\n", err);

    // Each state word is printed least-significant byte first.
    for (int w = 0; w < 4; ++w) {
        const uint32_t word = h_out[w];
        const unsigned b0 = word & 0xFF;
        const unsigned b1 = (word >> 8) & 0xFF;
        const unsigned b2 = (word >> 16) & 0xFF;
        const unsigned b3 = (word >> 24) & 0xFF;
        printf("%02x%02x%02x%02x", b0, b1, b2, b3);
    }
    printf("\n");

    cudaFree(d_msg);
    cudaFree(d_out);
    free(padded);
    return 0;
}

Writing md5_gpu.cu


In [None]:
!nvcc md5_gpu.cu -o md5_gpu -arch=sm_75 -O2

In [None]:
!./md5_gpu "hola mundo"

0ad066a5d29f3f2a2a1c7c17dd082a79


**Versión de CPU**

In [None]:
import sys
import hashlib

def md5_of_string(s: str) -> str:
    """Return the hexadecimal MD5 digest of ``s`` encoded as UTF-8."""
    # hashlib operates on bytes, so the text is encoded first.
    data = s.encode('utf-8')
    hasher = hashlib.md5()
    hasher.update(data)
    return hasher.hexdigest()

def main():
    """Print the MD5 of the first command-line argument.

    Prints a usage line and exits with status 1 when no argument is given.

    NOTE(review): the text comes from ``sys.argv[1]``; when this runs inside a
    notebook kernel instead of as a script, that is presumably one of the
    kernel's own arguments rather than the intended text -- confirm.
    """
    args = sys.argv[1:]
    if not args:
        print("Uso: python md5_cpu.py \"texto a hashear\"")
        sys.exit(1)

    print(md5_of_string(args[0]))


if __name__ == "__main__":
    main()

#python md5_cpu.py "hola mundo"

5338b151154663bac9980b0f044518f2


# **Hit 5**

In [17]:
%%writefile minero_cuda.cu
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <cuda_runtime.h>

// ---------------- MD5 (device) ----------------
// Left-rotation amounts used by md5_transform_device: four values per group
// of 16 steps, looked up as dev_shifts[group*4 + (step % 4)].
__constant__ uint32_t dev_shifts[16] = { 7,12,17,22, 5,9,14,20, 4,11,16,23, 6,10,15,21 };
// Per-step additive constants used by md5_transform_device, indexed directly
// by the step number (0..63).
__constant__ uint32_t dev_sines[64] = {
    0xd76aa478,0xe8c7b756,0x242070db,0xc1bdceee,0xf57c0faf,0x4787c62a,0xa8304613,0xfd469501,
    0x698098d8,0x8b44f7af,0xffff5bb1,0x895cd7be,0x6b901122,0xfd987193,0xa679438e,0x49b40821,
    0xf61e2562,0xc040b340,0x265e5a51,0xe9b6c7aa,0xd62f105d,0x02441453,0xd8a1e681,0xe7d3fbc8,
    0x21e1cde6,0xc33707d6,0xf4d50d87,0x455a14ed,0xa9e3e905,0xfcefa3f8,0x676f02d9,0x8d2a4c8a,
    0xfffa3942,0x8771f681,0x6d9d6122,0xfde5380c,0xa4beea44,0x4bdecfa9,0xf6bb4b60,0xbebfbc70,
    0x289b7ec6,0xeaa127fa,0xd4ef3085,0x04881d05,0xd9d4d039,0xe6db99e5,0x1fa27cf8,0xc4ac5665,
    0xf4292244,0x432aff97,0xab9423a7,0xfc93a039,0x655b59c3,0x8f0ccc92,0xffeff47d,0x85845dd1,
    0x6fa87e4f,0xfe2ce6e0,0xa3014314,0x4e0811a1,0xf7537e82,0xbd3af235,0x2ad7d2bb,0xeb86d391
};

// Rotates the 32-bit value x left by c bit positions (device only).
// The expression shifts right by (32 - c), so c is expected to be in 1..31.
__device__ __forceinline__ uint32_t dev_left_rotate(uint32_t x, uint32_t c) {
    const uint32_t high_part = x << c;
    const uint32_t low_part  = x >> (32 - c);
    return high_part | low_part;
}

// MD5 compression step: mixes one 64-byte block into the running state.
//   chunk : the 64 input bytes
//   h     : four 32-bit state words, updated in place
__device__ void md5_transform_device(const uint8_t* chunk, uint32_t* h) {
    // Decode the block as sixteen little-endian 32-bit words.
    uint32_t words[16];
    for (int w = 0; w < 16; ++w) {
        const uint8_t* p = chunk + 4 * w;
        words[w] = (uint32_t)p[0]
                 | ((uint32_t)p[1] << 8)
                 | ((uint32_t)p[2] << 16)
                 | ((uint32_t)p[3] << 24);
    }

    uint32_t a = h[0], b = h[1], c = h[2], d = h[3];

    for (int step = 0; step < 64; ++step) {
        // Steps are grouped in four rounds of 16; each round has its own
        // mixing function, message-word order and set of rotation amounts.
        const int round = step >> 4;
        uint32_t mix;
        uint32_t word_idx;
        switch (round) {
            case 0:
                mix = (b & c) | ((~b) & d);
                word_idx = step;
                break;
            case 1:
                mix = (d & b) | ((~d) & c);
                word_idx = (5*step + 1) & 15;
                break;
            case 2:
                mix = b ^ c ^ d;
                word_idx = (3*step + 5) & 15;
                break;
            default:
                mix = c ^ (b | (~d));
                word_idx = (7*step) & 15;
                break;
        }

        const uint32_t rot = dev_shifts[4 * round + (step & 3)];
        const uint32_t sum = a + mix + dev_sines[step] + words[word_idx];
        // The new b is built from the b of this step, before the registers shift.
        const uint32_t next_b = b + dev_left_rotate(sum, rot);

        a = d;
        d = c;
        c = b;
        b = next_b;
    }

    // Fold the result of this block into the running state.
    h[0] += a;
    h[1] += b;
    h[2] += c;
    h[3] += d;
}


// Computes the MD5 digest of a message that fits in one 64-byte block.
//   initial_msg, initial_len : message bytes and their count
//   digest_out               : receives the 16 digest bytes
// No length check is performed here: the 0x80 marker is stored at
// chunk[initial_len] and the bit length at chunk[56..63], so the caller must
// keep initial_len within 0..55.
__device__ void cuda_md5_singleblock(const uint8_t* initial_msg, int initial_len, uint8_t* digest_out) {
    // Zero-filled 64-byte block that receives message, marker and length.
    uint8_t chunk[64] = {0};

    for (int k = 0; k < initial_len; ++k) {
        chunk[k] = initial_msg[k];
    }
    chunk[initial_len] = 0x80;

    // Message length in bits, little-endian, at offsets 56..63.
    uint64_t bit_count = (uint64_t)initial_len * 8ULL;
    for (int k = 0; k < 8; ++k) {
        chunk[56 + k] = (uint8_t)(bit_count & 0xFF);
        bit_count >>= 8;
    }

    // Initial state words, then the single compression step.
    uint32_t state[4] = { 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476 };
    md5_transform_device(chunk, state);

    // Serialize each state word least-significant byte first.
    for (int w = 0; w < 4; ++w) {
        for (int k = 0; k < 4; ++k) {
            digest_out[4*w + k] = (uint8_t)((state[w] >> (8 * k)) & 0xFF);
        }
    }
}

// -------------------- Device helpers --------------------
// Writes the decimal representation of num into str as a NUL-terminated
// string, with a leading '-' for negative values.
// str must have room for the sign, every digit and the terminator
// (12 chars are enough for a 32-bit int).
__device__ void int_to_str_device(int num, char *str) {
    if (num == 0) { str[0] = '0'; str[1] = '\0'; return; }

    // Take the magnitude in unsigned arithmetic: `0u - (unsigned)num` is well
    // defined for every int, whereas `-num` overflows for the most negative
    // value.
    unsigned int n = (num < 0) ? (0u - (unsigned int)num) : (unsigned int)num;

    // Digits come out least-significant first, so collect them and reverse.
    char tmp[32];
    int i = 0;
    while (n > 0) { tmp[i++] = (char)('0' + (n % 10)); n /= 10; }

    int j = 0;
    if (num < 0) str[j++] = '-';
    while (i > 0) str[j++] = tmp[--i];
    str[j] = '\0';
}

// Device-side strlen: number of chars before the terminating NUL.
__device__ int d_strlen(const char* s) {
    int len = 0;
    for (const char* p = s; *p != '\0'; ++p) {
        ++len;
    }
    return len;
}

// Returns true when the first prefix_len chars of hash equal those of prefix.
// A prefix_len of zero (or less) always matches.
__device__ bool starts_with_device(const char* hash, const char* prefix, int prefix_len) {
    int k = 0;
    while (k < prefix_len && hash[k] == prefix[k]) {
        ++k;
    }
    // Reaching prefix_len means no mismatch was met on the way.
    return k >= prefix_len;
}

// Converts 16 bytes into 32 lowercase hex characters plus a terminating NUL,
// so hex_out must hold at least 33 chars.
__device__ void bytes_to_hex_device(const uint8_t* bytes, char* hex_out) {
    const char digits[] = "0123456789abcdef";
    char* out = hex_out;
    for (int k = 0; k < 16; ++k) {
        const uint8_t value = bytes[k];
        *out++ = digits[(value >> 4) & 0x0F];   // high nibble first
        *out++ = digits[value & 0x0F];
    }
    *out = '\0';
}

// -------------------- Kernel with attempt counting --------------------
// Searches the nonce range [from, to] for a value whose MD5 digest, computed
// over the decimal nonce immediately followed by `input`, starts with
// `prefix` when written as lowercase hex.
//
// Work split: thread t of the grid tests from + t, from + t + T,
// from + t + 2T, ... with T = blockDim.x * gridDim.x, so any 1-D launch
// configuration covers the whole range.
//
// Parameters:
//   input, input_len     text appended after the nonce, and its length
//   prefix, prefix_len   hex prefix to match, and its length
//   from, to             inclusive nonce range
//   found_flag           must be 0 on entry; set to 1 by the first thread
//                        that finds a match, and polled by all threads so
//                        they can stop early
//   out_nonce, out_hash  written only by the thread that set found_flag;
//                        out_hash receives 33 chars (32 hex digits + NUL)
//   attempts_global      must be 0 on entry; accumulates the number of
//                        candidates processed, flushed in batches and at exit
//
// Only candidates of at most 55 bytes (nonce digits + input) are hashed,
// because the digest routine handles a single block. Longer candidates are
// counted as attempts and skipped; candidates of 2048 bytes or more are
// skipped without being counted.
__global__ void mine_kernel_with_attempts(
    const char* input, int input_len,
    const char* prefix, int prefix_len,
    int from, int to,
    int* found_flag, int* out_nonce, char* out_hash,
    unsigned long long* attempts_global
) {
    // 64-bit loop arithmetic: with an int counter, from + idx or
    // nonce += stride overflows when the range ends close to INT_MAX.
    const long long stride = (long long)blockDim.x * gridDim.x;
    const long long first  = (long long)from
                           + (long long)blockIdx.x * blockDim.x + threadIdx.x;

    const int BUFFER_MAX = 2048;       // candidates this long are skipped uncounted
    const int SINGLE_BLOCK_MAX = 55;   // longest message the digest routine accepts
    char nonce_str[32];
    // Only candidates that will be hashed are copied, so 55 bytes suffice.
    char buffer[SINGLE_BLOCK_MAX];
    uint8_t md5_raw[16];
    char md5_hex[33];

    unsigned long long local_count = 0;
    const unsigned long long FLUSH_EVERY = 1024ULL;

    // Volatile view of the flag so every iteration re-reads memory instead of
    // reusing a value loaded earlier.
    volatile int* flag = found_flag;

    for (long long n = first; n <= (long long)to; n += stride) {
        if (*flag) {
            if (local_count) atomicAdd(attempts_global, local_count);
            return;
        }

        // n lies in [from, to], so it always fits in an int.
        const int nonce = (int)n;

        int_to_str_device(nonce, nonce_str);
        const int nonce_len = d_strlen(nonce_str);
        const int total_len = nonce_len + input_len;

        if (total_len >= BUFFER_MAX) {
            continue;
        }

        // Too long for a single block: count the attempt and move on.
        if (total_len > SINGLE_BLOCK_MAX) {
            local_count++;
            if (local_count >= FLUSH_EVERY) { atomicAdd(attempts_global, local_count); local_count = 0; }
            continue;
        }

        // Candidate = nonce digits followed by the input text.
        for (int i = 0; i < nonce_len; ++i) buffer[i] = nonce_str[i];
        for (int i = 0; i < input_len; ++i) buffer[nonce_len + i] = input[i];

        cuda_md5_singleblock((const uint8_t*)buffer, total_len, md5_raw);
        bytes_to_hex_device(md5_raw, md5_hex);

        local_count++;
        if (local_count >= FLUSH_EVERY) {
            atomicAdd(attempts_global, local_count);
            local_count = 0;
        }

        if (starts_with_device(md5_hex, prefix, prefix_len)) {
            // Only the thread that flips the flag from 0 to 1 publishes its result.
            int prev = atomicCAS(found_flag, 0, 1);
            if (local_count) atomicAdd(attempts_global, local_count);
            if (prev == 0) {
                *out_nonce = nonce;
                for (int i = 0; i < 33; ++i) out_hash[i] = md5_hex[i];
            }
            return;
        }
    }

    // Range exhausted: flush whatever has not been reported yet.
    if (local_count) atomicAdd(attempts_global, local_count);
    return;
}

// -------------------- Host main --------------------
// Usage: <program> <from> <to> <prefix> <input> <output>
// Runs the mining kernel over the nonce range [from, to] and writes one line
// to <output>: "<nonce> <hash> <attempts>". When nothing matches, the nonce is
// 0 and the hash is 32 zeros.
// Returns 0 on success and 1 on bad usage, on any CUDA failure, or when the
// output file cannot be written.
int main(int argc, char *argv[]) {
    if (argc != 6) {
        fprintf(stderr, "Uso: %s <from> <to> <prefix> <input> <output>\n", argv[0]);
        return 1;
    }

    int from = atoi(argv[1]); int to = atoi(argv[2]);
    const char* prefix = argv[3];
    const char* input = argv[4];
    const char* output = argv[5];

    int input_len = (int)strlen(input);
    int prefix_len = (int)strlen(prefix);

    // Launch parameters: 1024 blocks of 256 threads (about 262k threads).
    const int threads = 256;
    const int blocks = 1024;

    char *d_input = NULL, *d_prefix = NULL, *d_hash = NULL;
    int *d_found = NULL, *d_nonce = NULL;
    unsigned long long *d_attempts = NULL;

    // Every local is declared before the first `goto` so that no jump skips
    // an initialization.
    int found = 0;
    int nonce = 0;
    char hash_host[33];
    unsigned long long attempts_host = 0ULL;
    FILE* f = NULL;
    cudaError_t err = cudaSuccess;
    const char* stage = "";   // name of the CUDA call being checked
    int exit_code = 1;        // switched to 0 only after the file is written

    memset(hash_host, 0, sizeof(hash_host));

    // ---- device allocations ----
    stage = "cudaMalloc input";
    err = cudaMalloc(&d_input, (input_len + 1) * sizeof(char));
    if (err != cudaSuccess) goto cuda_fail;
    stage = "cudaMalloc prefix";
    err = cudaMalloc(&d_prefix, (prefix_len + 1) * sizeof(char));
    if (err != cudaSuccess) goto cuda_fail;
    stage = "cudaMalloc hash";
    err = cudaMalloc(&d_hash, 33 * sizeof(char));
    if (err != cudaSuccess) goto cuda_fail;
    stage = "cudaMalloc found";
    err = cudaMalloc(&d_found, sizeof(int));
    if (err != cudaSuccess) goto cuda_fail;
    stage = "cudaMalloc nonce";
    err = cudaMalloc(&d_nonce, sizeof(int));
    if (err != cudaSuccess) goto cuda_fail;
    stage = "cudaMalloc attempts";
    err = cudaMalloc(&d_attempts, sizeof(unsigned long long));
    if (err != cudaSuccess) goto cuda_fail;

    // ---- inputs and zeroed counters ----
    stage = "cudaMemcpy input";
    err = cudaMemcpy(d_input, input, (input_len + 1) * sizeof(char), cudaMemcpyHostToDevice);
    if (err != cudaSuccess) goto cuda_fail;
    stage = "cudaMemcpy prefix";
    err = cudaMemcpy(d_prefix, prefix, (prefix_len + 1) * sizeof(char), cudaMemcpyHostToDevice);
    if (err != cudaSuccess) goto cuda_fail;
    stage = "cudaMemset found";
    err = cudaMemset(d_found, 0, sizeof(int));
    if (err != cudaSuccess) goto cuda_fail;
    stage = "cudaMemset attempts";
    err = cudaMemset(d_attempts, 0, sizeof(unsigned long long));
    if (err != cudaSuccess) goto cuda_fail;

    mine_kernel_with_attempts<<<blocks, threads>>>(
        d_input, input_len,
        d_prefix, prefix_len,
        from, to,
        d_found, d_nonce, d_hash,
        d_attempts
    );

    // Check the launch first, then the result of the execution itself.
    stage = "kernel launch";
    err = cudaGetLastError();
    if (err != cudaSuccess) goto cuda_fail;
    stage = "kernel execution";
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) goto cuda_fail;

    // ---- results back to the host ----
    stage = "cudaMemcpy found";
    err = cudaMemcpy(&found, d_found, sizeof(int), cudaMemcpyDeviceToHost);
    if (err != cudaSuccess) goto cuda_fail;
    stage = "cudaMemcpy attempts";
    err = cudaMemcpy(&attempts_host, d_attempts, sizeof(unsigned long long), cudaMemcpyDeviceToHost);
    if (err != cudaSuccess) goto cuda_fail;

    if (found) {
        stage = "cudaMemcpy nonce";
        err = cudaMemcpy(&nonce, d_nonce, sizeof(int), cudaMemcpyDeviceToHost);
        if (err != cudaSuccess) goto cuda_fail;
        stage = "cudaMemcpy hash";
        err = cudaMemcpy(hash_host, d_hash, 33 * sizeof(char), cudaMemcpyDeviceToHost);
        if (err != cudaSuccess) goto cuda_fail;
    }

    // Output line: "<nonce> <hash> <attempts>"
    f = fopen(output, "w");
    if (!f) {
        fprintf(stderr, "No se pudo abrir archivo %s\n", output);
        goto cleanup;   // exit_code is still 1
    }
    if (found) {
        fprintf(f, "%d %s %llu", nonce, hash_host, attempts_host);
    } else {
        fprintf(f, "0 ");
        for (int i = 0; i < 32; ++i) fputc('0', f);
        fprintf(f, " %llu", attempts_host);
    }
    if (fclose(f) != 0) {
        fprintf(stderr, "No se pudo escribir archivo %s\n", output);
        goto cleanup;
    }

    exit_code = 0;
    goto cleanup;

cuda_fail:
    fprintf(stderr, "CUDA error (%s): %s\n", stage, cudaGetErrorString(err));

cleanup:
    if (d_input) cudaFree(d_input);
    if (d_prefix) cudaFree(d_prefix);
    if (d_hash) cudaFree(d_hash);
    if (d_found) cudaFree(d_found);
    if (d_nonce) cudaFree(d_nonce);
    if (d_attempts) cudaFree(d_attempts);

    return exit_code;
}

Overwriting minero_cuda.cu


In [7]:
!nvcc -arch=sm_75 -O2 minero_cuda.cu -o minero_cuda

In [8]:
%%writefile minero_gpu.py
import subprocess
import time
import sys
from pathlib import Path
import uuid
import json

# Directory that contains this module; the CUDA binary is looked up next to it.
BASE_DIR = Path(__file__).resolve().parent

# The binary carries an .exe suffix on Windows.
CUDA_BIN = BASE_DIR / (
    "minero_cuda.exe" if sys.platform.startswith("win") else "minero_cuda"
)

# Checked at import time: a missing binary raises before any call is made.
if not CUDA_BIN.exists():
    raise RuntimeError(f"No se encontro el binario CUDA: {CUDA_BIN}")

def ejecutar_minero(from_val, to_val, prefix, input_val):
    """
    Run the CUDA miner binary and return its result.

    The binary is called as ``CUDA_BIN <from> <to> <prefix> <input> <output>``
    and is expected to write ``"<nonce> <hash> <attempts>"`` to a temporary
    output file, which is always removed before returning.

    Args:
        from_val: first nonce to test (inclusive).
        to_val: last nonce to test (inclusive).
        prefix: hex prefix the hash must start with.
        input_val: text that is hashed together with the nonce.

    Returns:
        ``{"numero": int, "hash_md5_result": str, "intentos": int}``.
        ``hash_md5_result`` is ``""`` when no match was reported. When the
        binary exits with a non-zero code, or its output is missing or
        malformed, every field is zero/empty.
    """
    output_file = BASE_DIR / f"gpu_output_{uuid.uuid4().hex}.txt"

    def sin_resultado():
        return {"numero": 0, "hash_md5_result": "", "intentos": 0}

    cmd = [
        str(CUDA_BIN),
        str(from_val),
        str(to_val),
        prefix,
        input_val,
        str(output_file),
    ]

    try:
        proc = subprocess.run(cmd, capture_output=True, text=True)

        # A failing binary yields the same structure, empty.
        if proc.returncode != 0:
            return sin_resultado()

        contenido = output_file.read_text().strip() if output_file.exists() else ""

        campos = contenido.split()
        if len(campos) != 3:
            return sin_resultado()
        numero_str, hash_str, intentos_str = campos

        try:
            numero = int(numero_str)
            intentos = int(intentos_str)
        except ValueError:
            return sin_resultado()

        # "No match" is reported as a hash made only of zeros. Testing the
        # hash rather than `numero > 0` keeps the hash of a real hit whose
        # nonce is 0 or negative.
        encontrado = hash_str.strip("0") != ""

        return {
            "numero": numero,
            "hash_md5_result": hash_str if encontrado else "",
            "intentos": intentos,
        }
    finally:
        # Best-effort removal of the temporary file on every exit path,
        # including the early returns above.
        try:
            output_file.unlink(missing_ok=True)
        except Exception:
            pass

Overwriting minero_gpu.py


In [None]:
from minero_gpu import ejecutar_minero

# Ask the GPU miner for a nonce in 0..100000 whose hash, computed together
# with "hola mundo", starts with "00", and show the returned dict.
resultado = ejecutar_minero(0, 100000, "00", "hola mundo")
print(resultado)


{'numero': 21469, 'hash_md5_result': '00633e36003628dfa029e841be3f6218', 'intentos': 40960}


**Versión de CPU**

In [None]:
%%writefile minero_cpu.py
import hashlib

def ejecutar_minero_cpu(from_val, to_val, prefix, input_val):
    """
    CPU version of the miner.

    Tests every nonce from ``from_val`` to ``to_val`` (both inclusive) in
    increasing order and stops at the first one whose MD5 hex digest of
    ``f"{nonce}{input_val}"`` (UTF-8) starts with ``prefix``.

    Returns:
        ``{"numero": int, "hash_md5_result": str, "intentos": int}``.
        On a match, ``numero`` is the nonce, ``hash_md5_result`` its digest and
        ``intentos`` the number of nonces tested so far. Without a match,
        ``numero`` is 0, the hash is ``""`` and ``intentos`` is the size of the
        range.
    """
    intentos = 0

    # enumerate(..., start=1) keeps `intentos` equal to the number of nonces
    # hashed so far, including the current one.
    for intentos, nonce in enumerate(range(from_val, to_val + 1), start=1):
        candidato = f"{nonce}{input_val}".encode("utf-8")
        digest = hashlib.md5(candidato).hexdigest()
        if not digest.startswith(prefix):
            continue
        return {
            "numero": nonce,
            "hash_md5_result": digest,
            "intentos": intentos,
        }

    # Nothing matched in the whole range.
    return {
        "numero": 0,
        "hash_md5_result": "",
        "intentos": intentos,
    }

Writing minero_cpu.py


In [None]:
from minero_cpu import ejecutar_minero_cpu

# Same search as the GPU cell, run sequentially on the CPU: nonces 0..100000,
# prefix "00", text "hola mundo".
resultado_cpu = ejecutar_minero_cpu(0, 100000, "00", "hola mundo")
print(resultado_cpu)

{'numero': 181, 'hash_md5_result': '00618bcc021f5a393c2b348f3c85318c', 'intentos': 182}


# **Hit 6**

In [19]:
import time
import csv
from minero_gpu import ejecutar_minero

# --- CONFIG ---
# Benchmark: run the GPU miner once per prefix (REPETICIONES times each),
# timing every call, then print a summary table with one row per prefix.

# --- CONFIG ---
cadena = "hola mundo"
prefijos = ["0","00","000","0000","00000"]  # prefixes to try
RANGO_FROM = 0
RANGO_TO   = 200000
REPETICIONES = 1
# -------------


resultados = []

for pref in prefijos:
    tiempos = []
    resultados_rep = []
    for rep in range(REPETICIONES):
        # Wall-clock time of the whole call, process start-up included.
        t0 = time.time()
        res = ejecutar_minero(RANGO_FROM, RANGO_TO, pref, cadena)
        t1 = time.time()
        tiempo = t1 - t0

        intentos = res.get("intentos",0)
        # Throughput is left as None when no time elapsed.
        if tiempo>0:
            hashes_por_s = round(intentos/tiempo,1)
        else:
            hashes_por_s = None

        filas = dict(
            prefijo=pref,
            desde=RANGO_FROM,
            hasta=RANGO_TO,
            numero=res.get("numero",0),
            hash=res.get("hash_md5_result",""),
            intentos=intentos,
            tiempo_s=round(tiempo,3),
            hashes_por_s=hashes_por_s,
        )
        tiempos.append(tiempo)
        resultados_rep.append(filas)
        print(f"pref={pref} rep={rep+1} -> {filas}")

    # Only the first repetition of each prefix goes into the summary.
    resultados.append(resultados_rep[0])

print("\n--- RESULTADOS RESUMEN ---")
print("prefijo | numero | intentos | tiempo_s | hashes/s | hash")
for r in resultados:
    print(f"{r['prefijo']:6} | {r['numero']:6} | {r['intentos']:8} | {r['tiempo_s']:8} | {r['hashes_por_s']:10} | {r['hash'][:8]}...")


pref=0 rep=1 -> {'prefijo': '0', 'desde': 0, 'hasta': 200000, 'numero': 355, 'hash': '0c7656bab536172c9d592deb58dcde01', 'intentos': 40960, 'tiempo_s': 0.497, 'hashes_por_s': 82474.6}
pref=00 rep=1 -> {'prefijo': '00', 'desde': 0, 'hasta': 200000, 'numero': 16715, 'hash': '005508cc14203dfd30819aaf0482fa35', 'intentos': 40960, 'tiempo_s': 0.375, 'hashes_por_s': 109304.2}
pref=000 rep=1 -> {'prefijo': '000', 'desde': 0, 'hasta': 200000, 'numero': 12485, 'hash': '000726007b2192b4566fbd3eea4e549c', 'intentos': 40960, 'tiempo_s': 0.339, 'hashes_por_s': 120710.0}
pref=0000 rep=1 -> {'prefijo': '0000', 'desde': 0, 'hasta': 200000, 'numero': 27522, 'hash': '0000ce90bfa639b835d1b9b31094c5ea', 'intentos': 43264, 'tiempo_s': 0.34, 'hashes_por_s': 127097.5}
pref=00000 rep=1 -> {'prefijo': '00000', 'desde': 0, 'hasta': 200000, 'numero': 0, 'hash': '', 'intentos': 200001, 'tiempo_s': 0.333, 'hashes_por_s': 601387.8}

--- RESULTADOS RESUMEN ---
prefijo | numero | intentos | tiempo_s | hashes/s | hash