In [1]:
%%writefile sha256.cuh
#ifndef SHA256_H
#define SHA256_H
#include <stdint.h>
#include <string.h>


/****************************** MACROS ******************************/
#define SHA256_BLOCK_SIZE 32            // SHA256 outputs a 32 byte digest

#define ROTLEFT(a,b) (((a) << (b)) | ((a) >> (32-(b))))
#define ROTRIGHT(a,b) (((a) >> (b)) | ((a) << (32-(b))))

#define CH(x,y,z) (((x) & (y)) ^ (~(x) & (z)))
#define MAJ(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
#define EP0(x) (ROTRIGHT(x,2) ^ ROTRIGHT(x,13) ^ ROTRIGHT(x,22))
#define EP1(x) (ROTRIGHT(x,6) ^ ROTRIGHT(x,11) ^ ROTRIGHT(x,25))
#define SIG0(x) (ROTRIGHT(x,7) ^ ROTRIGHT(x,18) ^ ((x) >> 3))
#define SIG1(x) (ROTRIGHT(x,17) ^ ROTRIGHT(x,19) ^ ((x) >> 10))

#define checkCudaErrors(x) \
{ \
    cudaGetLastError(); \
    x; \
    cudaError_t err = cudaGetLastError(); \
    if (err != cudaSuccess) \
        printf("GPU: cudaError %d (%s)\n", err, cudaGetErrorString(err)); \
}
/**************************** DATA TYPES ****************************/
typedef unsigned char BYTE;             // 8-bit byte
typedef uint32_t  WORD;             // 32-bit word, change to "long" for 16-bit machines

typedef struct JOB {
	BYTE * data;
	unsigned long long size;
	BYTE digest[64];
	char fname[128];
}JOB;


typedef struct {
	BYTE data[64];
	WORD datalen;
	unsigned long long bitlen;
	WORD state[8];
} SHA256_CTX;

__constant__ WORD dev_k[64];

static const WORD host_k[64] = {
	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5,0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5,
	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3,0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174,
	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc,0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da,
	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7,0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967,
	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13,0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85,
	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3,0xd192e819,0xd6990624,0xf40e3585,0x106aa070,
	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5,0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3,
	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208,0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
};

/*********************** FUNCTION DECLARATIONS **********************/
char * print_sha(BYTE * buff);
__device__ void sha256_init(SHA256_CTX *ctx);
__device__ void sha256_update(SHA256_CTX *ctx, const BYTE data[], size_t len);
__device__ void sha256_final(SHA256_CTX *ctx, BYTE hash[]);


char * hash_to_string(BYTE * buff) {
	char * string = (char *)malloc(70);
	int k, i;
	for (i = 0, k = 0; i < 32; i++, k+= 2)
	{
		sprintf(string + k, "%.2x", buff[i]);
		//printf("%02x", buff[i]);
	}
	string[64] = 0;
	return string;
}

void print_job(JOB * j){
	printf("%s  %s\n", hash_to_string(j->digest), j->fname);
}

void print_jobs(JOB ** jobs, int n) {
	for (int i = 0; i < n; i++)
	{
        print_job(jobs[i]);
		// printf("@ %p JOB[%i] \n", jobs[i], i);
		// printf("\t @ 0x%p data = %x \n", jobs[i]->data, (jobs[i]->data == 0)? 0 : jobs[i]->data[0]);
		// printf("\t @ 0x%p size = %llu \n", &(jobs[i]->size), jobs[i]->size);
		// printf("\t @ 0x%p fname = %s \n", &(jobs[i]->fname), jobs[i]->fname);
		// printf("\t @ 0x%p digest = %s \n------\n", jobs[i]->digest, hash_to_string(jobs[i]->digest));
	}
}

__device__ void mycpy12(uint32_t *d, const uint32_t *s) {
#pragma unroll 3
    for (int k=0; k < 3; k++) d[k] = s[k];
}

__device__ void mycpy16(uint32_t *d, const uint32_t *s) {
#pragma unroll 4
    for (int k=0; k < 4; k++) d[k] = s[k];
}

__device__ void mycpy32(uint32_t *d, const uint32_t *s) {
#pragma unroll 8
    for (int k=0; k < 8; k++) d[k] = s[k];
}

__device__ void mycpy44(uint32_t *d, const uint32_t *s) {
#pragma unroll 11
    for (int k=0; k < 11; k++) d[k] = s[k];
}

__device__ void mycpy48(uint32_t *d, const uint32_t *s) {
#pragma unroll 12
    for (int k=0; k < 12; k++) d[k] = s[k];
}

__device__ void mycpy64(uint32_t *d, const uint32_t *s) {
#pragma unroll 16
    for (int k=0; k < 16; k++) d[k] = s[k];
}

__device__ void sha256_transform(SHA256_CTX *ctx, const BYTE data[])
{
	WORD a, b, c, d, e, f, g, h, i, j, t1, t2, m[64];
    // WORD S[8];

    //mycpy32(S, ctx->state);

    #pragma unroll 16
	for (i = 0, j = 0; i < 16; ++i, j += 4)
		m[i] = (data[j] << 24) | (data[j + 1] << 16) | (data[j + 2] << 8) | (data[j + 3]);

    #pragma unroll 64
	for (; i < 64; ++i)
		m[i] = SIG1(m[i - 2]) + m[i - 7] + SIG0(m[i - 15]) + m[i - 16];

	a = ctx->state[0];
	b = ctx->state[1];
	c = ctx->state[2];
	d = ctx->state[3];
	e = ctx->state[4];
	f = ctx->state[5];
	g = ctx->state[6];
	h = ctx->state[7];

    #pragma unroll 64
	for (i = 0; i < 64; ++i) {
		t1 = h + EP1(e) + CH(e, f, g) + dev_k[i] + m[i];
		t2 = EP0(a) + MAJ(a, b, c);
		h = g;
		g = f;
		f = e;
		e = d + t1;
		d = c;
		c = b;
		b = a;
		a = t1 + t2;
	}

	ctx->state[0] += a;
	ctx->state[1] += b;
	ctx->state[2] += c;
	ctx->state[3] += d;
	ctx->state[4] += e;
	ctx->state[5] += f;
	ctx->state[6] += g;
	ctx->state[7] += h;
}

__device__ void sha256_init(SHA256_CTX *ctx)
{
	ctx->datalen = 0;
	ctx->bitlen = 0;
	ctx->state[0] = 0x6a09e667;
	ctx->state[1] = 0xbb67ae85;
	ctx->state[2] = 0x3c6ef372;
	ctx->state[3] = 0xa54ff53a;
	ctx->state[4] = 0x510e527f;
	ctx->state[5] = 0x9b05688c;
	ctx->state[6] = 0x1f83d9ab;
	ctx->state[7] = 0x5be0cd19;
}

__device__ void sha256_update(SHA256_CTX *ctx, const BYTE data[], size_t len)
{
	WORD i;

	// for each byte in message
	for (i = 0; i < len; ++i) {
		// ctx->data == message 512 bit chunk
		ctx->data[ctx->datalen] = data[i];
		ctx->datalen++;
		if (ctx->datalen == 64) {
			sha256_transform(ctx, ctx->data);
			ctx->bitlen += 512;
			ctx->datalen = 0;
		}
	}
}

__device__ void sha256_final(SHA256_CTX *ctx, BYTE hash[])
{
	WORD i;

	i = ctx->datalen;

	// Pad whatever data is left in the buffer.
	if (ctx->datalen < 56) {
		ctx->data[i++] = 0x80;
		while (i < 56)
			ctx->data[i++] = 0x00;
	}
	else {
		ctx->data[i++] = 0x80;
		while (i < 64)
			ctx->data[i++] = 0x00;
		sha256_transform(ctx, ctx->data);
		memset(ctx->data, 0, 56);
	}

	// Append to the padding the total message's length in bits and transform.
	ctx->bitlen += ctx->datalen * 8;
	ctx->data[63] = ctx->bitlen;
	ctx->data[62] = ctx->bitlen >> 8;
	ctx->data[61] = ctx->bitlen >> 16;
	ctx->data[60] = ctx->bitlen >> 24;
	ctx->data[59] = ctx->bitlen >> 32;
	ctx->data[58] = ctx->bitlen >> 40;
	ctx->data[57] = ctx->bitlen >> 48;
	ctx->data[56] = ctx->bitlen >> 56;
	sha256_transform(ctx, ctx->data);

	// Since this implementation uses little endian byte ordering and SHA uses big endian,
	// reverse all the bytes when copying the final state to the output hash.
	for (i = 0; i < 4; ++i) {
		hash[i] = (ctx->state[0] >> (24 - i * 8)) & 0x000000ff;
		hash[i + 4] = (ctx->state[1] >> (24 - i * 8)) & 0x000000ff;
		hash[i + 8] = (ctx->state[2] >> (24 - i * 8)) & 0x000000ff;
		hash[i + 12] = (ctx->state[3] >> (24 - i * 8)) & 0x000000ff;
		hash[i + 16] = (ctx->state[4] >> (24 - i * 8)) & 0x000000ff;
		hash[i + 20] = (ctx->state[5] >> (24 - i * 8)) & 0x000000ff;
		hash[i + 24] = (ctx->state[6] >> (24 - i * 8)) & 0x000000ff;
		hash[i + 28] = (ctx->state[7] >> (24 - i * 8)) & 0x000000ff;
	}
}

#endif   // SHA256_H

Writing sha256.cuh


In [2]:
%%writefile encrypt.cu
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "sha256.cuh"

__global__ void CudaCryptKernel(const BYTE *rawPassword, BYTE *encryptedPassword) {
    SHA256_CTX ctx;
    sha256_init(&ctx);
    sha256_update(&ctx, rawPassword, 4);
    sha256_final(&ctx, encryptedPassword);
}

int main(int argc, char *argv[]) {
    if (argc != 2) {
        printf("Usage: %s <password>\n", argv[0]);
        return 1;
    }

    const char *inputPassword = argv[1];

    if (!(strlen(inputPassword) == 4 &&
          inputPassword[0] >= 'A' && inputPassword[0] <= 'Z' &&
          inputPassword[1] >= 'A' && inputPassword[1] <= 'Z' &&
          inputPassword[2] >= '0' && inputPassword[2] <= '9' &&
          inputPassword[3] >= '0' && inputPassword[3] <= '9')) {
        printf("Error: The input password is incorrect or out of range.\n");
        return 1;
    }

    BYTE passwordBytes[4];
    passwordBytes[0] = (BYTE)inputPassword[0];
    passwordBytes[1] = (BYTE)inputPassword[1];
    passwordBytes[2] = (BYTE)inputPassword[2];
    passwordBytes[3] = (BYTE)inputPassword[3];

    BYTE *gpuEncryptedPass, *gpuPasswordBytes;
    cudaMalloc((void **)&gpuEncryptedPass, sizeof(BYTE) * SHA256_BLOCK_SIZE);
    cudaMalloc((void **)&gpuPasswordBytes, sizeof(BYTE) * 4);

    cudaMemcpy(gpuPasswordBytes, passwordBytes, sizeof(BYTE) * 4, cudaMemcpyHostToDevice);

    CudaCryptKernel<<<1, 1>>>(gpuPasswordBytes, gpuEncryptedPass);

    BYTE encryptedPass[SHA256_BLOCK_SIZE];
    cudaMemcpy(encryptedPass, gpuEncryptedPass, sizeof(BYTE) * SHA256_BLOCK_SIZE, cudaMemcpyDeviceToHost);

    printf("Hashed Password: %s -> ", inputPassword);
    for (int i = 0; i < SHA256_BLOCK_SIZE; i++) {
        printf("%02x", encryptedPass[i]);
    }
    printf("\n");

    cudaFree(gpuEncryptedPass);
    cudaFree(gpuPasswordBytes);

    return 0;
}

Writing encrypt.cu


In [3]:
!nvcc -o enc encrypt.cu

In [4]:
!./enc CD89

Hashed Password: CD89 -> 28628992c0b5174fbed37c1f8c31872b26aab04898639763e7044cd85c536ed6


In [5]:
%%writefile decrypt.cu
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "sha256.cuh"

#define MAX_COMBINATIONS 26 * 26 * 10 * 10
#define PASSWORD_LENGTH 5

__global__ void CudaCryptKernel(BYTE *userHash, int *found, BYTE *decryptedPassword) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    if (idx >= MAX_COMBINATIONS || *found) {
        return;
    }

    int c1 = idx / (26 * 10 * 10);
    int c2 = (idx / (10 * 10)) % 26;
    int d1 = (idx / 10) % 10;
    int d2 = idx % 10;

    BYTE inputPassword[PASSWORD_LENGTH];
    inputPassword[0] = 'A' + c1;
    inputPassword[1] = 'A' + c2;
    inputPassword[2] = '0' + d1;
    inputPassword[3] = '0' + d2;
    inputPassword[PASSWORD_LENGTH - 1] = '\0';

    BYTE passwordBytes[4];
    passwordBytes[0] = inputPassword[0];
    passwordBytes[1] = inputPassword[1];
    passwordBytes[2] = inputPassword[2];
    passwordBytes[3] = inputPassword[3];

    SHA256_CTX ctx;
    sha256_init(&ctx);
    sha256_update(&ctx, passwordBytes, 4);

    BYTE encryptedPass[SHA256_BLOCK_SIZE];
    sha256_final(&ctx, encryptedPass);

    int match = 1;
    for (int i = 0; i < SHA256_BLOCK_SIZE; i++) {
        if (userHash[i] != encryptedPass[i]) {
            match = 0;
            break;
        }
    }

    if (match) {
        *found = 1;
        for (int i = 0; i < PASSWORD_LENGTH; ++i) {
            decryptedPassword[i] = inputPassword[i];
        }
    }
}

int main() {
    BYTE userHash[SHA256_BLOCK_SIZE];
    printf("Enter the hash: ");
    char userInputHash[2 * SHA256_BLOCK_SIZE + 1];
    scanf("%64s", userInputHash);

    for (int i = 0; i < SHA256_BLOCK_SIZE; i++) {
        sscanf(userInputHash + 2 * i, "%2hhx", &userHash[i]);
    }

    int found = 0;
    BYTE decryptedPassword[PASSWORD_LENGTH];

    BYTE *gpuUserHash, *gpuDecryptedPassword;
    int *gpuFound;

    cudaMalloc((void **)&gpuUserHash, sizeof(BYTE) * SHA256_BLOCK_SIZE);
    cudaMalloc((void **)&gpuDecryptedPassword, sizeof(BYTE) * PASSWORD_LENGTH);
    cudaMalloc((void **)&gpuFound, sizeof(int));

    cudaMemcpy(gpuUserHash, userHash, sizeof(BYTE) * SHA256_BLOCK_SIZE, cudaMemcpyHostToDevice);
    cudaMemcpy(gpuFound, &found, sizeof(int), cudaMemcpyHostToDevice);

    int threadsPerBlock = 256;
    int blocksPerGrid = (MAX_COMBINATIONS + threadsPerBlock - 1) / threadsPerBlock;

    CudaCryptKernel<<<blocksPerGrid, threadsPerBlock>>>(gpuUserHash, gpuFound, gpuDecryptedPassword);
    cudaDeviceSynchronize();

    cudaMemcpy(&found, gpuFound, sizeof(int), cudaMemcpyDeviceToHost);
    if (found) {
        cudaMemcpy(decryptedPassword, gpuDecryptedPassword, sizeof(BYTE) * PASSWORD_LENGTH, cudaMemcpyDeviceToHost);
        printf("Password Decrypted: %s\n", decryptedPassword);
    } else {
        printf("No matching password found.\n");
    }

    cudaFree(gpuUserHash);
    cudaFree(gpuDecryptedPassword);
    cudaFree(gpuFound);

    return 0;
}

Writing decrypt.cu


In [6]:
!nvcc -o decryption decrypt.cu

In [7]:
!./decryption

Enter the hash: 28628992c0b5174fbed37c1f8c31872b26aab04898639763e7044cd85c536ed6
Password Decrypted: CD89
