<a href="https://colab.research.google.com/github/qwis95/rsa_demo/blob/main/RSA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [54]:
import math
import numpy as np
import pandas as pd
import random

# How to quickly implement RSA in python


In [55]:
# Arbitrarily define primes p and q
# Here we use unsafe small primes so that the numbers are comprehendable still
p = 3001; q = 4567

In [56]:
# Ensure they are prime numbers
def is_prime(n):
    for i in range(2, n): # [2, n)
        if n % i == 0:
            return False
    return True

print(is_prime(p))
print(is_prime(q))

True
True


In [57]:
# n is defined as p * q so:
n = p * q
phi = (p-1)*(q-1)

In [58]:
# Private key:
e = 587
# Verify that its gcd with phi is 1
print(math.gcd(e, phi) == 1)

True


In [59]:
# Public key:
d = pow(e, -1, phi)
print(d) # 8797523
print( e * d % phi == 1)

8797523
True


In [60]:
# Encrypt a message m:
m = 9753

In [61]:
# Encrypt with the python pow function:
# pow(base, exponent, mod)
# Thus, pow(plaintext, public key, n)
c = pow(m, e, n)

In [62]:
# Decrypt the same way
# pow(ciphertext, private key, n)
p = pow(c, d, n)
print(p == m)

True


# Demo
### Let's start with 2 columns and multiply them together

In [63]:
# Lets see a super basic demo:
data = []
for i in range(10):
  val1 = random.randint(0, 99)
  val2 = random.randint(2, 5)
  data.append([val1, val2])
df = pd.DataFrame(data, columns=['Base', 'Scalar'])
base = df['Base']
scalar = df['Scalar']
enc_base = base.copy()
enc_scalar = scalar.copy()
df

Unnamed: 0,Base,Scalar
0,21,2
1,77,2
2,0,3
3,11,5
4,87,3
5,33,2
6,55,3
7,70,4
8,29,5
9,86,2


# Encrypt the values

In [64]:
for i in range(len(base)):
  enc_base[i] = pow(int(base[i]), e, n)
  enc_scalar[i] = pow(int(scalar[i]), e, n)

In [65]:
df1 = pd.DataFrame({'enc_base': enc_base, 'enc_scalar': enc_scalar})
df1

Unnamed: 0,enc_base,enc_scalar
0,1699327,12726111
1,2593715,12726111
2,0,13578427
3,1326969,12199167
4,6703314,13578427
5,4691110,12726111
6,10845350,13578427
7,6985300,12893771
8,2727478,12199167
9,12374698,12726111


# Compute the product

In [66]:
for i in range(len(df1)):
  df1.loc[i, 'enc_product'] = (df1.loc[i, 'enc_base'] * df1.loc[i, 'enc_scalar'] % n)
df1

Unnamed: 0,enc_base,enc_scalar,enc_product
0,1699327,12726111,1735935.0
1,2593715,12726111,6768946.0
2,0,13578427,0.0
3,1326969,12199167,10845350.0
4,6703314,13578427,7636368.0
5,4691110,12726111,678322.0
6,10845350,13578427,11885736.0
7,6985300,12893771,12336316.0
8,2727478,12199167,9427194.0
9,12374698,12726111,4855461.0


# Decrypt the calculated product

In [67]:
for i in range(len(df1)):
  df1.loc[i, 'dec_product'] = pow(int(df1.loc[i, 'enc_product']), d, n)
df1

Unnamed: 0,enc_base,enc_scalar,enc_product,dec_product
0,1699327,12726111,1735935.0,42.0
1,2593715,12726111,6768946.0,154.0
2,0,13578427,0.0,0.0
3,1326969,12199167,10845350.0,55.0
4,6703314,13578427,7636368.0,261.0
5,4691110,12726111,678322.0,66.0
6,10845350,13578427,11885736.0,165.0
7,6985300,12893771,12336316.0,280.0
8,2727478,12199167,9427194.0,145.0
9,12374698,12726111,4855461.0,172.0


# Calculate with plaintext to verify results:

In [68]:
for i in range(len(df)):
  df.loc[i, 'Product'] = df.loc[i, 'Base'] * df.loc[i, 'Scalar'] % n
df

Unnamed: 0,Base,Scalar,Product
0,21,2,42.0
1,77,2,154.0
2,0,3,0.0
3,11,5,55.0
4,87,3,261.0
5,33,2,66.0
6,55,3,165.0
7,70,4,280.0
8,29,5,145.0
9,86,2,172.0


In [69]:
dfFin = pd.DataFrame({'Product': df['Product'], 'enc_product': df1['enc_product'], 'dec_product': df1['dec_product']})
dfFin

Unnamed: 0,Product,enc_product,dec_product
0,42.0,1735935.0,42.0
1,154.0,6768946.0,154.0
2,0.0,0.0,0.0
3,55.0,10845350.0,55.0
4,261.0,7636368.0,261.0
5,66.0,678322.0,66.0
6,165.0,11885736.0,165.0
7,280.0,12336316.0,280.0
8,145.0,9427194.0,145.0
9,172.0,4855461.0,172.0


# We can see that the values are identical, and that the encrypted product doesn't disclose any of the information of the data.