<a href="https://colab.research.google.com/github/qwis95/rsa_demo/blob/main/RSA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import math
import numpy as np
import pandas as pd
import random

In [2]:
# Arbitrarily define p and q
p = 3001; q = 4567

In [3]:
# Ensure they are prime numbers
def is_prime(n):
    for i in range(2, n): # [2, n)
        if n % i == 0:
            return False
    return True

print(is_prime(p))
print(is_prime(q))

True
True


In [4]:
n = p * q
phi = (p-1)*(q-1)

In [5]:
# Private key:
e = 103
# Verify that its gcd with phi is 1
print(math.gcd(e, phi) == 1)

True


In [6]:
# Public key:
d = pow(e, -1, phi)
print(d) # 9373
print( e * d % phi == 1)

3191767
True


In [7]:
# To encrypt messages:
m = 17

In [8]:
# Encrypt with the python pow function:
# pow(base, exponent, mod)
# Thus, pow(plaintext, public key, n)
c = pow(m, e, n)

In [9]:
# Decrypt the same way
# pow(cipher, private key, n)
p = pow(c, d, n)
print(p == m)

True


In [10]:
m1 = 9; m2 = 11
m1_encrypted = pow(m1, e, n) # 2892
m2_encrypted = pow(m2, e, n) # 67

In [11]:
# validates that RSA is partially homomorphic
print( (m1_encrypted * m2_encrypted) % n == pow(m1*m2, e, n))

# The product of the encrypted values over the modulo n
# is equivalent to m1 * m2 raised to the power of e over modulo n

True


# Demo
### Let's start with 2 columns and multiply them together

In [12]:
# Lets see a super basic demo:
data = []
for i in range(10):
  val1 = random.randint(0, 99)
  val2 = random.randint(2, 5)
  data.append([val1, val2])
df = pd.DataFrame(data, columns=['Base', 'Scalar'])
base = df['Base']
scalar = df['Scalar']
enc_base = base.copy()
enc_scalar = scalar.copy()
df

Unnamed: 0,Base,Scalar
0,42,5
1,6,4
2,66,2
3,86,2
4,36,4
5,90,2
6,27,5
7,75,4
8,25,5
9,14,2


# Encrypt the values

In [13]:
for i in range(len(base)):
  enc_base[i] = pow(int(base[i]), e, n)
  enc_scalar[i] = pow(int(scalar[i]), e, n)

In [14]:
df1 = pd.DataFrame({'enc_base': enc_base, 'enc_scalar': enc_scalar})
df1

Unnamed: 0,enc_base,enc_scalar
0,11926471,753473
1,8843142,4304022
2,7328344,2884160
3,7304590,2884160
4,4771399,4304022
5,5651997,2884160
6,9664750,753473
7,13585902,4304022
8,9565455,753473
9,3181172,2884160


# Compute the product

In [15]:
for i in range(len(df1)):
  df1.loc[i, 'enc_product'] = (df1.loc[i, 'enc_base'] * df1.loc[i, 'enc_scalar'] % n)
df1

Unnamed: 0,enc_base,enc_scalar,enc_product
0,11926471,753473,13296728.0
1,8843142,4304022,5468640.0
2,7328344,2884160,7954155.0
3,7304590,2884160,11746948.0
4,4771399,4304022,3963050.0
5,5651997,2884160,13038957.0
6,9664750,753473,4084908.0
7,13585902,4304022,709663.0
8,9565455,753473,6673626.0
9,3181172,2884160,9085308.0


# Decrypt the calculated product

In [16]:
for i in range(len(df1)):
  df1.loc[i, 'dec_product'] = pow(int(df1.loc[i, 'enc_product']), d, n)
df1

Unnamed: 0,enc_base,enc_scalar,enc_product,dec_product
0,11926471,753473,13296728.0,210.0
1,8843142,4304022,5468640.0,24.0
2,7328344,2884160,7954155.0,132.0
3,7304590,2884160,11746948.0,172.0
4,4771399,4304022,3963050.0,144.0
5,5651997,2884160,13038957.0,180.0
6,9664750,753473,4084908.0,135.0
7,13585902,4304022,709663.0,300.0
8,9565455,753473,6673626.0,125.0
9,3181172,2884160,9085308.0,28.0


# Calculate with plaintext to verify results:

In [17]:
for i in range(len(df)):
  df.loc[i, 'Product'] = df.loc[i, 'Base'] * df.loc[i, 'Scalar'] % n
df

Unnamed: 0,Base,Scalar,Product
0,42,5,210.0
1,6,4,24.0
2,66,2,132.0
3,86,2,172.0
4,36,4,144.0
5,90,2,180.0
6,27,5,135.0
7,75,4,300.0
8,25,5,125.0
9,14,2,28.0


In [18]:
dfFin = pd.DataFrame({'Product': df['Product'], 'enc_product': df1['enc_product'], 'dec_product': df1['dec_product']})
dfFin

Unnamed: 0,Product,enc_product,dec_product
0,210.0,13296728.0,210.0
1,24.0,5468640.0,24.0
2,132.0,7954155.0,132.0
3,172.0,11746948.0,172.0
4,144.0,3963050.0,144.0
5,180.0,13038957.0,180.0
6,135.0,4084908.0,135.0
7,300.0,709663.0,300.0
8,125.0,6673626.0,125.0
9,28.0,9085308.0,28.0


# We can see that the values are identical, and that the encrypted product doesn't disclose any of the information of the data.