<a href="https://colab.research.google.com/github/qwis95/rsa_demo/blob/main/RSA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [70]:
import math
import numpy as np
import pandas as pd
import random

# How to quickly implement RSA in python


In [71]:
# Arbitrarily define primes p and q
# Here we use unsafe small primes so that the numbers are comprehendable still
p = 3001; q = 4567

In [72]:
# Ensure they are prime numbers
def is_prime(n):
    for i in range(2, n): # [2, n)
        if n % i == 0:
            return False
    return True

print(is_prime(p))
print(is_prime(q))

True
True


In [73]:
# n is defined as p * q so:
n = p * q
phi = (p-1)*(q-1)

In [74]:
# Private key:
e = 587
# Verify that its gcd with phi is 1
print(math.gcd(e, phi) == 1)

True


In [75]:
# Public key:
d = pow(e, -1, phi)
print(d) # 8797523
print( e * d % phi == 1)

8797523
True


In [76]:
# Encrypt a message m:
m = 9753

In [77]:
# Encrypt with the python pow function:
# pow(base, exponent, mod)
# Thus, pow(plaintext, public key, n)
c = pow(m, e, n)

In [78]:
# Decrypt the same way
# pow(ciphertext, private key, n)
p = pow(c, d, n)
print(p == m)

True


# Demo
### Let's start with 2 columns and multiply them together

In [79]:
# Lets see a super basic demo:
data = []
for i in range(10):
  val1 = random.randint(1, 999)
  val2 = random.randint(2, 20)
  data.append([val1, val2])
df = pd.DataFrame(data, columns=['Base', 'Scalar'])
base = df['Base']
scalar = df['Scalar']
enc_base = base.copy()
enc_scalar = scalar.copy()
df

Unnamed: 0,Base,Scalar
0,228,18
1,540,3
2,380,10
3,551,8
4,333,14
5,383,2
6,183,8
7,598,16
8,603,14
9,708,6


# Encrypt the values

In [80]:
for i in range(len(base)):
  enc_base[i] = pow(int(base[i]), e, n)
  enc_scalar[i] = pow(int(scalar[i]), e, n)

In [81]:
df1 = pd.DataFrame({'enc_base': enc_base, 'enc_scalar': enc_scalar})
df1

Unnamed: 0,enc_base,enc_scalar
0,10376177,7705007
1,8538657,13578427
2,12802385,7114149
3,5217778,3699038
4,11844208,7164848
5,9225389,12726111
6,9583984,3699038
7,3076520,7967555
8,10785167,7164848
9,11107243,12959645


# Compute the product

In [82]:
for i in range(len(df1)):
  df1.loc[i, 'enc_product'] = (df1.loc[i, 'enc_base'] * df1.loc[i, 'enc_scalar'] % n)
df1

Unnamed: 0,enc_base,enc_scalar,enc_product
0,10376177,7705007,10609510.0
1,8538657,13578427,13111090.0
2,12802385,7114149,4070987.0
3,5217778,3699038,4014350.0
4,11844208,7164848,12127722.0
5,9225389,12726111,12131211.0
6,9583984,3699038,2421275.0
7,3076520,7967555,4256935.0
8,10785167,7164848,12217432.0
9,11107243,12959645,1688557.0


# Decrypt the calculated product

In [83]:
for i in range(len(df1)):
  df1.loc[i, 'dec_product'] = pow(int(df1.loc[i, 'enc_product']), d, n)
df1

Unnamed: 0,enc_base,enc_scalar,enc_product,dec_product
0,10376177,7705007,10609510.0,4104.0
1,8538657,13578427,13111090.0,1620.0
2,12802385,7114149,4070987.0,3800.0
3,5217778,3699038,4014350.0,4408.0
4,11844208,7164848,12127722.0,4662.0
5,9225389,12726111,12131211.0,766.0
6,9583984,3699038,2421275.0,1464.0
7,3076520,7967555,4256935.0,9568.0
8,10785167,7164848,12217432.0,8442.0
9,11107243,12959645,1688557.0,4248.0


# Calculate with plaintext to verify results:

In [84]:
for i in range(len(df)):
  df.loc[i, 'Product'] = df.loc[i, 'Base'] * df.loc[i, 'Scalar'] % n
df

Unnamed: 0,Base,Scalar,Product
0,228,18,4104.0
1,540,3,1620.0
2,380,10,3800.0
3,551,8,4408.0
4,333,14,4662.0
5,383,2,766.0
6,183,8,1464.0
7,598,16,9568.0
8,603,14,8442.0
9,708,6,4248.0


In [85]:
dfFin = pd.DataFrame({'Product': df['Product'], 'enc_product': df1['enc_product'], 'dec_product': df1['dec_product']})
dfFin

Unnamed: 0,Product,enc_product,dec_product
0,4104.0,10609510.0,4104.0
1,1620.0,13111090.0,1620.0
2,3800.0,4070987.0,3800.0
3,4408.0,4014350.0,4408.0
4,4662.0,12127722.0,4662.0
5,766.0,12131211.0,766.0
6,1464.0,2421275.0,1464.0
7,9568.0,4256935.0,9568.0
8,8442.0,12217432.0,8442.0
9,4248.0,1688557.0,4248.0


# We can see that the values are identical, and that the encrypted product doesn't disclose any of the information of the data.