In [1]:
# Ingest test data
import numpy as np

# Location of the input CSV (point this at your own data file)
file_path = './transportation_test_data.csv'

# Parse the comma-separated file into a numeric array, dropping the two
# header rows at the top of the file
matrix = np.genfromtxt(
    file_path,
    skip_header=2,
    delimiter=',',
)

# The 1st column carries the city label; overwrite it with 1s so that it
# serves as the intercept (\beta_0) column of the regression
matrix[:, 0] = 1

print(matrix)

[[1.00000e+00 1.92000e+05 1.50000e+01 1.80000e+06 5.80000e+03 5.00000e+01]
 [1.00000e+00 1.90400e+05 1.50000e+01 1.79000e+06 6.20000e+03 5.00000e+01]
 [1.00000e+00 1.91200e+05 1.50000e+01 1.78000e+06 6.40000e+03 6.00000e+01]
 [1.00000e+00 1.77600e+05 2.50000e+01 1.77800e+06 6.50000e+03 6.00000e+01]
 [1.00000e+00 1.76800e+05 2.50000e+01 1.75000e+06 6.55000e+03 6.00000e+01]
 [1.00000e+00 1.78400e+05 2.50000e+01 1.74000e+06 6.58000e+03 7.00000e+01]
 [1.00000e+00 1.80800e+05 2.50000e+01 1.72500e+06 8.20000e+03 7.50000e+01]
 [1.00000e+00 1.75200e+05 3.00000e+01 1.72500e+06 8.60000e+03 7.50000e+01]
 [1.00000e+00 1.74400e+05 3.00000e+01 1.72000e+06 8.80000e+03 7.50000e+01]
 [1.00000e+00 1.73920e+05 3.00000e+01 1.70500e+06 9.20000e+03 8.00000e+01]
 [1.00000e+00 1.72800e+05 3.00000e+01 1.71000e+06 9.63000e+03 8.00000e+01]
 [1.00000e+00 1.63200e+05 4.00000e+01 1.70000e+06 1.05700e+04 8.00000e+01]
 [1.00000e+00 1.61600e+05 4.00000e+01 1.69500e+06 1.13300e+04 8.50000e+01]
 [1.00000e+00 1.61600e+05

In [2]:
###  Basic regression in cleartext ###
def basic_regression(matrix):
    """Fit a linear regression in cleartext using the normal equations.

    Column 1 of ``matrix`` is taken as the response vector Y; the remaining
    columns, in their original order, form the design matrix X.

    Args:
        matrix: 2-D numpy array whose column 1 holds the response values.

    Returns:
        The coefficient vector beta = (X^T X)^-1 X^T Y.

    Raises:
        numpy.linalg.LinAlgError: if X^T X is singular (raised by
            ``np.linalg.inv``).
    """
    response_col = 1

    # Split the input into the design matrix (a copy without the response
    # column) and the response vector
    X = np.delete(matrix, response_col, axis=1)
    Y = matrix[:, response_col]

    # Sufficient statistics of the normal equations
    gram = X.T.dot(X)        # X^T X
    moment = X.T.dot(Y)      # X^T Y

    # beta = (X^T X)^-1 X^T Y, via an explicit inverse
    return np.linalg.inv(gram).dot(moment)

In [3]:
###  Basic regression under SCRAM ###

def scram_postprocess(XT_X, XT_Y):
    """Recover the regression coefficients from the revealed aggregates.

    This step happens in cleartext: it only needs the combined X^T X and
    X^T Y, not any individual input rows.

    Args:
        XT_X: the aggregated X^T X matrix.
        XT_Y: the aggregated X^T Y vector.

    Returns:
        beta = (X^T X)^-1 X^T Y.
    """
    gram_inverse = np.linalg.inv(XT_X)
    return np.dot(gram_inverse, XT_Y)
    

def scram_combine(processed):
    """Aggregate the per-firm summaries into a single X^T X and X^T Y.

    In SCRAM this is the step intended to run under encryption; here it is
    carried out with ordinary numpy arithmetic.

    Only the per-firm X^T X and X^T Y terms are summed. The raw per-firm
    x / y entries of each tuple are ignored: they are not needed for the
    result, and stacking them fails when firms contribute different numbers
    of rows.

    Args:
        processed: non-empty sequence of ``(x, y, xT_x, xT_y)`` tuples, as
            produced by ``scram_preprocessing``.

    Returns:
        Tuple ``(XT_X, XT_Y)`` holding the element-wise sums of all xT_x and
        all xT_y. The returned arrays are fresh; the arrays inside
        ``processed`` are left untouched.

    Raises:
        IndexError: if ``processed`` is empty.
    """
    if len(processed) == 0:
        raise IndexError("scram_combine needs at least one preprocessed firm input")

    _, _, first_xT_x, first_xT_y = processed[0]

    # Start from copies so the accumulation never writes into the first
    # firm's own arrays.
    XT_X = np.array(first_xT_x, copy=True)
    XT_Y = np.array(first_xT_y, copy=True)

    for _, _, xT_x, xT_y in processed[1:]:
        # Out-of-place addition keeps the original summation order while
        # allowing dtype promotion (e.g. int + float).
        XT_X = XT_X + xT_x
        XT_Y = XT_Y + xT_y

    # Reveal XT_X, XT_Y
    return (XT_X, XT_Y)

def scram_preprocessing(firm_inputs):
    """Compute each firm's local regression summaries.

    For every input matrix, column 1 is taken as the response y and the
    remaining columns as the design matrix x.

    Args:
        firm_inputs: iterable of 2-D numpy arrays, one per firm.

    Returns:
        A list with one ``(x, y, x^T x, x^T y)`` tuple per input, in input
        order.
    """
    def _summarize(m):
        # Response vector: the second column (index 1)
        y = m[:, 1]
        # Design matrix: everything except the second column
        x = np.delete(m, 1, axis=1)
        x_transposed = x.T
        return (x, y, np.dot(x_transposed, x), np.dot(x_transposed, y))

    return [_summarize(m) for m in firm_inputs]

def scram_regression(matrix):
    """Run the regression through the SCRAM-style pipeline.

    Every row of ``matrix`` is treated as one firm's input. The rows are
    preprocessed individually, their summaries are combined, and the
    coefficients are recovered from the combined X^T X and X^T Y.

    Args:
        matrix: 2-D numpy array; column 1 holds the response values.

    Returns:
        The coefficient vector produced by ``scram_postprocess``.
    """
    # One single-row 2-D matrix (1 row, all columns) per firm
    firm_inputs = [row.reshape(1, -1) for row in matrix]

    XT_X, XT_Y = scram_combine(scram_preprocessing(firm_inputs))
    return scram_postprocess(XT_X, XT_Y)


In [4]:
# Comparison: fit the same `matrix` with the cleartext implementation and with
# the SCRAM-style pipeline, printing both coefficient vectors so they can be
# checked against each other.
beta_clear = basic_regression(matrix)
print("Beta (computed in the clear):", beta_clear)
beta_scram = scram_regression(matrix)
print("Beta (computed under scram):", beta_scram)

Beta (computed in the clear): [ 1.00222561e+05 -6.89522723e+02  5.49412800e-02 -1.30136687e+00
  1.52456367e+02]
Beta (computed under scram): [ 1.00222561e+05 -6.89522723e+02  5.49412800e-02 -1.30136687e+00
  1.52456367e+02]
