In [73]:
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
import numpy as np

# file_list = [("BLS12", "./bls12/monte_carlo_f_exp_50000.csv"),
#             ("BN", "./bn/monte_carlo_f_exp_50000.csv"),
#             ("MNT4", "./mnt4/monte_carlo_f_exp_50000.csv"),
#             ("MNT6", "./mnt6/monte_carlo_f_exp_50000.csv")]

# (label, CSV path) pairs handed to get_dfs. The list order matters: later
# cells pick frames by position (dataframes[0] .. dataframes[3]).
file_list = [("BLS12", "./bls12/monte_carlo_f_exp_55000.csv"),
            ("BN", "./bn/monte_carlo_f_exp_55000.csv"),
            ("MNT4", "./mnt4/monte_carlo_f_exp_55000.csv"),
            ("MNT6", "./mnt6/monte_carlo_f_exp_55000.csv")]



def get_dfs(files):
    """Load every CSV in ``files`` and keep only rows flagged ``x_is_negative``.

    Parameters
    ----------
    files : iterable of (name, path) pairs
        ``path`` is read with ``pandas.read_csv``. ``name`` is only a label
        and is not used here.

    Returns
    -------
    list of pandas.DataFrame
        One frame per entry of ``files``, in the same order. Each frame holds
        the rows whose ``x_is_negative`` column equals 1.0, with that column
        removed. Row labels from the CSV are kept as-is (no re-indexing).

    Raises
    ------
    KeyError
        If a CSV has no ``x_is_negative`` column.
    """
    results = []
    for _name, path in files:
        raw = pd.read_csv(path)
        # Build a new frame instead of dropping in place on a filtered slice,
        # which is what triggers pandas' SettingWithCopy machinery.
        filtered = raw.loc[raw["x_is_negative"] == 1.0].drop(columns="x_is_negative")
        results.append(filtered)

    return results

In [74]:
from sklearn import linear_model

def factor_out_final_exp(df, group_by = 3):
    """Split each benchmark's run time into a per-pair slope and a fixed intercept.

    Rows are consumed ``group_by`` at a time in their existing order; the
    grouping is positional and ignores index labels. For every group a
    straight line ``run_microseconds = slope * num_pairs + intercept`` is
    fitted by least squares. The slope is stored as
    ``single_pair_miller_time`` and the intercept as ``final_exp_time``.
    All other columns are copied from the first row of the group, and the
    output row keeps that first row's index label.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``num_pairs`` and ``run_microseconds`` columns.
    group_by : int, default 3
        Number of consecutive rows that form one regression group.

    Returns
    -------
    (df_miller_loops, df_final_exps) : tuple of pandas.DataFrame
        Both frames have one row per group and the columns of ``df`` minus
        ``num_pairs`` / ``run_microseconds``, plus ``single_pair_miller_time``
        and ``final_exp_time`` respectively.

    Raises
    ------
    KeyError
        If ``num_pairs`` or ``run_microseconds`` is missing from ``df``.
    ValueError
        If two groups would produce the same row label.
    """
    timing_columns = ["num_pairs", "run_microseconds"]
    missing = [col for col in timing_columns if col not in df.columns]
    if missing:
        raise KeyError("{} not found in axis".format(missing))

    parameter_columns = [col for col in df.columns if col not in timing_columns]

    # Rows are collected in lists and each frame is built once at the end.
    # DataFrame.append no longer exists in pandas 2.x and was quadratic anyway.
    miller_rows = []
    final_exp_rows = []

    positional_group = np.arange(len(df)) // group_by
    for _, group in df.groupby(positional_group):
        # Go through NumPy for the column vectors: pandas 2.x rejects
        # multi-dimensional indexing on a Series (``series[:, np.newaxis]``).
        pairs = group["num_pairs"].to_numpy()[:, np.newaxis]
        micros = group["run_microseconds"].to_numpy()[:, np.newaxis]

        reg = linear_model.LinearRegression(fit_intercept = True)
        model = reg.fit(pairs, micros)

        # The first row of the group supplies the shared parameters; its
        # name (the original row label) becomes the output row label.
        params = group.iloc[0].drop(timing_columns)

        miller_row = params.copy()
        miller_row["single_pair_miller_time"] = model.coef_[0][0]
        miller_rows.append(miller_row)

        final_exp_row = params.copy()
        final_exp_row["final_exp_time"] = model.intercept_[0]
        final_exp_rows.append(final_exp_row)

    df_miller_loops = pd.DataFrame(
        miller_rows, columns = parameter_columns + ["single_pair_miller_time"])
    df_final_exps = pd.DataFrame(
        final_exp_rows, columns = parameter_columns + ["final_exp_time"])

    # Same guarantee the previous append(..., verify_integrity=True) gave.
    if not df_miller_loops.index.is_unique:
        raise ValueError("Indexes have overlapping values")

    return (df_miller_loops, df_final_exps)

In [75]:
# One filtered DataFrame per file_list entry, in file_list order.
dataframes = get_dfs(file_list)

In [76]:
# Preview the frame loaded from the first file_list entry ("BLS12").
dataframes[0].head()

Unnamed: 0,x_bit_length,x_hamming_weight,modulus_limbs,group_limbs,num_pairs,run_microseconds
0,57,39,6,16,2,22083
1,57,39,6,16,4,25577
2,57,39,6,16,6,36677
3,64,7,7,11,2,15900
4,64,7,7,11,4,22633


In [77]:
# Preview the frame loaded from the second file_list entry ("BN").
dataframes[1].head()

Unnamed: 0,six_u_plus_two_bit_length,six_u_plus_two_hamming,modulus_limbs,group_limbs,num_pairs,x_bit_length,x_hamming_weight,run_microseconds
0,51,29,10,2,2,49,29,25174
1,51,29,10,2,4,49,29,28713
2,51,29,10,2,6,49,29,34342
3,5,1,6,4,2,2,2,7393
4,5,1,6,4,4,2,2,8684


In [78]:
# Preview the frame loaded from the third file_list entry ("MNT4").
dataframes[2].head()

Unnamed: 0,modulus_limbs,group_limbs,num_pairs,x_bit_length,x_hamming_weight,exp_w0_bit_length,exp_w0_hamming,exp_w0_is_negative,exp_w1_bit_length,exp_w1_hamming,run_microseconds
0,4,2,2,119,118,1789,279,1,1580,1275,5357
1,4,2,4,119,118,1789,279,1,1580,1275,7141
2,4,2,6,119,118,1789,279,1,1580,1275,8935
3,10,1,2,371,284,792,321,1,1146,669,27374
4,10,1,4,371,284,792,321,1,1146,669,44052


In [79]:
# Preview the frame loaded from the fourth file_list entry ("MNT6").
dataframes[3].head()

Unnamed: 0,modulus_limbs,group_limbs,num_pairs,x_bit_length,x_hamming_weight,exp_w0_bit_length,exp_w0_hamming,exp_w0_is_negative,exp_w1_bit_length,exp_w1_hamming,run_microseconds
0,9,9,2,1917,1474,1601,1371,1,672,17,219275
1,9,9,4,1917,1474,1601,1371,1,672,17,402700
2,9,9,6,1917,1474,1601,1371,1,672,17,606740
3,11,3,2,1740,1291,765,646,1,898,478,215073
4,11,3,4,1740,1291,765,646,1,898,478,425829


In [80]:
from sklearn.model_selection import train_test_split

def split_df(df):
    """Hold out 10% of ``df`` as a test set and print the two sample counts.

    The split uses ``random_state=42``, so the same input always yields the
    same partition. Returns ``(train, test)``.
    """
    train_part, test_part = train_test_split(df, random_state=42, test_size=0.10)

    summary = "Train samples {}, test samples {}".format(len(train_part), len(test_part))
    print(summary)

    return (train_part, test_part)

In [81]:
from sklearn import linear_model
from sklearn import linear_model
from sklearn.linear_model import Lasso

from sklearn.metrics import max_error, mean_absolute_error, r2_score

def pretty_print_polynomial(poly, model, variable_names):
    """Print the fitted model as a sum of monomials on a single line.

    ``poly.powers_`` supplies one row of exponents per term and
    ``model.coef_`` the matching coefficient. Terms whose coefficient is
    exactly zero are left out; the remaining coefficients are rounded to six
    decimals. Each variable with a non-zero exponent is rendered as
    ``name^power`` and factors are joined with `` * ``, terms with `` + ``.
    """
    exponent_table = poly.powers_
    rendered_terms = []

    for row, exponents in enumerate(exponent_table):
        weight = model.coef_[row]
        if weight == 0:
            # Dropped terms are not printed at all.
            continue

        factors = ["{}".format(np.around(weight, decimals=6))]
        for col, exponent in enumerate(exponents):
            if exponent != 0:
                factors.append('{}^{}'.format(variable_names[col], exponent))

        rendered_terms.append(" * ".join(factors))

    print(" + ".join(rendered_terms))
    


def analyze(train, test, features, target, trunc_limit = 0.001, degree = 6):
    """Fit a non-negative Lasso on polynomial features and report test accuracy.

    The ``features`` columns of ``train`` are expanded into polynomial terms
    up to ``degree`` (no bias column). A Lasso with no intercept and
    coefficients constrained to be non-negative is fitted to
    ``train[target]``. Accuracy on ``test`` is printed twice: once for the
    model as fitted, and once after every coefficient below ``trunc_limit``
    has been set to zero. The surviving terms are then printed as a
    polynomial.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Must both contain the ``features`` columns and the ``target`` column.
    features : list of str
        Input column names; also used as variable names in the printout.
    target : str
        Name of the column to predict.
    trunc_limit : float, default 0.001
        Coefficients strictly smaller than this are zeroed after fitting.
    degree : int, default 6
        Maximum total degree of the polynomial expansion.

    Returns
    -------
    The fitted Lasso estimator, with ``coef_`` replaced by the truncated
    coefficients.
    """
    poly = PolynomialFeatures(degree = degree, include_bias = False)

    X_train = poly.fit_transform(train[features])
    Y_train = train[target]

    lin = Lasso(alpha=0.0001,precompute=True, max_iter=10000,fit_intercept=False,
                positive=True, random_state=9999, selection='random')
    lin.fit(X_train, Y_train)

    print("score on training set {}".format(lin.score(X_train, Y_train)))

    # Only transform the held-out data: the expansion was already fitted on
    # the training set and must not be refitted on test samples.
    X_test = poly.transform(test[features])
    Y_test = test[target]

    print("score on test set {}".format(lin.score(X_test, Y_test)))

    print("Model accuracy before manual truncation of coefficients")
    _print_test_metrics(Y_test, lin.predict(X_test))

    # Zero every coefficient below the threshold and install the result on
    # the estimator, so both the metrics below and the returned model use it.
    coeffs = lin.coef_.copy()
    coeffs[coeffs < trunc_limit] = 0.0
    lin.coef_ = coeffs

    y_pred_truncated = lin.predict(X_test)

    print("Truncating coefficients lower than {}".format(trunc_limit))
    print("Model accuracy after manual truncation of coefficients")
    _print_test_metrics(Y_test, y_pred_truncated)

    pretty_print_polynomial(poly, lin, features)

    return lin


def _print_test_metrics(y_true, y_pred):
    """Print max absolute error, mean absolute error and R2 for one prediction set."""
    print("Max absolute error {} microseconds".format(max_error(y_true, y_pred)))
    print("Mean absolute error {} microseconds".format(mean_absolute_error(y_true, y_pred)))
    print("R2 score = {}".format(r2_score(y_true, y_pred)))

In [82]:
def analyze_bls12(df):
    """Fit the Miller-loop and final-exponentiation models for one frame.

    ``df`` is decomposed with ``factor_out_final_exp``; each resulting frame
    is split with ``split_df`` and fitted with ``analyze`` using the default
    truncation limit and degree. Returns ``(model_miller, model_final_exp)``.
    """
    miller_features = ["x_bit_length", "x_hamming_weight", "modulus_limbs", "group_limbs"]
    final_exp_features = ["x_bit_length", "x_hamming_weight", "modulus_limbs"]

    miller_df, final_exp_df = factor_out_final_exp(df)

    miller_train, miller_test = split_df(miller_df)
    print("Fitting miller loop price")
    model_miller = analyze(miller_train, miller_test, miller_features, "single_pair_miller_time")

    final_train, final_test = split_df(final_exp_df)
    print("Fitting final exp price")
    model_final_exp = analyze(final_train, final_test, final_exp_features, "final_exp_time")

    return (model_miller, model_final_exp)

In [83]:
# Fit both models on the frame loaded from the first file_list entry ("BLS12").
(bls_miller, bls_final_exp) = analyze_bls12(dataframes[0])

Train samples 921, test samples 103
Fitting miller loop price
score on training set 0.9434042151551887
score on test set 0.9339235710727891
Model accuracy before manual truncation of coefficients
Max absolute error 7982.472382042957 microseconds
Mean absolute error 1031.2139043562947 microseconds
R2 score = 0.9339235710727891
Truncating coefficients lower than 0.001
Model accuracy after manual truncation of coefficients
Max absolute error 8050.237964108423 microseconds
Mean absolute error 1021.3118032409297 microseconds
R2 score = 0.9342395308646859
20.684078 * modulus_limbs^1 * group_limbs^1 + 0.154826 * x_bit_length^1 * modulus_limbs^2 + 0.003588 * x_bit_length^1 * modulus_limbs^1 * group_limbs^1 + 0.212357 * x_hamming_weight^1 * modulus_limbs^2 + 4.807249 * modulus_limbs^2 * group_limbs^1 + 0.001019 * x_bit_length^1 * modulus_limbs^2 * group_limbs^1
Train samples 921, test samples 103
Fitting final exp price
score on training set 0.8660980021469955
score on test set 0.8895391926235


In [84]:
def analyze_bn(df):
    """Fit the Miller-loop and final-exponentiation models for one frame.

    Same flow as the other ``analyze_*`` helpers, except that the Miller-loop
    model uses the ``six_u_plus_two_*`` columns while the final-exponentiation
    model uses the ``x_*`` columns. Returns
    ``(model_miller, model_final_exp)``.
    """
    miller_features = ["six_u_plus_two_bit_length", "six_u_plus_two_hamming", "modulus_limbs", "group_limbs"]
    final_exp_features = ["x_bit_length", "x_hamming_weight", "modulus_limbs"]

    miller_df, final_exp_df = factor_out_final_exp(df)

    miller_train, miller_test = split_df(miller_df)
    print("Fitting miller loop price")
    model_miller = analyze(miller_train, miller_test, miller_features, "single_pair_miller_time")

    final_train, final_test = split_df(final_exp_df)
    print("Fitting final exp price")
    model_final_exp = analyze(final_train, final_test, final_exp_features, "final_exp_time")

    return (model_miller, model_final_exp)

In [85]:
# Fit both models on the frame loaded from the second file_list entry ("BN").
(bn_miller, bn_final_exp) = analyze_bn(dataframes[1])

Train samples 872, test samples 97
Fitting miller loop price
score on training set 0.9554316087441047
score on test set 0.9636549175345519
Model accuracy before manual truncation of coefficients
Max absolute error 5335.093917450407 microseconds
Mean absolute error 890.2588772281415 microseconds
R2 score = 0.9636549175345519
Truncating coefficients lower than 0.001
Model accuracy after manual truncation of coefficients
Max absolute error 5404.519193182359 microseconds
Mean absolute error 1049.5245653870857 microseconds
R2 score = 0.95235859382994
0.023439 * six_u_plus_two_bit_length^1 * modulus_limbs^1 + 1.853638 * modulus_limbs^2 + 20.273895 * modulus_limbs^1 * group_limbs^1 + 0.126171 * six_u_plus_two_bit_length^1 * modulus_limbs^2 + 0.155279 * six_u_plus_two_hamming^1 * modulus_limbs^2 + 4.762833 * modulus_limbs^2 * group_limbs^1
Train samples 872, test samples 97
Fitting final exp price
score on training set 0.8794578804082054
score on test set 0.8920151218791879
Model accuracy befo

In [86]:
def analyze_mnt(df, trunc_limit = 0.001):
    """Fit the Miller-loop and final-exponentiation models for one frame.

    ``trunc_limit`` is forwarded to both ``analyze`` calls. The
    final-exponentiation model is fitted on the ``exp_w0_*`` / ``exp_w1_*``
    columns plus ``modulus_limbs`` with an explicit ``degree = 6``. Returns
    ``(model_miller, model_final_exp)``.
    """
    miller_features = ["x_bit_length", "x_hamming_weight", "modulus_limbs", "group_limbs"]
    final_exp_features = ["exp_w0_bit_length", "exp_w0_hamming", "exp_w1_bit_length", "exp_w1_hamming", "modulus_limbs"]

    miller_df, final_exp_df = factor_out_final_exp(df)

    miller_train, miller_test = split_df(miller_df)
    print("Fitting miller loop price")
    model_miller = analyze(
        miller_train, miller_test, miller_features, "single_pair_miller_time",
        trunc_limit = trunc_limit)

    final_train, final_test = split_df(final_exp_df)
    print("Fitting final exp price")
    model_final_exp = analyze(
        final_train, final_test, final_exp_features, "final_exp_time",
        trunc_limit = trunc_limit, degree = 6)

    return (model_miller, model_final_exp)

In [87]:
# Fit the frame from the third file_list entry ("MNT4") twice: with the 0.001
# truncation limit, then with truncation disabled (limit 0.0).
# NOTE: the second assignment overwrites the models from the first call, so
# only the untruncated models remain bound to these names afterwards.
(mnt4_miller, mnt4_final_exp) = analyze_mnt(dataframes[2], trunc_limit = 0.001)
print("\n\n\n")
(mnt4_miller, mnt4_final_exp) = analyze_mnt(dataframes[2], trunc_limit = 0.0)

Train samples 843, test samples 94
Fitting miller loop price
score on training set 0.9745941889991472
score on test set 0.984019644746227
Model accuracy before manual truncation of coefficients
Max absolute error 14320.078896705418 microseconds
Mean absolute error 2247.6804327400437 microseconds
R2 score = 0.984019644746227
Truncating coefficients lower than 0.001
Model accuracy after manual truncation of coefficients
Max absolute error 14782.304448268547 microseconds
Mean absolute error 2332.6635122586595 microseconds
R2 score = 0.9820384470390778
0.517809 * x_bit_length^1 * modulus_limbs^1 + 0.043491 * x_hamming_weight^1 * modulus_limbs^1 + 3.595715 * modulus_limbs^2 + 20.287166 * modulus_limbs^1 * group_limbs^1 + 0.097823 * x_bit_length^1 * modulus_limbs^2 + 0.002467 * x_bit_length^1 * modulus_limbs^1 * group_limbs^1 + 0.119391 * x_hamming_weight^1 * modulus_limbs^2 + 5.417468 * modulus_limbs^2 * group_limbs^1 + 0.518857 * modulus_limbs^1 * group_limbs^2
Train samples 843, test samp

In [88]:
# Fit the frame from the fourth file_list entry ("MNT6") twice: with the 0.001
# truncation limit, then with truncation disabled (limit 0.0).
# NOTE: the second assignment overwrites the models from the first call, so
# only the untruncated models remain bound to these names afterwards.
(mnt6_miller, mnt6_final_exp) = analyze_mnt(dataframes[3], trunc_limit = 0.001)
print("\n\n\n")
(mnt6_miller, mnt6_final_exp) = analyze_mnt(dataframes[3], trunc_limit = 0.0)

Train samples 822, test samples 92
Fitting miller loop price
score on training set 0.9829993490203128
score on test set 0.9745293333763084
Model accuracy before manual truncation of coefficients
Max absolute error 44172.42140309553 microseconds
Mean absolute error 3797.378266006613 microseconds
R2 score = 0.9745293333763084
Truncating coefficients lower than 0.001
Model accuracy after manual truncation of coefficients
Max absolute error 44612.44449168883 microseconds
Mean absolute error 3748.7707957221587 microseconds
R2 score = 0.974125897285546
1.169968 * x_bit_length^1 * modulus_limbs^1 + 0.248469 * x_hamming_weight^1 * modulus_limbs^1 + 5.264963 * modulus_limbs^2 + 51.270023 * modulus_limbs^1 * group_limbs^1 + 0.173355 * x_bit_length^1 * modulus_limbs^2 + 0.005379 * x_bit_length^1 * modulus_limbs^1 * group_limbs^1 + 0.211521 * x_hamming_weight^1 * modulus_limbs^2 + 8.884313 * modulus_limbs^2 * group_limbs^1
Train samples 822, test samples 92
Fitting final exp price
score on trainin