In [121]:
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
import numpy as np

# file_list = [("BLS12", "./bls12/monte_carlo_f_exp_50000.csv"),
#             ("BN", "./bn/monte_carlo_f_exp_50000.csv"),
#             ("MNT4", "./mnt4/monte_carlo_f_exp_50000.csv"),
#             ("MNT6", "./mnt6/monte_carlo_f_exp_50000.csv")]

# Input datasets as (label, csv_path) pairs; get_dfs() returns its frames in
# this same order, so index 0 is BLS12, 1 is BN, 2 is MNT4 and 3 is MNT6.
file_list = [("BLS12", "./bls12/monte_carlo_f_exp_55000.csv"),
            ("BN", "./bn/monte_carlo_f_exp_55000.csv"),
            ("MNT4", "./mnt4/monte_carlo_f_exp_55000.csv"),
            ("MNT6", "./mnt6/monte_carlo_f_exp_55000.csv")]



def get_dfs(files):
    """Load every CSV in ``files`` and keep only rows flagged ``x_is_negative``.

    Parameters
    ----------
    files : iterable of (name, path) pairs
        ``name`` is a label for the dataset (not used by this function);
        ``path`` is the CSV file to read. Every CSV must contain an
        ``x_is_negative`` column.

    Returns
    -------
    list of pandas.DataFrame
        One frame per entry of ``files``, in the same order, restricted to
        the rows where ``x_is_negative == 1.0`` and with that column removed.
    """
    results = []
    for _name, path in files:
        raw = pd.read_csv(path)
        # Filter and drop in a single expression so a fresh frame is produced;
        # dropping in place on a boolean-filtered frame can raise pandas'
        # chained-assignment warning.
        filtered = raw.loc[raw["x_is_negative"] == 1.0].drop(columns="x_is_negative")
        results.append(filtered)

    return results

In [173]:
from sklearn import linear_model

def factor_out_final_exp(df, group_by = 3, skip_bad_fits = False):
    """Split measured run times into a per-pair slope and a constant offset.

    The rows of ``df`` are taken in consecutive chunks of ``group_by`` rows.
    For each chunk the line ``run_microseconds = a * num_pairs + b`` is fitted
    by ordinary least squares. The slope ``a`` is stored as
    ``single_pair_miller_time`` and the intercept ``b`` as ``final_exp_time``.
    Every other column is copied from the first row of the chunk.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``num_pairs`` and ``run_microseconds`` columns.
    group_by : int, default 3
        Number of consecutive rows that form one chunk.
    skip_bad_fits : bool, default False
        When true, chunks whose fit has an R^2 score below 0.90 are left out
        of both results.

    Returns
    -------
    (df_miller_loops, df_final_exps) : tuple of pandas.DataFrame
        Both frames have the columns of ``df`` minus ``num_pairs`` and
        ``run_microseconds``, plus ``single_pair_miller_time`` and
        ``final_exp_time`` respectively. Rows are labelled with the index
        label of the first row of their chunk.

    Side effects
    ------------
    Prints the lowest R^2 score seen over all chunks (skipped ones included).
    """
    shared_columns = [c for c in df.columns if c not in ("num_pairs", "run_microseconds")]

    labels = []
    miller_records = []
    final_exp_records = []
    min_score = 1.0

    # Chunks are formed purely by row position; a trailing chunk shorter than
    # ``group_by`` is still fitted like any other.
    for _, g in df.groupby(np.arange(len(df)) // group_by):
        # Convert to NumPy explicitly: indexing a Series with [:, np.newaxis]
        # is not supported by current pandas releases.
        X = g[["num_pairs"]].to_numpy()
        y = g["run_microseconds"].to_numpy()

        model = linear_model.LinearRegression(fit_intercept = True).fit(X, y)
        score = model.score(X, y)
        if score < min_score:
            min_score = score

        if score < 0.90 and skip_bad_fits:
            continue

        first_row = g.iloc[0]
        shared = {column: first_row[column] for column in shared_columns}

        labels.append(first_row.name)
        miller_records.append({**shared, "single_pair_miller_time": model.coef_[0]})
        final_exp_records.append({**shared, "final_exp_time": model.intercept_})

    print("Minimal final exp fitting score = {}".format(min_score))

    # Build each frame once from the collected records; DataFrame.append was
    # removed from pandas and growing a frame row by row is quadratic.
    df_miller_loops = pd.DataFrame(
        miller_records, index = labels,
        columns = shared_columns + ["single_pair_miller_time"])
    df_final_exps = pd.DataFrame(
        final_exp_records, index = labels,
        columns = shared_columns + ["final_exp_time"])

    return (df_miller_loops, df_final_exps)

In [123]:
# Load all datasets; dataframes[i] corresponds to file_list[i].
dataframes = get_dfs(file_list)

In [124]:
# Preview the first rows of the BLS12 frame (file_list[0]).
dataframes[0].head()

Unnamed: 0,x_bit_length,x_hamming_weight,modulus_limbs,group_limbs,num_pairs,run_microseconds
0,57,39,6,16,2,22083
1,57,39,6,16,4,25577
2,57,39,6,16,6,36677
3,64,7,7,11,2,15900
4,64,7,7,11,4,22633


In [125]:
# Preview the first rows of the BN frame (file_list[1]).
dataframes[1].head()

Unnamed: 0,six_u_plus_two_bit_length,six_u_plus_two_hamming,modulus_limbs,group_limbs,num_pairs,x_bit_length,x_hamming_weight,run_microseconds
0,51,29,10,2,2,49,29,25174
1,51,29,10,2,4,49,29,28713
2,51,29,10,2,6,49,29,34342
3,5,1,6,4,2,2,2,7393
4,5,1,6,4,4,2,2,8684


In [126]:
# Preview the first rows of the MNT4 frame (file_list[2]).
dataframes[2].head()

Unnamed: 0,modulus_limbs,group_limbs,num_pairs,x_bit_length,x_hamming_weight,exp_w0_bit_length,exp_w0_hamming,exp_w0_is_negative,exp_w1_bit_length,exp_w1_hamming,run_microseconds
0,4,2,2,119,118,1789,279,1,1580,1275,5357
1,4,2,4,119,118,1789,279,1,1580,1275,7141
2,4,2,6,119,118,1789,279,1,1580,1275,8935
3,10,1,2,371,284,792,321,1,1146,669,27374
4,10,1,4,371,284,792,321,1,1146,669,44052


In [127]:
# Preview the first rows of the MNT6 frame (file_list[3]).
dataframes[3].head()

Unnamed: 0,modulus_limbs,group_limbs,num_pairs,x_bit_length,x_hamming_weight,exp_w0_bit_length,exp_w0_hamming,exp_w0_is_negative,exp_w1_bit_length,exp_w1_hamming,run_microseconds
0,9,9,2,1917,1474,1601,1371,1,672,17,219275
1,9,9,4,1917,1474,1601,1371,1,672,17,402700
2,9,9,6,1917,1474,1601,1371,1,672,17,606740
3,11,3,2,1740,1291,765,646,1,898,478,215073
4,11,3,4,1740,1291,765,646,1,898,478,425829


In [128]:
from sklearn.model_selection import train_test_split

def split_df(df, test_size = 0.10, random_state = 42):
    """Randomly split ``df`` into a training part and a test part.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to split row-wise.
    test_size : float or int, default 0.10
        Forwarded to ``train_test_split``; the default holds out 10% of rows.
    random_state : int, default 42
        Seed forwarded to ``train_test_split`` so the split is repeatable.

    Returns
    -------
    (train, test) : tuple
        The two parts as returned by ``train_test_split``.

    Side effects
    ------------
    Prints the number of rows in each part.
    """
    train, test = train_test_split(
        df, test_size=test_size, random_state=random_state)

    print("Train samples {}, test samples {}".format(len(train), len(test)))

    return (train, test)

In [129]:
from sklearn import linear_model
from sklearn import linear_model
from sklearn.linear_model import Lasso

from sklearn.metrics import max_error, mean_absolute_error, r2_score

def pretty_print_polynomial(poly, model, variable_names):
    """Print a fitted polynomial as ``coeff * var^power * ... + ...``.

    Parameters
    ----------
    poly : object with a ``powers_`` attribute
        ``powers_[t, v]`` is the exponent of variable ``v`` in term ``t``.
    model : object with a ``coef_`` attribute
        ``coef_[t]`` is the coefficient of term ``t``.
    variable_names : sequence of str
        Display name of every variable, indexed like the columns of ``powers_``.

    Terms whose coefficient is exactly zero are omitted; the remaining
    coefficients are rounded to six decimals. Variables with exponent zero
    are left out of their term. Nothing is returned.
    """
    powers = poly.powers_
    term_count, variable_count = powers.shape
    rendered_terms = []

    for row in range(term_count):
        raw_coefficient = model.coef_[row]
        if raw_coefficient == 0:
            # Term is absent from the model.
            continue

        factors = ["{}".format(np.around(raw_coefficient, decimals=6))]
        factors.extend(
            '{}^{}'.format(variable_names[col], powers[row, col])
            for col in range(variable_count)
            if powers[row, col] != 0
        )
        rendered_terms.append(" * ".join(factors))

    print(" + ".join(rendered_terms))
    


def analyze(train, test, features, target, trunc_limit = 0.001, degree = 6):
    """Fit a non-negative Lasso model on polynomial features and report accuracy.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Training and held-out rows; both must contain ``features`` and ``target``.
    features : list of str
        Columns expanded into polynomial terms up to ``degree``.
    target : str
        Column to predict.
    trunc_limit : float, default 0.001
        After fitting, every coefficient strictly below this value is set to zero.
    degree : int, default 6
        Maximum total degree of the generated polynomial terms.

    Returns
    -------
    sklearn.linear_model.Lasso
        The fitted model with its ``coef_`` already truncated.

    Side effects
    ------------
    Prints train/test scores, error metrics on the test set before and after
    truncation, and the resulting polynomial.
    """
    poly = PolynomialFeatures(degree = degree, include_bias = False)

    X_train = poly.fit_transform(train[features])
    Y_train = train[target]

    lin = Lasso(alpha=0.0001,precompute=True, max_iter=100000,fit_intercept=False,
                positive=True, random_state=9999, selection='random')
    lin.fit(X_train, Y_train)

    print("score on training set {}".format(lin.score(X_train, Y_train)))

    # Reuse the expansion fitted on the training data; the transformer must
    # not be re-fitted on held-out rows.
    X_test = poly.transform(test[features])
    Y_test = test[target]

    print("score on test set {}".format(lin.score(X_test, Y_test)))

    def report_test_metrics():
        # Evaluates the model with whatever coefficients it currently holds.
        y_pred = lin.predict(X_test)
        print("Max absolute error {} microseconds".format(max_error(Y_test, y_pred)))
        print("Mean absolute error {} microseconds".format(mean_absolute_error(Y_test, y_pred)))
        print("R2 score = {}".format(r2_score(Y_test, y_pred)))

    print("Model accuracy before manual truncation of coefficients")
    report_test_metrics()

    # Zero out every coefficient strictly below the limit in one vectorised step.
    lin.coef_ = np.where(lin.coef_ < trunc_limit, 0.0, lin.coef_)

    print("Truncating coefficients lower than {}".format(trunc_limit))
    print("Model accuracy after manual truncation of coefficients")
    report_test_metrics()

    pretty_print_polynomial(poly, lin, features)

    return lin

In [130]:
def analyze_bls12(df):
    """Fit the Miller-loop and final-exponentiation models for a BLS12 frame.

    ``df`` is first decomposed by ``factor_out_final_exp``; each of the two
    resulting frames is split into train/test parts and passed to ``analyze``.

    Returns
    -------
    (model_miller, model_final_exp) : tuple
        The two models returned by ``analyze``.
    """
    miller_features = ["x_bit_length", "x_hamming_weight", "modulus_limbs", "group_limbs"]
    final_exp_features = ["x_bit_length", "x_hamming_weight", "modulus_limbs"]

    miller_df, final_exp_df = factor_out_final_exp(df)

    miller_train, miller_test = split_df(miller_df)
    print("Fitting miller loop price")
    model_miller = analyze(miller_train, miller_test, miller_features, "single_pair_miller_time")

    final_exp_train, final_exp_test = split_df(final_exp_df)
    print("Fitting final exp price")
    model_final_exp = analyze(final_exp_train, final_exp_test, final_exp_features, "final_exp_time")

    return (model_miller, model_final_exp)

In [131]:
# Fit both BLS12 models on the BLS12 frame (file_list[0]).
(bls_miller, bls_final_exp) = analyze_bls12(dataframes[0])

Train samples 9518, test samples 1058
Fitting miller loop price
score on training set 0.9779719430591235
score on test set 0.9802756246130122
Model accuracy before manual truncation of coefficients
Max absolute error 7473.377580372722 microseconds
Mean absolute error 546.5861013689263 microseconds
R2 score = 0.9802756246130122
Truncating coefficients lower than 0.001
Model accuracy after manual truncation of coefficients
Max absolute error 7495.086831763099 microseconds
Mean absolute error 543.9214730284359 microseconds
R2 score = 0.980351845515199
0.378077 * x_bit_length^1 * modulus_limbs^1 + 0.101827 * x_hamming_weight^1 * modulus_limbs^1 + 18.116865 * modulus_limbs^1 * group_limbs^1 + 0.001745 * x_bit_length^1 * x_hamming_weight^1 * modulus_limbs^1 + 0.142627 * x_bit_length^1 * modulus_limbs^2 + 0.009235 * x_bit_length^1 * modulus_limbs^1 * group_limbs^1 + 0.169045 * x_hamming_weight^1 * modulus_limbs^2 + 4.770556 * modulus_limbs^2 * group_limbs^1 + 0.0016 * modulus_limbs^2 * group_

In [132]:
def analyze_bn(df):
    """Fit the Miller-loop and final-exponentiation models for a BN frame.

    The Miller-loop model uses the ``six_u_plus_two_*`` columns, while the
    final-exponentiation model uses the ``x_*`` columns. ``df`` is decomposed
    by ``factor_out_final_exp`` and each part is split and passed to ``analyze``.

    Returns
    -------
    (model_miller, model_final_exp) : tuple
        The two models returned by ``analyze``.
    """
    miller_features = ["six_u_plus_two_bit_length", "six_u_plus_two_hamming", "modulus_limbs", "group_limbs"]
    final_exp_features = ["x_bit_length", "x_hamming_weight", "modulus_limbs"]

    miller_df, final_exp_df = factor_out_final_exp(df)

    miller_train, miller_test = split_df(miller_df)
    print("Fitting miller loop price")
    model_miller = analyze(miller_train, miller_test, miller_features, "single_pair_miller_time")

    final_exp_train, final_exp_test = split_df(final_exp_df)
    print("Fitting final exp price")
    model_final_exp = analyze(final_exp_train, final_exp_test, final_exp_features, "final_exp_time")

    return (model_miller, model_final_exp)

In [133]:
# Fit both BN models on the BN frame (file_list[1]).
(bn_miller, bn_final_exp) = analyze_bn(dataframes[1])

Train samples 9417, test samples 1047
Fitting miller loop price
score on training set 0.9803164741835472
score on test set 0.9829824004300713
Model accuracy before manual truncation of coefficients
Max absolute error 6485.833986781602 microseconds
Mean absolute error 534.4015268817641 microseconds
R2 score = 0.9829824004300713
Truncating coefficients lower than 0.001
Model accuracy after manual truncation of coefficients
Max absolute error 6350.113315148997 microseconds
Mean absolute error 530.9770159313209 microseconds
R2 score = 0.9832223185306923
0.236925 * six_u_plus_two_bit_length^1 * modulus_limbs^1 + 0.061988 * six_u_plus_two_hamming^1 * modulus_limbs^1 + 19.127274 * modulus_limbs^1 * group_limbs^1 + 0.002667 * six_u_plus_two_bit_length^1 * six_u_plus_two_hamming^1 * modulus_limbs^1 + 0.157434 * six_u_plus_two_bit_length^1 * modulus_limbs^2 + 0.15528 * six_u_plus_two_hamming^1 * modulus_limbs^2 + 4.772966 * modulus_limbs^2 * group_limbs^1
Train samples 9417, test samples 1047
Fi

In [137]:
def analyze_mnt(df, trunc_limit = 0.001):
    """Fit the Miller-loop and final-exponentiation models for an MNT frame.

    ``df`` is decomposed by ``factor_out_final_exp``; each part is split into
    train/test rows and passed to ``analyze`` with the given ``trunc_limit``.
    The final-exponentiation model is fitted on the ``exp_w0_*`` / ``exp_w1_*``
    columns together with ``modulus_limbs``.

    Returns
    -------
    (model_miller, model_final_exp) : tuple
        The two models returned by ``analyze``.
    """
    miller_features = ["x_bit_length", "x_hamming_weight", "modulus_limbs", "group_limbs"]
    final_exp_features = ["exp_w0_bit_length", "exp_w0_hamming", "exp_w1_bit_length", "exp_w1_hamming", "modulus_limbs"]

    miller_df, final_exp_df = factor_out_final_exp(df)

    miller_train, miller_test = split_df(miller_df)
    print("Fitting miller loop price")
    model_miller = analyze(
        miller_train, miller_test, miller_features,
        "single_pair_miller_time", trunc_limit = trunc_limit)

    final_exp_train, final_exp_test = split_df(final_exp_df)
    print("Fitting final exp price")
    model_final_exp = analyze(
        final_exp_train, final_exp_test, final_exp_features,
        "final_exp_time", trunc_limit = trunc_limit, degree = 6)

    return (model_miller, model_final_exp)

In [138]:
# Fit the MNT4 models twice to compare the printed reports with and without
# coefficient truncation. The second call rebinds mnt4_miller / mnt4_final_exp,
# so after this cell they hold the models fitted with trunc_limit = 0.0.
(mnt4_miller, mnt4_final_exp) = analyze_mnt(dataframes[2], trunc_limit = 0.001)
print("\n\n\n")
(mnt4_miller, mnt4_final_exp) = analyze_mnt(dataframes[2], trunc_limit = 0.0)

Train samples 9458, test samples 1051
Fitting miller loop price
score on training set 0.990810453612425
score on test set 0.9872677133226724
Model accuracy before manual truncation of coefficients
Max absolute error 31709.005569665227 microseconds
Mean absolute error 1529.0306499396045 microseconds
R2 score = 0.9872677133226724
Truncating coefficients lower than 0.001
Model accuracy after manual truncation of coefficients
Max absolute error 32910.466579074986 microseconds
Mean absolute error 1538.029793019711 microseconds
R2 score = 0.9869416414732952
0.483096 * x_bit_length^1 * modulus_limbs^1 + 0.249595 * x_hamming_weight^1 * modulus_limbs^1 + 1.343401 * modulus_limbs^2 + 22.410234 * modulus_limbs^1 * group_limbs^1 + 0.100338 * x_bit_length^1 * modulus_limbs^2 + 0.097977 * x_hamming_weight^1 * modulus_limbs^2 + 5.759813 * modulus_limbs^2 * group_limbs^1 + 0.0179 * modulus_limbs^1 * group_limbs^2 + 0.005582 * modulus_limbs^1 * group_limbs^3
Train samples 9458, test samples 1051
Fittin

In [136]:
# Fit the MNT6 models twice to compare the printed reports with and without
# coefficient truncation. The second call rebinds mnt6_miller / mnt6_final_exp,
# so after this cell they hold the models fitted with trunc_limit = 0.0.
(mnt6_miller, mnt6_final_exp) = analyze_mnt(dataframes[3], trunc_limit = 0.001)
print("\n\n\n")
(mnt6_miller, mnt6_final_exp) = analyze_mnt(dataframes[3], trunc_limit = 0.0)

Train samples 9382, test samples 1043
Fitting miller loop price
score on training set 0.9909648771792469
score on test set 0.9933533841953999
Model accuracy before manual truncation of coefficients
Max absolute error 26636.078553710657 microseconds
Mean absolute error 2424.7407084245783 microseconds
R2 score = 0.9933533841954
Truncating coefficients lower than 0.001
Model accuracy after manual truncation of coefficients
Max absolute error 28089.403434414242 microseconds
Mean absolute error 2463.733056306811 microseconds
R2 score = 0.9932667326223179
1.043465 * x_bit_length^1 * modulus_limbs^1 + 0.666413 * x_hamming_weight^1 * modulus_limbs^1 + 2.398405 * modulus_limbs^2 + 43.091255 * modulus_limbs^1 * group_limbs^1 + 0.174695 * x_bit_length^1 * modulus_limbs^2 + 0.181491 * x_hamming_weight^1 * modulus_limbs^2 + 9.441175 * modulus_limbs^2 * group_limbs^1
Train samples 9382, test samples 1043
Fitting final exp price
score on training set 0.22623654365401533
score on test set 0.2235976765

In [153]:
def analyze_manual_poly(df, features_description, target, trunc_limit = 0.001, degree = 3):
    """Fit a non-negative Lasso model on interactions of hand-built power columns.

    For every ``(name, max_power)`` pair a column ``"{name}^{i}"`` is created
    for ``i = 1 .. max_power``. These columns are then combined with
    ``PolynomialFeatures(interaction_only=True)`` up to ``degree``, and a
    Lasso model is fitted on a train split of the result.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain every column named in ``features_description`` and ``target``.
        It is not modified; the function works on a copy.
    features_description : iterable of (str, int)
        Column name and highest power to generate for it.
    target : str
        Column to predict.
    trunc_limit : float, default 0.001
        After fitting, every coefficient strictly below this value is set to zero.
    degree : int, default 3
        Maximum number of generated columns multiplied together in one term.

    Returns
    -------
    sklearn.linear_model.Lasso
        The fitted model with its ``coef_`` already truncated.

    Side effects
    ------------
    Prints the generated feature names, the split sizes, the intercept,
    train/test scores, error metrics before and after truncation, and the
    resulting polynomial.
    """
    new_df = df.copy()
    features = []
    for name, max_power in features_description:
        # Work in float64 so large powers cannot wrap around as fixed-width
        # integers would; the estimator consumes floats anyway.
        base = new_df[name].astype("float64")
        for i in range(1, max_power+1):
            subname = "{}^{}".format(name, i)
            new_df[subname] = base ** i
            features.append(subname)

    print(features)

    poly = PolynomialFeatures(degree = degree, interaction_only=True, include_bias = False)

    train, test = split_df(new_df)

    X_train = poly.fit_transform(train[features])
    Y_train = train[target]

    lin = Lasso(alpha=0.0001,precompute=True, max_iter=100000, fit_intercept=False,
                positive=True, random_state=9999, selection='random')
    lin.fit(X_train, Y_train)

    print("Intercept = {}".format(lin.intercept_))

    print("score on training set {}".format(lin.score(X_train, Y_train)))

    # Reuse the expansion fitted on the training data; the transformer must
    # not be re-fitted on held-out rows.
    X_test = poly.transform(test[features])
    Y_test = test[target]

    print("score on test set {}".format(lin.score(X_test, Y_test)))

    def report_test_metrics():
        # Evaluates the model with whatever coefficients it currently holds.
        y_pred = lin.predict(X_test)
        print("Max absolute error {} microseconds".format(max_error(Y_test, y_pred)))
        print("Mean absolute error {} microseconds".format(mean_absolute_error(Y_test, y_pred)))
        print("R2 score = {}".format(r2_score(Y_test, y_pred)))

    print("Model accuracy before manual truncation of coefficients")
    report_test_metrics()

    # Zero out every coefficient strictly below the limit in one vectorised step.
    lin.coef_ = np.where(lin.coef_ < trunc_limit, 0.0, lin.coef_)

    print("Truncating coefficients lower than {}".format(trunc_limit))
    print("Model accuracy after manual truncation of coefficients")
    report_test_metrics()

    pretty_print_polynomial(poly, lin, features)

    return lin

In [154]:
def analyze_mnt_final_exp(df, trunc_limit = 0.001):
    """Fit only the final-exponentiation model using hand-built power columns.

    ``df`` is decomposed by ``factor_out_final_exp``; the Miller-loop part is
    discarded and the final-exponentiation part is passed to
    ``analyze_manual_poly`` with powers 1..12 of ``modulus_limbs`` and the
    first power of each ``exp_w0_*`` / ``exp_w1_*`` column, combined pairwise.

    Returns
    -------
    The model returned by ``analyze_manual_poly``.
    """
    _, final_exp_df = factor_out_final_exp(df)

    feature_powers = [
        ("exp_w0_bit_length", 1),
        ("exp_w0_hamming", 1),
        ("exp_w1_bit_length", 1),
        ("exp_w1_hamming", 1),
        ("modulus_limbs", 12),
    ]

    print("Fitting final exp price")
    return analyze_manual_poly(
        final_exp_df, feature_powers, "final_exp_time",
        trunc_limit = trunc_limit, degree = 2)

In [155]:
# Alternative MNT4 final-exponentiation fit using hand-built power columns.
mnt4_final_exp_alt = analyze_mnt_final_exp(dataframes[2], trunc_limit = 0.001)
# print("\n\n\n") 
# mnt4_final_exp_alt = analyze_mnt_final_exp(dataframes[2], trunc_limit = 0.0)

Fitting final exp price
['exp_w0_bit_length^1', 'exp_w0_hamming^1', 'exp_w1_bit_length^1', 'exp_w1_hamming^1', 'modulus_limbs^1', 'modulus_limbs^2', 'modulus_limbs^3', 'modulus_limbs^4', 'modulus_limbs^5', 'modulus_limbs^6', 'modulus_limbs^7', 'modulus_limbs^8', 'modulus_limbs^9', 'modulus_limbs^10', 'modulus_limbs^11', 'modulus_limbs^12']
Train samples 9458, test samples 1051
Intercept = 1001.4321467581049
score on training set 0.18283416211974268
score on test set 0.20068930079748215
Model accuracy before manual truncation of coefficients
Max absolute error 90828.97592921303 microseconds
Mean absolute error 3546.7899559125362 microseconds
R2 score = 0.20068930079748215
Truncating coefficients lower than 0.001
Model accuracy after manual truncation of coefficients
Max absolute error 90826.66158626825 microseconds
Mean absolute error 3549.061775152095 microseconds
R2 score = 0.2001105835525162
0.136533 * exp_w0_bit_length^1^1 + 0.143178 * exp_w1_bit_length^1^1 + 5.843775 * modulus_limb

In [168]:
def analyze_manual_poly_separated(df, powered_features, linear_features, target, trunc_limit = 0.001):
    """Fit a non-negative Lasso model on ``power column * linear column`` products.

    For every ``(name, max_power)`` pair a column ``"{name}^{i}"`` is created
    for ``i = 1 .. max_power``. Each such column is multiplied with each
    column of ``linear_features``; only those products are used as model
    inputs.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain every referenced feature column and ``target``.
        It is not modified; the function works on a copy.
    powered_features : iterable of (str, int)
        Column name and highest power to generate for it.
    linear_features : iterable of str
        Columns that enter each product to the first power.
    target : str
        Column to predict.
    trunc_limit : float, default 0.001
        After fitting, every coefficient strictly below this value is set to zero.

    Returns
    -------
    sklearn.linear_model.Lasso
        The fitted model with its ``coef_`` already truncated.

    Side effects
    ------------
    Prints the generated feature names, the split sizes, the intercept,
    train/test scores and error metrics before and after truncation.
    """
    new_df = df.copy()
    sub_features = []
    for name, max_power in powered_features:
        # Work in float64 so large powers cannot wrap around as fixed-width
        # integers would; the estimator consumes floats anyway.
        base = new_df[name].astype("float64")
        for i in range(1, max_power+1):
            subname = "{}^{}".format(name, i)
            new_df[subname] = base ** i
            sub_features.append(subname)

    features = []

    for subfeature in sub_features:
        for lin_feature in linear_features:
            subname = "{}*{}".format(subfeature, lin_feature)
            # Column-wise product: a row-wise DataFrame.apply here runs Python
            # code once per row per feature and fails on an empty frame.
            new_df[subname] = new_df[subfeature] * new_df[lin_feature].astype("float64")
            features.append(subname)

    print(features)

    train, test = split_df(new_df)

    X_train = train[features]
    Y_train = train[target]

    lin = Lasso(alpha=0.0001,precompute=True, max_iter=100000, fit_intercept=False,
                positive=True, random_state=9999, selection='random')
    lin.fit(X_train, Y_train)

    print("Intercept = {}".format(lin.intercept_))

    print("score on training set {}".format(lin.score(X_train, Y_train)))

    X_test = test[features]
    Y_test = test[target]

    print("score on test set {}".format(lin.score(X_test, Y_test)))

    def report_test_metrics():
        # Evaluates the model with whatever coefficients it currently holds.
        y_pred = lin.predict(X_test)
        print("Max absolute error {} microseconds".format(max_error(Y_test, y_pred)))
        print("Mean absolute error {} microseconds".format(mean_absolute_error(Y_test, y_pred)))
        print("R2 score = {}".format(r2_score(Y_test, y_pred)))

    print("Model accuracy before manual truncation of coefficients")
    report_test_metrics()

    # Zero out every coefficient strictly below the limit in one vectorised step.
    lin.coef_ = np.where(lin.coef_ < trunc_limit, 0.0, lin.coef_)

    print("Truncating coefficients lower than {}".format(trunc_limit))
    print("Model accuracy after manual truncation of coefficients")
    report_test_metrics()

    # pretty_print_polynomial is not called here: this function builds its
    # features by hand and has no PolynomialFeatures object to pass to it.

    return lin

In [174]:
def analyze_mnt_final_exp_manual(df, trunc_limit = 0.001):
    """Fit the final-exponentiation model on hand-built product features.

    ``df`` is decomposed by ``factor_out_final_exp`` with ``skip_bad_fits``
    enabled; the Miller-loop part is discarded. The first 25 rows of the
    final-exponentiation part are printed, then the frame is passed to
    ``analyze_manual_poly_separated`` with powers 1..12 of ``modulus_limbs``
    multiplied by each ``exp_w0_*`` / ``exp_w1_*`` column.

    Returns
    -------
    The model returned by ``analyze_manual_poly_separated``.
    """
    _, final_exp_df = factor_out_final_exp(df, skip_bad_fits = True)

    print(final_exp_df.head(25))

    powered = [("modulus_limbs", 12)]
    linear = ["exp_w0_bit_length", "exp_w0_hamming","exp_w1_bit_length","exp_w1_hamming"]

    print("Fitting final exp price")
    return analyze_manual_poly_separated(
        final_exp_df, powered, linear,
        "final_exp_time", trunc_limit = trunc_limit)

In [None]:
# MNT4 final-exponentiation fit on the hand-built product features.
# This rebinds mnt4_final_exp_alt, replacing any model stored under that name.
mnt4_final_exp_alt = analyze_mnt_final_exp_manual(dataframes[2], trunc_limit = 0.001)
# print("\n\n\n") 
# mnt4_final_exp_alt = analyze_mnt_final_exp(dataframes[2], trunc_limit = 0.0)