In [40]:
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
import numpy as np
import math

# (label, CSV path) pairs consumed by get_dfs; the frames are returned in this order.
file_list = [("MNT4", "./mnt4/final_exp_parallel_1000.csv"),
            ("MNT6", "./mnt6/final_exp_parallel_1000.csv")]

# Multiplier applied in make_averages to the rounded-up mean run time to obtain "gas".
gas_factor = 15

def get_dfs(files):
    """Load every benchmark CSV and strip the columns that are not used later.

    Parameters
    ----------
    files : iterable of (name, path) pairs
        ``name`` is only a label and is not used here; ``path`` is handed to
        ``pandas.read_csv``.

    Returns
    -------
    list of pandas.DataFrame
        One frame per entry of ``files``, in the same order, without the
        ``x_is_negative``, ``num_pairs``, ``group_limbs`` and
        ``exp_w0_is_negative`` columns.

    Raises
    ------
    KeyError
        If one of the columns to drop is missing from a CSV.
    """
    unused_columns = [
        "x_is_negative",
        "num_pairs",
        "group_limbs",
        "exp_w0_is_negative",
    ]

    loaded = []
    for _label, csv_path in files:
        frame = pd.read_csv(csv_path)
        loaded.append(frame.drop(columns=unused_columns))

    return loaded

In [43]:
from sklearn import linear_model
from sklearn.linear_model import Lasso
from scipy.optimize import nnls

def factor_out_final_exp(df, group_by = 3, skip_bad_fits = False):
    """Split grouped timings into a per-pair slope and a constant offset.

    Consecutive runs of ``group_by`` rows are treated as one measurement
    series. For each series a Lasso line with non-negative coefficients,
    ``run_microseconds ~ slope * num_pairs + intercept``, is fitted. The slope
    is stored as ``single_pair_miller_time`` and the intercept as
    ``final_exp_time``; the remaining columns are copied from the first row
    of the series.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``num_pairs`` and ``run_microseconds`` columns.
    group_by : int
        Number of consecutive rows forming one series.
    skip_bad_fits : bool
        When true, series whose fit has an R^2 score below 0.85 are skipped.

    Returns
    -------
    (df_miller_loops, df_final_exps) : tuple of pandas.DataFrame
        Both are indexed by the label of the first row of each kept series.
        Series whose slope or intercept is <= 1 are always skipped.

    Raises
    ------
    KeyError
        If ``num_pairs`` or ``run_microseconds`` is missing.
    ValueError
        If two kept series would produce the same index label.
    """
    dropped = ["num_pairs", "run_microseconds"]
    # Index.drop raises KeyError for missing labels, like the column drops did before.
    kept_columns = list(df.columns.drop(dropped))

    def _to_frame(rows, value_column):
        # Build the frame once instead of appending row by row: DataFrame.append
        # no longer exists in pandas >= 2.0 and was quadratic in a loop anyway.
        frame = pd.DataFrame(rows, columns=kept_columns + [value_column])
        if not frame.index.is_unique:
            # Same guarantee the former append(..., verify_integrity=True) gave.
            raise ValueError("Indexes have overlapping values")
        return frame

    miller_rows = []
    final_exp_rows = []
    min_score = 1.0

    for _, g in df.groupby(np.arange(len(df)) // group_by):
        # Explicit (n, 1) design matrix; indexing a Series with [:, np.newaxis]
        # is not supported by current pandas.
        X = g["num_pairs"].to_numpy().reshape(-1, 1)
        y = g["run_microseconds"].to_numpy()

        model = Lasso(alpha=0.0001, precompute=True, max_iter=1000,
                      positive=True, random_state=9999, selection='random')
        model.fit(X, y)

        score = model.score(X, y)
        if score < min_score:
            min_score = score

        if score < 0.85 and skip_bad_fits:
            continue

        # ravel() makes this independent of whether the estimator exposes the
        # single coefficient / intercept as a scalar, a 1-D or a 2-D array.
        slope = float(np.ravel(model.coef_)[0])
        intercept = float(np.ravel(model.intercept_)[0])

        # Degenerate fits (no meaningful per-pair cost or constant cost) are ignored.
        if slope <= 1 or intercept <= 1:
            continue

        base = g.iloc[0].drop(dropped)

        g_miller = base.copy()
        g_miller["single_pair_miller_time"] = slope
        miller_rows.append(g_miller)

        g_final_exp = base.copy()
        g_final_exp["final_exp_time"] = intercept
        final_exp_rows.append(g_final_exp)

    df_miller_loops = _to_frame(miller_rows, "single_pair_miller_time")
    df_final_exps = _to_frame(final_exp_rows, "final_exp_time")

    print("Minimal final exp fitting score = {}".format(min_score))

    return (df_miller_loops, df_final_exps)

In [3]:
from sklearn import linear_model
from sklearn.linear_model import Lasso
from scipy.optimize import nnls

def factor_out_final_exp_non_negative(df, group_by = 3, skip_bad_fits = False):
    """Split grouped timings into a per-pair slope and a constant offset via NNLS.

    Consecutive runs of ``group_by`` rows are treated as one measurement
    series. For each series ``run_microseconds ~ slope * num_pairs + intercept``
    is solved with ``scipy.optimize.nnls``, so both values are >= 0. The slope
    is stored as ``single_pair_miller_time`` and the intercept as
    ``final_exp_time``; the remaining columns are copied from the first row
    of the series.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``num_pairs`` and ``run_microseconds`` columns.
    group_by : int
        Number of consecutive rows forming one series.
    skip_bad_fits : bool
        When true, series whose fit has an R^2 score below 0.85 are skipped
        (same threshold as ``factor_out_final_exp``). With the default
        ``False`` every series is kept.

    Returns
    -------
    (df_miller_loops, df_final_exps) : tuple of pandas.DataFrame
        Both are indexed by the label of the first row of each kept series.

    Raises
    ------
    KeyError
        If ``num_pairs`` or ``run_microseconds`` is missing.
    ValueError
        If two kept series would produce the same index label.
    """
    dropped = ["num_pairs", "run_microseconds"]
    # Index.drop raises KeyError for missing labels, like the column drops did before.
    kept_columns = list(df.columns.drop(dropped))

    def _to_frame(rows, value_column):
        # Build the frame once instead of appending row by row: DataFrame.append
        # no longer exists in pandas >= 2.0 and was quadratic in a loop anyway.
        frame = pd.DataFrame(rows, columns=kept_columns + [value_column])
        if not frame.index.is_unique:
            # Same guarantee the former append(..., verify_integrity=True) gave.
            raise ValueError("Indexes have overlapping values")
        return frame

    miller_rows = []
    final_exp_rows = []
    min_score = 1.0

    for _, g in df.groupby(np.arange(len(df)) // group_by):
        pairs = g["num_pairs"].to_numpy(dtype=float)
        y = g["run_microseconds"].to_numpy(dtype=float)

        # Columns: [num_pairs, 1] -> solution is [slope, intercept].
        design = np.column_stack((pairs, np.ones_like(pairs)))
        solution, residual_norm = nnls(design, y)

        # R^2 from the residual norm, so the reported minimum is a real
        # measurement instead of the untouched initial value.
        total_ss = float(np.sum((y - y.mean()) ** 2))
        if total_ss > 0:
            score = 1.0 - residual_norm ** 2 / total_ss
        else:
            # Constant target: perfect if reproduced exactly, otherwise worthless.
            score = 1.0 if np.isclose(residual_norm, 0.0) else 0.0

        if score < min_score:
            min_score = score

        if score < 0.85 and skip_bad_fits:
            continue

        base = g.iloc[0].drop(dropped)

        g_miller = base.copy()
        g_miller["single_pair_miller_time"] = solution[0]
        miller_rows.append(g_miller)

        g_final_exp = base.copy()
        g_final_exp["final_exp_time"] = solution[1]
        final_exp_rows.append(g_final_exp)

    df_miller_loops = _to_frame(miller_rows, "single_pair_miller_time")
    df_final_exps = _to_frame(final_exp_rows, "final_exp_time")

    print("Minimal final exp fitting score = {}".format(min_score))

    return (df_miller_loops, df_final_exps)

In [32]:
# Load one frame per file_list entry (index 0 -> "MNT4", index 1 -> "MNT6").
dataframes = get_dfs(file_list)

In [33]:
# Preview the frame loaded from the first file_list entry ("MNT4").
dataframes[0].head(15)

Unnamed: 0,modulus_limbs,x_bit_length,x_hamming_weight,exp_w0_bit_length,exp_w0_hamming,exp_w1_bit_length,exp_w1_hamming,run_microseconds
0,4,342,21,1911,532,799,454,4928
1,7,342,21,1911,532,799,454,14243
2,10,342,21,1911,532,799,454,31337
3,4,798,183,208,43,130,101,7843
4,13,342,21,1911,532,799,454,60451
5,4,1969,401,287,228,1438,1132,20811
6,7,709,550,98,61,942,78,26377
7,4,1443,1187,1388,450,1670,44,21750
8,4,191,144,832,81,1475,1369,4196
9,7,1732,1104,418,337,955,492,48844


In [34]:
# Preview the frame loaded from the second file_list entry ("MNT6").
dataframes[1].head(15)

Unnamed: 0,modulus_limbs,x_bit_length,x_hamming_weight,exp_w0_bit_length,exp_w0_hamming,exp_w1_bit_length,exp_w1_hamming,run_microseconds
0,4,1514,957,62,52,139,5,34793
1,7,1077,79,1569,1481,1562,1230,48413
2,4,1384,294,1113,457,1394,1100,28024
3,4,51,41,1537,617,1144,547,4760
4,4,1792,858,281,216,48,16,39852
5,7,1537,617,1144,547,297,146,78644
6,10,1811,110,602,170,1193,704,152224
7,4,1912,1776,148,104,1699,1694,51953
8,13,790,175,1506,980,1436,184,167745
9,4,1428,1110,1484,1294,1739,191,40720


In [47]:
from sklearn.model_selection import train_test_split

def split_df(df):
    """Split ``df`` into train and test parts with a fixed seed.

    10% of the rows go to the test part (``test_size=0.10``) and the shuffle
    is seeded with ``random_state=42``. The sizes of both parts are printed.

    Returns
    -------
    (train, test) : tuple
        The two parts exactly as returned by ``train_test_split``.
    """
    train_part, test_part = train_test_split(df, test_size=0.10, random_state=42)

    summary = "Train samples {}, test samples {}".format(len(train_part), len(test_part))
    print(summary)

    return (train_part, test_part)

In [56]:
from sklearn import linear_model
from sklearn.linear_model import Lasso

from sklearn.metrics import max_error, mean_absolute_error, r2_score

def pretty_print_polynomial(poly, model, variable_names):
    """Print the fitted polynomial as a human-readable sum of products.

    Parameters
    ----------
    poly : object with a 2-D ``powers_`` array
        Row ``t`` holds the exponent of every input variable in term ``t``.
    model : object with a ``coef_`` sequence
        ``coef_[t]`` is the coefficient of term ``t``.
    variable_names : sequence of str
        Name of each input variable, indexed like the columns of ``powers_``.

    Terms whose coefficient is exactly zero are omitted; the remaining
    coefficients are rounded to 6 decimals. Each term is printed as
    ``coeff * a * b^2`` and terms are joined with `` + ``. Nothing is
    returned.
    """
    def render_factor(name, power):
        # Exponent 1 is written as the bare variable name.
        if power == 1:
            return '{}'.format(name)
        return '{}^{}'.format(name, power)

    rendered_terms = []
    for term_idx, exponents in enumerate(poly.powers_):
        raw_coeff = model.coef_[term_idx]
        if raw_coeff == 0:
            continue

        factors = ["{}".format(np.around(raw_coeff, decimals=6))]
        factors.extend(
            render_factor(variable_names[variable_idx], power)
            for variable_idx, power in enumerate(exponents)
            if power != 0
        )
        rendered_terms.append(" * ".join(factors))

    print(" + ".join(rendered_terms))

In [57]:
def make_averages(df):
    """Average repeated measurements and convert the mean run time to gas.

    Rows are grouped by the seven parameter columns listed below and every
    other column is averaged. A ``gas`` column is then computed as
    ``gas_factor * ceil(mean run_microseconds)`` (``gas_factor`` is the
    module-level constant), and ``run_microseconds`` is removed.

    Returns
    -------
    pandas.DataFrame
        One row per distinct parameter combination, with the grouping
        columns restored as regular columns.
    """
    key_columns = [
        'modulus_limbs',
        "x_bit_length",
        "x_hamming_weight",
        "exp_w0_bit_length",
        "exp_w0_hamming",
        "exp_w1_bit_length",
        "exp_w1_hamming",
    ]

    def to_gas(mean_micros):
        # Round the averaged time up to a whole microsecond before pricing it.
        return gas_factor * math.ceil(mean_micros)

    averaged = df.groupby(key_columns).mean()
    averaged["gas"] = averaged["run_microseconds"].apply(to_gas)

    return averaged.reset_index().drop(columns="run_microseconds")

In [58]:
# Averaged, gas-priced view of the first loaded frame ("MNT4" in file_list).
mnt4 = make_averages(dataframes[0])
mnt4.head(15)

Unnamed: 0,modulus_limbs,x_bit_length,x_hamming_weight,exp_w0_bit_length,exp_w0_hamming,exp_w1_bit_length,exp_w1_hamming,gas
0,4,1,1,263,171,600,453,93840
1,4,1,1,383,228,2013,45,63495
2,4,1,1,740,639,294,48,118800
3,4,1,1,799,685,860,146,119835
4,4,1,1,1241,1232,989,595,60465
5,4,1,1,1654,1073,219,74,62580
6,4,1,1,1708,183,216,205,125070
7,4,2,1,51,26,638,568,73155
8,4,2,1,65,63,565,360,119880
9,4,2,1,1018,427,1375,583,63510


In [67]:
# Averaged, gas-priced view of the second loaded frame ("MNT6" in file_list).
mnt6 = make_averages(dataframes[1])
mnt6.head(15)

Unnamed: 0,modulus_limbs,x_bit_length,x_hamming_weight,exp_w0_bit_length,exp_w0_hamming,exp_w1_bit_length,exp_w1_hamming,gas
0,4,1,1,206,121,1024,336,39165
1,4,1,1,411,206,1786,1191,76155
2,4,1,1,1013,908,1771,766,46230
3,4,1,1,1418,618,1504,691,80535
4,4,1,1,1528,321,215,213,130035
5,4,1,1,1555,967,1793,853,102060
6,4,1,1,1646,48,1729,1221,208905
7,4,1,1,1715,164,1624,718,184455
8,4,2,1,818,753,1858,691,188670
9,4,2,1,1228,960,991,6,135690


In [59]:
def analyze_manual_poly(df, features_description, target, trunc_limit = 0.001, degree = 3):
    """Fit a non-negative Lasso model on hand-built power features and report it.

    For every ``(name, max_power)`` entry, columns ``name^1 .. name^max_power``
    are added to a copy of ``df``. Interaction-only polynomial features of
    the given ``degree`` are generated from those columns, and a Lasso model
    without intercept and with non-negative coefficients is fitted on the
    training part returned by ``split_df``. Scores and test-set errors are
    printed before and after coefficients below ``trunc_limit`` are zeroed,
    then the resulting polynomial is printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data; it is not modified.
    features_description : iterable of (str, int)
        Column name and the highest power of that column to generate.
    target : str
        Name of the column to predict.
    trunc_limit : float
        Coefficients strictly below this value are set to 0 after fitting.
    degree : int
        ``degree`` passed to ``PolynomialFeatures``.

    Returns
    -------
    The fitted Lasso model, with ``coef_`` replaced by the truncated
    coefficients.

    Notes
    -----
    The printed error lines are labelled "microseconds" regardless of
    ``target``; the numbers are in whatever unit the target column uses.
    """
    new_df = df.copy()
    features = []
    for name, max_power in features_description:
        for power in range(1, max_power + 1):
            subname = "{}^{}".format(name, power)
            # Bind the exponent explicitly so the lambda never depends on the
            # loop variable's later value.
            new_df[subname] = new_df[name].apply(lambda x, p=power: x ** p)
            features.append(subname)

    print(features)

    poly = PolynomialFeatures(degree = degree, interaction_only=True, include_bias = False)

    train, test = split_df(new_df)

    X_train = poly.fit_transform(train[features])
    Y_train = train[target]

    lin = Lasso(alpha=0.0001,precompute=True, max_iter=100000, fit_intercept=False,
                positive=True, random_state=9999, selection='random')
    lin.fit(X_train, Y_train)

    print("Intercept = {}".format(lin.intercept_))

    print("score on training set {}".format(lin.score(X_train, Y_train)))

    # The transformer was fitted on the training data; held-out data is only
    # transformed, never used to refit preprocessing.
    X_test = poly.transform(test[features])
    Y_test = test[target]

    print("score on test set {}".format(lin.score(X_test, Y_test)))

    def report_test_errors():
        # Evaluates the model in its current state (coef_ may have been truncated).
        y_pred = lin.predict(X_test)
        print("Max absolute error {} microseconds".format(max_error(Y_test, y_pred)))
        print("Mean absolute error {} microseconds".format(mean_absolute_error(Y_test, y_pred)))
        print("R2 score = {}".format(r2_score(Y_test, y_pred)))

    print("Model accuracy before manual truncation of coefficients")
    report_test_errors()

    # Zero out negligible coefficients on a copy, then install them on the model.
    coeffs = lin.coef_.copy()
    coeffs[coeffs < trunc_limit] = 0.0
    lin.coef_ = coeffs

    print("Truncating coefficients lower than {}".format(trunc_limit))
    print("Model accuracy after manual truncation of coefficients")
    report_test_errors()

    pretty_print_polynomial(poly, lin, features)

    return lin

In [64]:
def analyze_mnt_final_exp(df, trunc_limit = 0.001, modulus_power = 6):
    """Fit the ``gas`` column of ``df`` with ``analyze_manual_poly``.

    Six parameter columns enter with power 1 only; ``modulus_limbs`` enters
    with powers ``1 .. modulus_power``. Feature interactions are limited to
    ``degree = 2``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame passed through to ``analyze_manual_poly``.
    trunc_limit : float
        Forwarded unchanged as the coefficient truncation threshold.
    modulus_power : int
        Highest power of ``modulus_limbs`` to generate.

    Returns
    -------
    Whatever ``analyze_manual_poly`` returns (the fitted model).
    """
    first_order_columns = (
        "x_bit_length",
        "x_hamming_weight",
        "exp_w0_bit_length",
        "exp_w0_hamming",
        "exp_w1_bit_length",
        "exp_w1_hamming",
    )

    print("Fitting final exp price")

    feature_spec = [(column, 1) for column in first_order_columns]
    feature_spec.append(("modulus_limbs", modulus_power))

    return analyze_manual_poly(df, feature_spec, "gas", trunc_limit = trunc_limit, degree = 2)

In [65]:
# Fit the gas model for the MNT4 averages, using modulus_limbs powers 1..4.
mnt4_final_exp = analyze_mnt_final_exp(mnt4, trunc_limit = 0.001, modulus_power = 4)

Fitting final exp price
['x_bit_length^1', 'x_hamming_weight^1', 'exp_w0_bit_length^1', 'exp_w0_hamming^1', 'exp_w1_bit_length^1', 'exp_w1_hamming^1', 'modulus_limbs^1', 'modulus_limbs^2', 'modulus_limbs^3', 'modulus_limbs^4']
Train samples 171393, test samples 19044
Intercept = 0.0
score on training set 0.9653543153843204
score on test set 0.9652089320240125
Model accuracy before manual truncation of coefficients
Max absolute error 1146374.4629738294 microseconds
Mean absolute error 131133.86125933478 microseconds
R2 score = 0.9652089320240125
Truncating coefficients lower than 0.001
Model accuracy after manual truncation of coefficients
Max absolute error 1142951.5823400202 microseconds
Mean absolute error 131164.41023098654 microseconds
R2 score = 0.9652078174421049
0.00569 * exp_w1_bit_length^1 + 8318.549641 * modulus_limbs^1 + 1822.237078 * modulus_limbs^2 + 16.372673 * x_bit_length^1 * modulus_limbs^1 + 3.780107 * x_bit_length^1 * modulus_limbs^2 + 0.001652 * x_hamming_weight^1 *

In [68]:
# Fit the gas model for the MNT6 averages, using modulus_limbs powers 1..6.
mnt6_final_exp = analyze_mnt_final_exp(mnt6, trunc_limit = 0.001, modulus_power = 6)

Fitting final exp price
['x_bit_length^1', 'x_hamming_weight^1', 'exp_w0_bit_length^1', 'exp_w0_hamming^1', 'exp_w1_bit_length^1', 'exp_w1_hamming^1', 'modulus_limbs^1', 'modulus_limbs^2', 'modulus_limbs^3', 'modulus_limbs^4', 'modulus_limbs^5', 'modulus_limbs^6']
Train samples 183260, test samples 20363
Intercept = 0.0
score on training set 0.968950360847339
score on test set 0.9693617431724423
Model accuracy before manual truncation of coefficients
Max absolute error 2367106.6833289554 microseconds
Mean absolute error 225540.1251108219 microseconds
R2 score = 0.9693617431724423
Truncating coefficients lower than 0.001
Model accuracy after manual truncation of coefficients
Max absolute error 2367106.6833289554 microseconds
Mean absolute error 225540.1251108219 microseconds
R2 score = 0.9693617431724423
14082.084517 * modulus_limbs^1 + 3049.940908 * modulus_limbs^2 + 35.589422 * x_bit_length^1 * modulus_limbs^1 + 6.554446 * x_bit_length^1 * modulus_limbs^2 + 28.927543 * x_hamming_weigh