In [116]:
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
import numpy as np
import math

# (label, CSV path) pairs for the benchmark result files that get_dfs loads.
file_list = [("MNT4", "./mnt4/final_exp_parallel_1500.csv"),
            ("MNT6", "./mnt6/final_exp_parallel_1500.csv")]

# Multiplier applied to the rounded-up run time (microseconds) to obtain gas.
gas_factor = 15

# Multiplier applied to the Miller-loop model prediction in apply_correction.
num_pairs = 2
# Fixed values substituted for the "x_bit_length" and "x_hamming_weight"
# features when the Miller-loop model is evaluated in evaluate_model.
x_bits = 1
x_hamming = 1

def get_dfs(files):
    """Read every benchmark CSV in ``files`` and remove the columns that are not used later.

    Parameters
    ----------
    files : iterable of (name, path) pairs
        Only the path is used for loading; the name is ignored.

    Returns
    -------
    list of pandas.DataFrame
        One frame per entry of ``files``, in the same order, without the
        ``x_*``, ``num_pairs`` and ``exp_w0_is_negative`` columns.

    Raises
    ------
    KeyError
        If a CSV lacks one of the columns that are removed.
    """
    unused_columns = [
        "x_is_negative",
        "x_bit_length",
        "x_hamming_weight",
        "num_pairs",
        "exp_w0_is_negative",
    ]
    return [
        pd.read_csv(csv_path).drop(columns=unused_columns)
        for _label, csv_path in files
    ]

In [43]:
from sklearn import linear_model
from sklearn.linear_model import Lasso
from scipy.optimize import nnls

def factor_out_final_exp(df, group_by = 3, skip_bad_fits = False):
    """Split measured run times into a per-pair slope and a constant intercept.

    The rows of ``df`` are processed in consecutive positional chunks of
    ``group_by`` rows. For every chunk a Lasso line with non-negative
    coefficients is fitted,

        run_microseconds ~ slope * num_pairs + intercept

    and the slope is reported as ``single_pair_miller_time`` while the
    intercept is reported as ``final_exp_time``. The remaining columns of each
    output row are copied from the first row of the chunk.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``num_pairs`` and ``run_microseconds`` columns.
    group_by : int
        Number of consecutive rows that form one fit.
    skip_bad_fits : bool
        When true, chunks whose R^2 score is below 0.85 are left out.

    Returns
    -------
    (df_miller_loops, df_final_exps) : tuple of pandas.DataFrame
        Both frames are indexed by the label of the first row of each kept
        chunk. Chunks with ``slope <= 1`` or ``intercept <= 1`` are always
        left out.

    Raises
    ------
    KeyError
        If ``df`` lacks ``num_pairs`` or ``run_microseconds``.
    """
    dropped = ["num_pairs", "run_microseconds"]
    # Index.drop raises KeyError when a required column is missing.
    descriptor_columns = list(df.columns.drop(dropped))

    # Rows are collected in plain lists and turned into frames once at the end:
    # DataFrame.append no longer exists in pandas >= 2.0 and was quadratic anyway.
    row_labels = []
    miller_records = []
    final_exp_records = []

    min_score = 1.0

    for _, g in df.groupby(np.arange(len(df)) // group_by):
        # Explicit (n, 1) arrays; Series[:, np.newaxis] is not supported by
        # current pandas.
        pairs = g["num_pairs"].to_numpy().reshape(-1, 1)
        times = g["run_microseconds"].to_numpy().reshape(-1, 1)

        model = Lasso(alpha=0.0001, precompute=True, max_iter=1000,
                      positive=True, random_state=9999, selection='random')
        model.fit(pairs, times)

        score = model.score(pairs, times)
        if score < min_score:
            min_score = score

        if score < 0.85 and skip_bad_fits:
            continue

        # ravel() makes the extraction independent of whether the estimator
        # exposes scalars or one-element arrays here.
        slope = np.ravel(model.coef_)[0]
        intercept = np.ravel(model.intercept_)[0]

        # Degenerate fits (no measurable per-pair or constant cost) are dropped.
        if slope <= 1 or intercept <= 1:
            continue

        descriptors = g.iloc[0].drop(dropped).to_dict()
        row_labels.append(g.index[0])
        miller_records.append(dict(descriptors, single_pair_miller_time=slope))
        final_exp_records.append(dict(descriptors, final_exp_time=intercept))

    print("Minimal final exp fitting score = {}".format(min_score))

    df_miller_loops = pd.DataFrame(
        miller_records, index=row_labels,
        columns=descriptor_columns + ["single_pair_miller_time"])
    df_final_exps = pd.DataFrame(
        final_exp_records, index=row_labels,
        columns=descriptor_columns + ["final_exp_time"])

    return (df_miller_loops, df_final_exps)

In [3]:
from sklearn import linear_model
from sklearn.linear_model import Lasso
from scipy.optimize import nnls

def factor_out_final_exp_non_negative(df, group_by = 3, skip_bad_fits = False):
    """Split measured run times into slope and intercept with non-negative least squares.

    The rows of ``df`` are processed in consecutive positional chunks of
    ``group_by`` rows. For every chunk ``scipy.optimize.nnls`` solves

        run_microseconds ~ slope * num_pairs + intercept,   slope, intercept >= 0

    The slope is reported as ``single_pair_miller_time`` and the intercept as
    ``final_exp_time``. The remaining columns of each output row are copied
    from the first row of the chunk.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``num_pairs`` and ``run_microseconds`` columns.
    group_by : int
        Number of consecutive rows that form one fit.
    skip_bad_fits : bool
        When true, chunks whose R^2 score is below 0.85 are left out.

    Returns
    -------
    (df_miller_loops, df_final_exps) : tuple of pandas.DataFrame
        Both frames are indexed by the label of the first row of each kept chunk.

    Raises
    ------
    KeyError
        If ``df`` lacks ``num_pairs`` or ``run_microseconds``.
    """
    dropped = ["num_pairs", "run_microseconds"]
    # Index.drop raises KeyError when a required column is missing.
    descriptor_columns = list(df.columns.drop(dropped))

    # Rows are collected in plain lists and turned into frames once at the end:
    # DataFrame.append no longer exists in pandas >= 2.0 and was quadratic anyway.
    row_labels = []
    miller_records = []
    final_exp_records = []

    min_score = 1.0

    for _, g in df.groupby(np.arange(len(df)) // group_by):
        pairs = g["num_pairs"].to_numpy(dtype=float)
        times = g["run_microseconds"].to_numpy(dtype=float)
        # Second column of ones lets nnls fit the intercept as a coefficient.
        design = np.column_stack([pairs, np.ones_like(pairs)])

        solution, residual_norm = nnls(design, times)
        slope, intercept = solution[0], solution[1]

        # nnls returns ||A x - b||_2, so its square is the residual sum of
        # squares needed for R^2.
        total_ss = float(np.sum((times - times.mean()) ** 2))
        if total_ss > 0.0:
            score = 1.0 - residual_norm ** 2 / total_ss
        else:
            # Constant target: perfect if reproduced exactly, otherwise worthless.
            score = 1.0 if residual_norm == 0.0 else 0.0

        if score < min_score:
            min_score = score

        if score < 0.85 and skip_bad_fits:
            continue

        descriptors = g.iloc[0].drop(dropped).to_dict()
        row_labels.append(g.index[0])
        miller_records.append(dict(descriptors, single_pair_miller_time=slope))
        final_exp_records.append(dict(descriptors, final_exp_time=intercept))

    print("Minimal final exp fitting score = {}".format(min_score))

    df_miller_loops = pd.DataFrame(
        miller_records, index=row_labels,
        columns=descriptor_columns + ["single_pair_miller_time"])
    df_final_exps = pd.DataFrame(
        final_exp_records, index=row_labels,
        columns=descriptor_columns + ["final_exp_time"])

    return (df_miller_loops, df_final_exps)

In [117]:
# Load one frame per entry of file_list (same order as file_list).
dataframes = get_dfs(file_list)

In [118]:
# Preview the frame loaded from the first entry of file_list.
dataframes[0].head(15)

Unnamed: 0,modulus_limbs,group_limbs,exp_w0_bit_length,exp_w0_hamming,exp_w1_bit_length,exp_w1_hamming,run_microseconds
0,4,1,342,21,1911,532,1625
1,4,1,619,519,1439,1043,1493
2,7,5,342,21,1911,532,7986
3,4,1,478,101,644,212,1022
4,4,1,1870,1838,1727,562,1994
5,4,1,145,145,1636,417,1328
6,4,1,782,540,1899,455,1678
7,10,9,342,21,1911,532,22073
8,4,1,1775,576,1190,120,1947
9,7,5,388,4,1629,942,7255


In [119]:
# Preview the frame loaded from the second entry of file_list.
dataframes[1].head(15)

Unnamed: 0,modulus_limbs,group_limbs,exp_w0_bit_length,exp_w0_hamming,exp_w1_bit_length,exp_w1_hamming,run_microseconds
0,4,1,1046,11,1514,957,2867
1,4,1,1436,184,1226,728,2893
2,4,1,1352,543,1711,601,3215
3,4,1,1516,450,1048,153,2940
4,7,5,517,216,1077,79,11732
5,4,1,1494,33,1587,17,3249
6,4,1,643,401,130,64,1423
7,4,1,401,154,1236,400,2063
8,4,1,1841,699,1081,421,3066
9,4,1,1733,1324,1212,72,3125


In [120]:
from sklearn.model_selection import train_test_split

def split_df(df):
    """Split ``df`` into a 90% training part and a 10% test part.

    The split uses a fixed ``random_state`` of 42, and the sizes of both parts
    are printed.

    Returns
    -------
    (train, test) : tuple
        The two parts as returned by ``train_test_split``.
    """
    train_part, test_part = train_test_split(df, test_size=0.10, random_state=42)

    size_report = "Train samples {}, test samples {}".format(len(train_part), len(test_part))
    print(size_report)

    return (train_part, test_part)

In [121]:
from sklearn import linear_model
from sklearn.linear_model import Lasso

from sklearn.metrics import max_error, mean_absolute_error, r2_score

def pretty_print_polynomial(poly, model, variable_names):
    """Print the fitted polynomial as ``coeff * var * var^p + ...``.

    Parameters
    ----------
    poly : object with a 2-D ``powers_`` array
        Row ``t`` holds the exponent of every input variable in term ``t``.
    model : object with a ``coef_`` sequence
        ``coef_[t]`` is the coefficient of term ``t``.
    variable_names : sequence of str
        Name of each input variable, indexed like the columns of ``powers_``.

    Terms whose coefficient is exactly zero are omitted; the printed
    coefficients are rounded to six decimals.
    """
    def render_factor(var_idx, exponent):
        # An exponent of one is printed as the bare variable name.
        label = variable_names[var_idx]
        if exponent == 1:
            return '{}'.format(label)
        return '{}^{}'.format(label, exponent)

    rendered_terms = []
    for row_idx, exponents in enumerate(poly.powers_):
        weight = model.coef_[row_idx]
        if weight == 0:
            continue

        pieces = ["{}".format(np.around(weight, decimals=6))]
        pieces.extend(
            render_factor(var_idx, exponent)
            for var_idx, exponent in enumerate(exponents)
            if exponent != 0
        )
        rendered_terms.append(" * ".join(pieces))

    print(" + ".join(rendered_terms))

In [122]:
# Per-modulus_limbs one-off gas for MNT4; apply_correction subtracts the
# matching "gas" value from every measurement.
mnt4_one_off = pd.read_csv("./mnt4/one_off_results.csv")

mnt4_one_off.head(15)

Unnamed: 0,modulus_limbs,gas
0,4,1455
1,5,2250
2,6,3135
3,7,4515
4,8,6435
5,9,8625
6,10,11145
7,11,14130
8,12,18210
9,13,22410


In [123]:
# Per-modulus_limbs one-off gas for MNT6; apply_correction subtracts the
# matching "gas" value from every measurement.
mnt6_one_off = pd.read_csv("./mnt6/one_off_results.csv")

mnt6_one_off.head(15)

Unnamed: 0,modulus_limbs,gas
0,4,2085
1,5,3855
2,6,4815
3,7,7290
4,8,9855
5,9,14070
6,10,18255
7,11,22095
8,12,29310
9,13,34710


In [124]:
from joblib import load

# Previously saved Miller-loop model descriptions; evaluate_model reads their
# "model" and "features" entries.
# NOTE(review): joblib.load is pickle-based and can execute arbitrary code -
# only load files from a trusted source.
mnt4_model_description = load("mnt4_miller.joblib")
mnt6_model_description = load("mnt6_miller.joblib")

In [157]:
def evaluate_model(x, model_description, degree = 2):
    """Predict a single value from a stored model description for one row.

    Parameters
    ----------
    x : mapping (e.g. a DataFrame row)
        Supplies the value of every feature except ``x_bit_length`` and
        ``x_hamming_weight``.
    model_description : dict
        ``"model"`` is the fitted estimator; ``"features"`` is a sequence of
        ``(name, max_power)`` pairs.
    degree : int
        Degree passed to ``PolynomialFeatures`` (interaction terms only, no bias).

    Returns
    -------
    The first element of ``model.predict`` for the single constructed sample.

    Notes
    -----
    ``x_bit_length`` and ``x_hamming_weight`` are not read from ``x``; the
    module-level ``x_bits`` and ``x_hamming`` values are used instead.
    """
    estimator = model_description["model"]
    feature_spec = model_description["features"]

    def base_value(feature_name):
        # The two x-dependent features are pinned to module-level constants.
        if feature_name == "x_bit_length":
            return x_bits
        if feature_name == "x_hamming_weight":
            return x_hamming
        return x[feature_name]

    # One single-row column per power of every feature: name^1 .. name^max_power.
    single_row = {}
    for feature_name, highest_power in feature_spec:
        for exponent in range(1, highest_power + 1):
            column_label = "{}^{}".format(feature_name, exponent)
            single_row[column_label] = [base_value(feature_name) ** exponent]

    inputs = pd.DataFrame(single_row)

    expander = PolynomialFeatures(degree = degree, interaction_only=True, include_bias = False)
    expanded = expander.fit_transform(inputs)

    return estimator.predict(expanded)[0]

In [158]:
def apply_correction(x, one_offs, miller_model_description):
    """Return the gas of row ``x`` with one-off and Miller-loop contributions removed.

    The one-off part is the ``gas`` value of the first ``one_offs`` row whose
    ``modulus_limbs`` equals that of ``x``. The Miller-loop part is
    ``num_pairs`` times the prediction of ``evaluate_model`` for ``x``.

    Raises
    ------
    IndexError
        If ``one_offs`` has no row with the same ``modulus_limbs`` as ``x``.
    """
    measured_gas = x["gas"]

    same_limbs = one_offs["modulus_limbs"] == x["modulus_limbs"]
    one_off_gas = one_offs[same_limbs]["gas"].array[0]

    miller_gas = num_pairs * evaluate_model(x, miller_model_description)

    return measured_gas - one_off_gas - miller_gas
    

In [159]:
def correct_for_parsing_and_miller(df, one_offs, miller_model_description):
    """Build a copy of ``df`` whose target column is the corrected gas.

    ``run_microseconds`` is rounded up and multiplied by ``gas_factor`` to get
    the gas of each row, and ``apply_correction`` then subtracts the one-off
    and Miller-loop parts into ``gas_corrected``. The ``run_microseconds``,
    ``group_limbs`` and intermediate ``gas`` columns are not part of the
    result. ``df`` itself is left unmodified.
    """
    priced = df.copy()
    priced["gas"] = priced["run_microseconds"].apply(
        lambda micros: gas_factor * math.ceil(micros))

    def residual_gas(row):
        return apply_correction(row, one_offs, miller_model_description)

    priced["gas_corrected"] = priced.apply(residual_gas, axis = 1)

    return priced.drop(columns=["run_microseconds", "group_limbs", "gas"])

In [160]:
# Corrected-gas frame for the first loaded dataset, using the MNT4 one-off
# table and the MNT4 Miller-loop model.
mnt4 = correct_for_parsing_and_miller(dataframes[0], mnt4_one_off, mnt4_model_description)
mnt4.head(15)

Unnamed: 0,modulus_limbs,group_limbs,exp_w0_bit_length,exp_w0_hamming,exp_w1_bit_length,exp_w1_hamming,gas,gas_corrected
0,4,1,342,21,1911,532,24375,14739.8856
1,4,1,619,519,1439,1043,22395,12759.8856
2,7,5,342,21,1911,532,119790,29707.050536
3,4,1,478,101,644,212,15330,5694.8856
4,4,1,1870,1838,1727,562,29910,20274.8856
5,4,1,145,145,1636,417,19920,10284.8856
6,4,1,782,540,1899,455,25170,15534.8856
7,10,9,342,21,1911,532,331095,51539.54215
8,4,1,1775,576,1190,120,29205,19569.8856
9,7,5,388,4,1629,942,108825,18742.050536


In [161]:
# Corrected-gas frame for the second loaded dataset, using the MNT6 one-off
# table and the MNT6 Miller-loop model.
mnt6 = correct_for_parsing_and_miller(dataframes[1], mnt6_one_off, mnt6_model_description)
mnt6.head(15)

Unnamed: 0,modulus_limbs,group_limbs,exp_w0_bit_length,exp_w0_hamming,exp_w1_bit_length,exp_w1_hamming,gas,gas_corrected
0,4,1,1046,11,1514,957,43005,26698.419833
1,4,1,1436,184,1226,728,43395,27088.419833
2,4,1,1352,543,1711,601,48225,31918.419833
3,4,1,1516,450,1048,153,44100,27793.419833
4,7,5,517,216,1077,79,175980,22648.568875
5,4,1,1494,33,1587,17,48735,32428.419833
6,4,1,643,401,130,64,21345,5038.419833
7,4,1,401,154,1236,400,30945,14638.419833
8,4,1,1841,699,1081,421,45990,29683.419833
9,4,1,1733,1324,1212,72,46875,30568.419833


In [169]:
def _print_test_accuracy(y_true, y_pred):
    """Print max error, mean absolute error and R2 of ``y_pred`` against ``y_true``."""
    # NOTE(review): the "microseconds" label is hard-coded; the errors are in
    # whatever unit the target column uses.
    print("Max absolute error {} microseconds".format(max_error(y_true, y_pred)))
    print("Mean absolute error {} microseconds".format(mean_absolute_error(y_true, y_pred)))
    print("R2 score = {}".format(r2_score(y_true, y_pred)))


def analyze_manual_poly(df, features_description, target, trunc_limit = 0.001, degree = 3):
    """Fit a non-negative, intercept-free Lasso on interaction terms of feature powers.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data; it is copied, not modified.
    features_description : sequence of (name, max_power)
        For every pair the columns ``name^1`` .. ``name^max_power`` are derived
        from column ``name`` and used as base features.
    target : str
        Name of the column to predict.
    trunc_limit : float
        Coefficients below this value are set to zero after fitting.
    degree : int
        Degree passed to ``PolynomialFeatures`` (interaction terms only, no bias).

    Returns
    -------
    sklearn.linear_model.Lasso
        The fitted model with its ``coef_`` replaced by the truncated
        coefficients.

    Side effects
    ------------
    Prints the feature list, split sizes, scores, test-set accuracy before and
    after truncation, and the resulting polynomial.
    """
    new_df = df.copy()
    features = []
    for name, max_power in features_description:
        for i in range(1, max_power + 1):
            subname = "{}^{}".format(name, i)
            # Vectorised power instead of a per-element Python lambda.
            new_df[subname] = new_df[name] ** i
            features.append(subname)

    print(features)

    poly = PolynomialFeatures(degree = degree, interaction_only=True, include_bias = False)

    train, test = split_df(new_df)

    X_train = poly.fit_transform(train[features])
    Y_train = train[target]

    lin = Lasso(alpha=0.0001, precompute=True, max_iter=100000, fit_intercept=False,
                positive=True, random_state=9999, selection='random')
    lin.fit(X_train, Y_train)

    print("Intercept = {}".format(lin.intercept_))

    print("score on training set {}".format(lin.score(X_train, Y_train)))

    # The transformer was fitted on the training split; the test split must
    # only be transformed, never used to re-fit it.
    X_test = poly.transform(test[features])
    Y_test = test[target]

    print("score on test set {}".format(lin.score(X_test, Y_test)))

    print("Model accuracy before manual truncation of coefficients")
    _print_test_accuracy(Y_test, lin.predict(X_test))

    # Zero out every coefficient below the limit and install the result on the
    # model, so that both the report below and the returned model use it.
    coeffs = lin.coef_.copy()
    coeffs[coeffs < trunc_limit] = 0.0
    lin.coef_ = coeffs

    print("Truncating coefficients lower than {}".format(trunc_limit))
    print("Model accuracy after manual truncation of coefficients")
    _print_test_accuracy(Y_test, lin.predict(X_test))

    pretty_print_polynomial(poly, lin, features)

    return lin

In [170]:
def analyze_mnt_final_exp(df, trunc_limit = 0.001, modulus_power = 6):
    """Fit the ``gas_corrected`` column of ``df`` with ``analyze_manual_poly``.

    The four ``exp_w0_*`` / ``exp_w1_*`` columns enter with power 1 only, while
    ``modulus_limbs`` enters with every power up to ``modulus_power``.
    Interaction terms up to degree 2 are used.

    Returns
    -------
    The fitted model returned by ``analyze_manual_poly``.
    """
    print("Fitting final exp price")

    linear_inputs = (
        "exp_w0_bit_length",
        "exp_w0_hamming",
        "exp_w1_bit_length",
        "exp_w1_hamming",
    )
    feature_spec = [(column, 1) for column in linear_inputs]
    feature_spec.append(("modulus_limbs", modulus_power))

    return analyze_manual_poly(
        df, feature_spec, "gas_corrected", trunc_limit = trunc_limit, degree = 2)

In [171]:
# Fit the MNT4 corrected gas, allowing powers of modulus_limbs up to 4.
mnt4_final_exp = analyze_mnt_final_exp(mnt4, trunc_limit = 0.001, modulus_power = 4)

Fitting final exp price
['exp_w0_bit_length^1', 'exp_w0_hamming^1', 'exp_w1_bit_length^1', 'exp_w1_hamming^1', 'modulus_limbs^1', 'modulus_limbs^2', 'modulus_limbs^3', 'modulus_limbs^4']
Train samples 280800, test samples 31200
Intercept = 0.0
score on training set 0.8749532081596542
score on test set 0.8743848078657439
Model accuracy before manual truncation of coefficients
Max absolute error 323681.46151863155 microseconds
Mean absolute error 10047.672434648233 microseconds
R2 score = 0.8743848078657439
Truncating coefficients lower than 0.001
Model accuracy after manual truncation of coefficients
Max absolute error 322950.15284376155 microseconds
Mean absolute error 10064.245363138563 microseconds
R2 score = 0.8741883158388741
0.271691 * exp_w0_bit_length^1 * modulus_limbs^2 + 0.268846 * exp_w1_bit_length^1 * modulus_limbs^2


In [172]:
# Fit the MNT6 corrected gas, allowing powers of modulus_limbs up to 6.
mnt6_final_exp = analyze_mnt_final_exp(mnt6, trunc_limit = 0.001, modulus_power = 6)

Fitting final exp price
['exp_w0_bit_length^1', 'exp_w0_hamming^1', 'exp_w1_bit_length^1', 'exp_w1_hamming^1', 'modulus_limbs^1', 'modulus_limbs^2', 'modulus_limbs^3', 'modulus_limbs^4', 'modulus_limbs^5', 'modulus_limbs^6']
Train samples 280800, test samples 31200
Intercept = 0.0
score on training set 0.8778198145819266
score on test set 0.8755852589840015
Model accuracy before manual truncation of coefficients
Max absolute error 560971.3564491379 microseconds
Mean absolute error 17607.70488335559 microseconds
R2 score = 0.8755852589840015
Truncating coefficients lower than 0.001
Model accuracy after manual truncation of coefficients
Max absolute error 559461.2624827889 microseconds
Mean absolute error 17543.9863253341 microseconds
R2 score = 0.8755100974767166
1.163557 * exp_w0_bit_length^1 * modulus_limbs^1 + 0.410943 * exp_w0_bit_length^1 * modulus_limbs^2 + 1.147779 * exp_w1_bit_length^1 * modulus_limbs^1 + 0.413714 * exp_w1_bit_length^1 * modulus_limbs^2
