In [1]:
# Multiplier applied to the measured run time (microseconds, rounded up) to
# express it as gas; read by correct_and_fit.
gas_factor = 30

In [2]:
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
import numpy as np
import math

# Seed NumPy's global random generator so repeated runs give the same numbers.
np.random.seed(42)

# One (curve name, benchmark CSV path) pair per curve family.
file_list = [
    ("MNT4", "./mnt4/miller_loop_parallel_3000.csv"),
    ("MNT6", "./mnt6/miller_loop_parallel_3000.csv"),
]

def get_dfs(files):
    """Read every benchmark CSV and remove the columns that are not analysed.

    Parameters
    ----------
    files : iterable of (name, path) pairs
        Only the path is used; the name is ignored here.

    Returns
    -------
    list of pandas.DataFrame
        One frame per entry of ``files``, in the same order. The columns
        ``num_pairs``, ``x_bit_length`` and ``x_hamming_weight`` (and any
        column not listed below) are kept.

    Raises
    ------
    KeyError
        If a CSV lacks one of the columns that are dropped.
    """
    unused_columns = [
        "x_is_negative",
        "group_limbs",
        "exp_w0_bit_length",
        "exp_w0_hamming",
        "exp_w0_is_negative",
        "exp_w1_bit_length",
        "exp_w1_hamming",
    ]
    return [
        pd.read_csv(csv_path).drop(unused_columns, axis = 1)
        for (_curve, csv_path) in files
    ]

In [3]:
# Load one measurement table per entry of file_list (index 0 = MNT4, 1 = MNT6).
dataframes = get_dfs(file_list)

In [4]:
# Preview the MNT4 measurements (first entry of file_list).
dataframes[0].head(15)

Unnamed: 0,modulus_limbs,num_pairs,x_bit_length,x_hamming_weight,run_microseconds
0,4,2,342,21,3907
1,5,2,342,21,4922
2,7,2,342,21,8003
3,4,4,342,21,7067
4,10,2,342,21,13650
5,5,4,342,21,8480
6,7,4,342,21,12180
7,4,8,342,21,10863
8,5,8,342,21,13943
9,10,4,342,21,23077


In [5]:
# Preview the MNT6 measurements (second entry of file_list).
dataframes[1].head(15)

Unnamed: 0,modulus_limbs,num_pairs,x_bit_length,x_hamming_weight,run_microseconds
0,4,2,1002,748,25755
1,5,2,1160,1023,43553
2,4,4,1002,748,51543
3,7,2,672,17,23008
4,5,4,1160,1023,84701
5,4,8,1002,748,101070
6,7,4,672,17,46254
7,7,8,672,17,88252
8,5,8,1160,1023,167938
9,10,2,573,337,54539


In [6]:
# One-off gas per modulus_limbs for MNT4; correct_and_fit subtracts this value
# from the measured gas of every row with the same modulus_limbs.
mnt4_one_off = pd.read_csv("./mnt4/one_off_results.csv")

mnt4_one_off.head(15)

Unnamed: 0,modulus_limbs,gas
0,4,2400
1,5,3540
2,6,4800
3,7,6540
4,8,8670
5,9,11130
6,10,13860
7,11,17130
8,12,21030
9,13,24930


In [7]:
# One-off gas per modulus_limbs for MNT6; correct_and_fit subtracts this value
# from the measured gas of every row with the same modulus_limbs.
mnt6_one_off = pd.read_csv("./mnt6/one_off_results.csv")

mnt6_one_off.head(15)

Unnamed: 0,modulus_limbs,gas
0,4,2430
1,5,3570
2,6,4860
3,7,6570
4,8,8910
5,9,11160
6,10,13890
7,11,17160
8,12,21300
9,13,25080


In [28]:
from sklearn import linear_model
from sklearn.linear_model import Lasso
from scipy.optimize import nnls

def correct_and_fit(df, one_offs, skip_bad_fits = False, min_fit_score = 0.85):
    """Estimate the per-pair Miller-loop gas for every parameter combination.

    Steps:

    1. Convert ``run_microseconds`` to gas: ``gas_factor * ceil(microseconds)``.
    2. Subtract the one-off gas that ``one_offs`` lists for the row's
       ``modulus_limbs`` and keep only rows whose corrected gas is positive.
    3. For every ``(modulus_limbs, x_bit_length, x_hamming_weight)`` group, fit
       ``gas_corrected ~ slope * num_pairs`` (no intercept, non-negative slope)
       with Lasso and record the slope as ``miller_gas``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``run_microseconds``, ``modulus_limbs``, ``num_pairs``,
        ``x_bit_length`` and ``x_hamming_weight``. It is not modified.
    one_offs : pandas.DataFrame
        Must contain ``modulus_limbs`` and ``gas``. When a ``modulus_limbs``
        value occurs more than once, the first occurrence is used.
    skip_bad_fits : bool
        When true, groups whose fit scores below ``min_fit_score`` are left out
        of the result.
    min_fit_score : float
        R^2 threshold used when ``skip_bad_fits`` is true.

    Returns
    -------
    pandas.DataFrame
        One row per retained group, labelled with the index of the group's
        first row. Columns are those of the corrected frame without
        ``num_pairs``, plus ``miller_gas``. ``gas_corrected`` is still present
        but always NaN; it is kept only so the column layout matches what
        earlier versions of this function returned.

    Raises
    ------
    IndexError
        If a ``modulus_limbs`` value of ``df`` has no entry in ``one_offs``.
    """
    group_columns = ["modulus_limbs", "x_bit_length", "x_hamming_weight"]

    # Vectorised form of gas_factor * math.ceil(run_microseconds).
    gas = np.ceil(df["run_microseconds"]).astype("int64") * gas_factor

    # Resolve the one-off cost once per modulus_limbs value instead of scanning
    # one_offs for every single row.
    one_off_gas = (one_offs.drop_duplicates("modulus_limbs")
                           .set_index("modulus_limbs")["gas"])
    baseline = df["modulus_limbs"].map(one_off_gas)
    if baseline.isna().any():
        missing = sorted(df.loc[baseline.isna(), "modulus_limbs"].unique())
        raise IndexError("no one-off gas value for modulus_limbs {}".format(missing))

    new_df = df.drop("run_microseconds", axis = 1)
    new_df["gas_corrected"] = gas - baseline

    leftovers = new_df[new_df["gas_corrected"] <= 0]
    print("Leftover rows (corrected gas < 0, noise artifacts) = {}".format(leftovers.shape[0]))

    new_df = new_df[new_df["gas_corrected"] > 0].copy()
    print("Data rows (corrected gas > 0, reasonable (even if noisy)) = {}".format(new_df.shape[0]))

    output_columns = [c for c in new_df.columns if c != "num_pairs"] + ["miller_gas"]

    min_score = 1.0
    records = []
    row_labels = []

    for (_key, g) in new_df.groupby(group_columns):
        # scikit-learn wants a 2-D feature matrix; build it from the frame
        # rather than indexing the Series with [:, np.newaxis], which pandas
        # no longer supports.
        pairs = g[["num_pairs"]].to_numpy()
        target = g["gas_corrected"].to_numpy()

        model = Lasso(alpha=0.0001, precompute=True, max_iter=1000,
                      positive=True, random_state=9999, selection='random',
                      fit_intercept = False)
        model.fit(pairs, target)

        score = model.score(pairs, target)
        min_score = min(min_score, score)

        if skip_bad_fits and score < min_fit_score:
            continue

        # coef_ holds a single value here (one feature, one target).
        slope = float(np.ravel(model.coef_)[0])

        first_row = g.iloc[0]
        record = first_row.drop(["num_pairs", "gas_corrected"]).to_dict()
        record["miller_gas"] = slope

        records.append(record)
        row_labels.append(first_row.name)

    print("Min score = {}".format(min_score))

    # Build the frame once; appending row by row is quadratic and
    # DataFrame.append no longer exists in pandas 2.x.
    return pd.DataFrame(records, index = row_labels, columns = output_columns)



In [29]:
# Per-configuration Miller-loop gas for MNT4, leaving out poorly fitting groups.
mnt4 = correct_and_fit(dataframes[0], mnt4_one_off, skip_bad_fits = True)
mnt4.head(25)

Leftover rows (corrected gas < 0, noise artifacts) = 0
Data rows (corrected gas > 0, reasonable (even if noisy)) = 156000
Min score = -0.05760226846084504


Unnamed: 0,modulus_limbs,x_bit_length,x_hamming_weight,gas_corrected,miller_gas
10819,4.0,2.0,1.0,,347.647058
5052,4.0,2.0,2.0,,454.588234
5965,4.0,3.0,3.0,,708.882352
8084,4.0,4.0,3.0,,826.499999
21855,4.0,5.0,2.0,,884.647058
14554,4.0,8.0,8.0,,2071.941175
16525,4.0,10.0,2.0,,1730.647058
4856,4.0,10.0,6.0,,1965.529411
1300,4.0,11.0,11.0,,2525.294116
24058,4.0,13.0,11.0,,2887.411764


In [30]:
# Per-configuration Miller-loop gas for MNT6, leaving out poorly fitting groups.
mnt6 = correct_and_fit(dataframes[1], mnt6_one_off, skip_bad_fits = True)
mnt6.head(25)

Leftover rows (corrected gas < 0, noise artifacts) = 1
Data rows (corrected gas > 0, reasonable (even if noisy)) = 155997
Min score = 0.8479568696346854


Unnamed: 0,modulus_limbs,x_bit_length,x_hamming_weight,gas_corrected,miller_gas
8370,4.0,1.0,1.0,,264.117646
4925,4.0,2.0,1.0,,500.294116
13697,4.0,3.0,2.0,,951.705881
27555,4.0,5.0,5.0,,2137.764705
1516,4.0,6.0,4.0,,2140.058822
1298,4.0,6.0,6.0,,2550.35294
23134,4.0,7.0,2.0,,2032.588234
9984,4.0,7.0,4.0,,2512.764705
20593,4.0,7.0,7.0,,3086.647058
2402,4.0,9.0,5.0,,3260.999999


In [39]:
from sklearn.model_selection import train_test_split

def split_df(df):
    """Split ``df`` into a 90% training part and a 10% test part.

    The split uses a fixed ``random_state`` so it is reproducible. The sizes of
    both parts are printed.

    Returns
    -------
    tuple
        ``(train, test)``.
    """
    parts = train_test_split(df, test_size=0.10, random_state=42)
    train_part, test_part = parts

    summary = "Train samples {}, test samples {}".format(len(train_part), len(test_part))
    print(summary)

    return (train_part, test_part)

In [40]:
from sklearn import linear_model
from sklearn.linear_model import Lasso

from sklearn.metrics import max_error, mean_absolute_error, r2_score

def pretty_print_polynomial(poly, model, variable_names):
    """Print the fitted polynomial in readable form and return the same text.

    Parameters
    ----------
    poly : object with a ``powers_`` 2-D array
        ``powers_[t, v]`` is the exponent of variable ``v`` in term ``t``.
    model : object with a ``coef_`` sequence
        ``coef_[t]`` is the coefficient of term ``t``.
    variable_names : sequence of str
        Name used for variable ``v``.

    Returns
    -------
    str
        Terms joined by ``" + "``; each term is the coefficient (rounded to six
        decimals) followed by its variables joined by ``" * "``. Terms whose
        coefficient is exactly zero are omitted, so the result is an empty
        string when all coefficients are zero.
    """
    terms = []

    for term_idx, powers in enumerate(poly.powers_):
        coeff = model.coef_[term_idx]
        if coeff == 0:
            continue

        # The coefficient always leads the term, so a term is never empty.
        factors = ["{}".format(np.around(coeff, decimals=6))]
        for variable_idx, power in enumerate(powers):
            if power == 0:
                continue
            name = variable_names[variable_idx]
            if power == 1:
                factors.append('{}'.format(name))
            else:
                factors.append('{}^{}'.format(name, power))

        terms.append(" * ".join(factors))

    polynomial_string = " + ".join(terms)
    print(polynomial_string)
    return polynomial_string

In [41]:
def _report_accuracy(y_true, y_pred):
    """Print max absolute error, mean absolute error and R2 for predictions."""
    # NOTE(review): the messages say "microseconds", but the notebook passes a
    # gas column as target -- confirm the intended unit before changing them.
    print("Max absolute error {} microseconds".format(max_error(y_true, y_pred)))
    print("Mean absolute error {} microseconds".format(mean_absolute_error(y_true, y_pred)))
    print("R2 score = {}".format(r2_score(y_true, y_pred)))


def analyze_manual_poly(df, features_description, target, trunc_limit = 0.001, degree = 2):
    """Fit a non-negative Lasso model on products of hand-built power features.

    For every ``(name, max_power)`` pair in ``features_description`` the columns
    ``"name^1" ... "name^max_power"`` are added to a copy of ``df``. These
    columns are expanded with ``PolynomialFeatures(interaction_only=True)`` up
    to ``degree`` and a Lasso model (no intercept, positive coefficients) is
    fitted on a 90/10 train/test split. Accuracy on the test part is printed
    before and after coefficients below ``trunc_limit`` are set to zero, and
    the resulting polynomial is printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data; it is not modified.
    features_description : iterable of (str, int)
        Column name and highest power to generate for it.
    target : str
        Name of the column to predict.
    trunc_limit : float
        Coefficients smaller than this are replaced by zero after fitting.
    degree : int
        ``degree`` passed to ``PolynomialFeatures``.

    Returns
    -------
    sklearn.linear_model.Lasso
        The fitted model, with ``coef_`` already truncated.
    """
    new_df = df.copy()
    features = []
    for name, max_power in features_description:
        for i in range(1, max_power+1):
            subname = "{}^{}".format(name, i)
            new_df[subname] = new_df[name] ** i
            features.append(subname)

    print(features)

    poly = PolynomialFeatures(degree = degree, interaction_only=True, include_bias = False)

    train, test = split_df(new_df)

    X_train = poly.fit_transform(train[features])
    Y_train = train[target]

    lin = Lasso(alpha=0.0001,precompute=True, max_iter=100000, fit_intercept=False,
                positive=True, random_state=9999, selection='random')
    lin.fit(X_train, Y_train)

    print("Intercept = {}".format(lin.intercept_))

    print("score on training set {}".format(lin.score(X_train, Y_train)))

    # Reuse the transformer fitted on the training data; it must not be
    # refitted on the test set.
    X_test = poly.transform(test[features])
    Y_test = test[target]

    print("score on test set {}".format(lin.score(X_test, Y_test)))

    print("Model accuracy before manual truncation of coefficients")
    _report_accuracy(Y_test, lin.predict(X_test))

    # Zero out every coefficient below the limit (works on a fresh array, the
    # fitted one is replaced as a whole).
    lin.coef_ = np.where(lin.coef_ < trunc_limit, 0.0, lin.coef_)

    print("Truncating coefficients lower than {}".format(trunc_limit))
    print("Model accuracy after manual truncation of coefficients")
    _report_accuracy(Y_test, lin.predict(X_test))

    pretty_print_polynomial(poly, lin, features)

    return lin

In [42]:
def analyze_mnt(df, trunc_limit = 0.001, modulus_power = 6):
    """Fit the Miller-loop gas model for one curve family.

    The target column is ``miller_gas``. Features are ``x_bit_length`` and
    ``x_hamming_weight`` (first power only) and ``modulus_limbs`` in powers
    ``1 .. modulus_power``, combined pairwise (degree 2).

    Returns the model produced by ``analyze_manual_poly``.
    """
    print("Fitting miller loop price")

    features_description = [
        ("x_bit_length", 1),
        ("x_hamming_weight", 1),
        ("modulus_limbs", modulus_power),
    ]
    fitted = analyze_manual_poly(
        df, features_description, "miller_gas",
        trunc_limit = trunc_limit, degree = 2)
    return fitted

In [43]:
# Fit the MNT4 Miller-loop gas model using modulus_limbs powers up to 4.
mnt4_pairs_pairsing = analyze_mnt(mnt4, trunc_limit = 0.001, modulus_power = 4)

Fitting final exp price
['x_bit_length^1', 'x_hamming_weight^1', 'modulus_limbs^1', 'modulus_limbs^2', 'modulus_limbs^3', 'modulus_limbs^4']
Train samples 35005, test samples 3890
Intercept = 0.0
score on training set 0.9912893801393658
score on test set 0.9907802076026703
Model accuracy before manual truncation of coefficients
Max absolute error 883263.6826318796 microseconds
Mean absolute error 37751.11379394999 microseconds
R2 score = 0.9907802076026704
Truncating coefficients lower than 0.001
Model accuracy after manual truncation of coefficients
Max absolute error 883263.6826318796 microseconds
Mean absolute error 37751.11379394999 microseconds
R2 score = 0.9907802076026704
22.521871 * x_bit_length^1 * modulus_limbs^1 + 3.194505 * x_bit_length^1 * modulus_limbs^2 + 12.146947 * x_hamming_weight^1 * modulus_limbs^1 + 3.132385 * x_hamming_weight^1 * modulus_limbs^2


In [44]:
# Fit the MNT6 Miller-loop gas model using modulus_limbs powers up to 6.
mnt6_pairs_pairsing = analyze_mnt(mnt6, trunc_limit = 0.001, modulus_power = 6)

Fitting final exp price
['x_bit_length^1', 'x_hamming_weight^1', 'modulus_limbs^1', 'modulus_limbs^2', 'modulus_limbs^3', 'modulus_limbs^4', 'modulus_limbs^5', 'modulus_limbs^6']
Train samples 35001, test samples 3890
Intercept = 0.0
score on training set 0.9903464794419672
score on test set 0.9907342995591065
Model accuracy before manual truncation of coefficients
Max absolute error 1439058.1257978538 microseconds
Mean absolute error 75778.70354264864 microseconds
R2 score = 0.9907342995591065
Truncating coefficients lower than 0.001
Model accuracy after manual truncation of coefficients
Max absolute error 1439058.1257978538 microseconds
Mean absolute error 75778.70354264864 microseconds
R2 score = 0.9907342995591065
1.618936 * modulus_limbs^2 + 45.10807 * x_bit_length^1 * modulus_limbs^1 + 5.585912 * x_bit_length^1 * modulus_limbs^2 + 31.773075 * x_hamming_weight^1 * modulus_limbs^1 + 5.524571 * x_hamming_weight^1 * modulus_limbs^2


In [45]:
from joblib import dump

def dump_model(model, features_description, name):
    """Write the model and its feature description to ``<name>.joblib``.

    The file holds a dict with the keys ``"features"`` and ``"model"``.
    """
    payload = {
        "features": features_description,
        "model": model,
    }
    target_path = "{}.joblib".format(name)
    dump(payload, target_path)

In [46]:
# Persist both fitted models. The feature descriptions repeat the ones built
# inside analyze_mnt, so the modulus_limbs power given here (4 for MNT4, 6 for
# MNT6) has to match the modulus_power used when each model was fitted.
dump_model(mnt4_pairs_pairsing, [
        ("x_bit_length", 1),
        ("x_hamming_weight", 1),
        ("modulus_limbs", 4)], "mnt4_miller")

dump_model(mnt6_pairs_pairsing, [
        ("x_bit_length", 1),
        ("x_hamming_weight", 1),
        ("modulus_limbs", 6)], "mnt6_miller")