In [1]:
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
import numpy as np
import math

# Benchmark inputs as (curve label, CSV path) pairs. get_dfs() returns the
# loaded tables in this same order.
file_list = [("MNT4", "./mnt4/miller_loop_parallel_100.csv"),
            ("MNT6", "./mnt6/miller_loop_parallel_100.csv")]

# Multiplier that correct_and_fit() applies to the rounded-up run_microseconds
# value to turn a measured run time into a gas amount.
gas_factor = 15

def get_dfs(files):
    """Load each benchmark CSV and discard the columns the fit does not use.

    Parameters
    ----------
    files : iterable of (name, path) pairs
        ``path`` is read with ``pandas.read_csv``; ``name`` is only a label
        and is not used here.

    Returns
    -------
    list of pandas.DataFrame
        One frame per entry of ``files``, in the same order.
    """
    # num_pairs, group_limbs, x_bit_length and x_hamming_weight are
    # deliberately NOT in this list: the later cells group and fit on them.
    unused_columns = [
        "x_is_negative",
        "exp_w0_bit_length",
        "exp_w0_hamming",
        "exp_w0_is_negative",
        "exp_w1_bit_length",
        "exp_w1_hamming",
    ]

    return [
        pd.read_csv(csv_path).drop(columns=unused_columns)
        for _label, csv_path in files
    ]

In [2]:
# Load the benchmark tables in file_list order: index 0 is MNT4, index 1 is MNT6.
dataframes = get_dfs(file_list)

In [3]:
# Preview the first rows of the MNT4 benchmark table (first entry of file_list).
dataframes[0].head(15)

Unnamed: 0,modulus_limbs,group_limbs,num_pairs,x_bit_length,x_hamming_weight,run_microseconds
0,4,1,2,342,21,4318
1,4,1,4,342,21,7236
2,4,1,8,342,21,12603
3,12,3,2,342,21,23089
4,10,9,2,342,21,26404
5,4,1,16,342,21,25835
6,13,13,2,342,21,52510
7,12,3,4,342,21,46255
8,10,9,4,342,21,53844
9,12,3,8,342,21,90113


In [4]:
# Preview the first rows of the MNT6 benchmark table (second entry of file_list).
dataframes[1].head(15)

Unnamed: 0,modulus_limbs,group_limbs,num_pairs,x_bit_length,x_hamming_weight,run_microseconds
0,4,1,2,1515,735,33617
1,4,1,4,1515,735,62682
2,4,1,8,1515,735,142899
3,4,1,16,1515,735,284570
4,12,3,2,1293,511,169513
5,10,9,2,1782,1734,250699
6,12,3,4,1293,511,339256
7,4,1,2,1385,117,24682
8,4,1,4,1385,117,48923
9,4,1,8,1385,117,98340


In [5]:
# MNT4 table of one gas value per modulus_limbs; correct_and_fit() subtracts
# the matching value from every measurement before fitting.
mnt4_one_off = pd.read_csv("./mnt4/one_off_results.csv")

mnt4_one_off.head(15)

Unnamed: 0,modulus_limbs,gas
0,4,1455
1,5,2250
2,6,3135
3,7,4515
4,8,6435
5,9,8625
6,10,11145
7,11,14130
8,12,18210
9,13,22410


In [6]:
# MNT6 table of one gas value per modulus_limbs; correct_and_fit() subtracts
# the matching value from every measurement before fitting.
mnt6_one_off = pd.read_csv("./mnt6/one_off_results.csv")

mnt6_one_off.head(15)

Unnamed: 0,modulus_limbs,gas
0,4,2085
1,5,3855
2,6,4815
3,7,7290
4,8,9855
5,9,14070
6,10,18255
7,11,22095
8,12,29310
9,13,34710


In [17]:
from sklearn import linear_model
from sklearn.linear_model import Lasso
from scipy.optimize import nnls

def correct_and_fit(df, one_offs, skip_bad_fits = False, bad_fit_threshold = 0.85):
    """Estimate the gas cost per pair for every parameter combination.

    The rows of ``df`` are grouped by ``modulus_limbs``, ``group_limbs``,
    ``x_bit_length`` and ``x_hamming_weight``. For each group:

    1. every ``run_microseconds`` value is rounded up and multiplied by the
       module-level ``gas_factor`` to obtain a gas amount;
    2. the ``gas`` value that ``one_offs`` lists for the group's
       ``modulus_limbs`` is subtracted;
    3. a non-negative, intercept-free Lasso line ``gas ~ slope * num_pairs``
       is fitted, and ``slope`` is stored as ``miller_gas``.

    Parameters
    ----------
    df : pandas.DataFrame
        Benchmark rows. Must contain the four grouping columns plus
        ``num_pairs`` and ``run_microseconds``.
    one_offs : pandas.DataFrame
        Must contain ``modulus_limbs`` and ``gas``. If a modulus appears more
        than once, its first row is used.
    skip_bad_fits : bool
        When true, groups whose R2 score is below ``bad_fit_threshold`` are
        left out of the result.
    bad_fit_threshold : float
        R2 cutoff used by ``skip_bad_fits``.

    Returns
    -------
    pandas.DataFrame
        One row per kept group: the columns of ``df`` without ``num_pairs``
        and ``run_microseconds``, plus ``miller_gas``. Each row is indexed by
        the index label of the group's first row in ``df``.

    Raises
    ------
    ValueError
        If ``one_offs`` has no entry for a ``modulus_limbs`` value found in
        ``df``, or if the resulting index labels are not unique.

    The lowest R2 score over all groups (skipped ones included) is printed.
    """
    group_keys = ["modulus_limbs", "group_limbs", "x_bit_length", "x_hamming_weight"]
    dropped = ("num_pairs", "run_microseconds")
    out_columns = [c for c in df.columns if c not in dropped] + ["miller_gas"]

    # modulus_limbs is a grouping key, so the one-off value is constant within
    # a group: build the lookup once instead of filtering one_offs per row.
    one_off_gas = one_offs.drop_duplicates("modulus_limbs").set_index("modulus_limbs")["gas"]

    min_score = 1.0
    rows = []

    for key, g in df.groupby(group_keys):
        limbs = key[0]
        if limbs not in one_off_gas.index:
            raise ValueError(
                "no one-off gas value for modulus_limbs = {}".format(limbs))

        gas = gas_factor * np.ceil(g["run_microseconds"].to_numpy(dtype=float))
        gas_corrected = gas - float(one_off_gas.loc[limbs])

        # Double brackets give a 2-D (n_samples, 1) feature matrix without the
        # multi-dimensional Series indexing that pandas 2.0 removed.
        X = g[["num_pairs"]].to_numpy(dtype=float)

        model = Lasso(alpha=0.0001,precompute=True,max_iter=1000,
            positive=True, random_state=9999, selection='random', fit_intercept = False)
        model.fit(X, gas_corrected)

        score = model.score(X, gas_corrected)
        min_score = min(min_score, score)

        if skip_bad_fits and score < bad_fit_threshold:
            continue

        # ravel() keeps this working whatever shape the installed sklearn
        # version gives coef_ for a single feature.
        slope = float(np.ravel(model.coef_)[0])

        # Keep the first row of the group as the representative of its
        # parameter combination; its index label becomes the result label.
        row = g.iloc[0].drop(labels=list(dropped))
        row["miller_gas"] = slope
        rows.append(row)

    print("Min score = {}".format(min_score))

    # Build the frame once; DataFrame.append was removed in pandas 2.0 and
    # grew the frame quadratically.
    df_miller_loops = pd.DataFrame(rows, columns=out_columns)
    if not df_miller_loops.index.is_unique:
        raise ValueError("duplicate index labels in the fitted results")

    return df_miller_loops



In [18]:
# Reduce the MNT4 measurements to one fitted per-pair gas value (miller_gas)
# per parameter combination.
mnt4 = correct_and_fit(dataframes[0], mnt4_one_off)
mnt4.head(25)

Min score = 0.9799056690448423


Unnamed: 0,modulus_limbs,group_limbs,x_bit_length,x_hamming_weight,miller_gas
391,4.0,1.0,9.0,6.0,4470.441175
237,4.0,1.0,10.0,4.0,4109.558822
49,4.0,1.0,29.0,5.0,5586.176469
176,4.0,1.0,42.0,2.0,6390.882352
458,4.0,1.0,76.0,41.0,10755.264705
202,4.0,1.0,94.0,24.0,11501.999999
492,4.0,1.0,122.0,76.0,15727.588234
524,4.0,1.0,132.0,16.0,12973.941175
336,4.0,1.0,138.0,67.0,17143.764705
188,4.0,1.0,142.0,54.0,16620.617646


In [19]:
# Reduce the MNT6 measurements to one fitted per-pair gas value (miller_gas)
# per parameter combination.
mnt6 = correct_and_fit(dataframes[1], mnt6_one_off)
mnt6.head(25)

Min score = 0.9825249987538169


Unnamed: 0,modulus_limbs,group_limbs,x_bit_length,x_hamming_weight,miller_gas
326,4.0,1.0,2.0,2.0,6687.529411
238,4.0,1.0,4.0,3.0,6745.147058
173,4.0,1.0,4.0,4.0,7224.441175
162,4.0,1.0,8.0,7.0,7689.882352
472,4.0,1.0,9.0,6.0,7241.117646
369,4.0,1.0,33.0,18.0,12048.176469
330,4.0,1.0,58.0,8.0,14049.35294
243,4.0,1.0,108.0,81.0,29090.382352
277,4.0,1.0,114.0,45.0,26038.85294
100,4.0,1.0,128.0,72.0,30681.882352


In [20]:
from sklearn.model_selection import train_test_split

def split_df(df):
    """Split ``df`` into a training part and a test part.

    10 % of the rows go to the test part. The split uses a fixed
    ``random_state`` of 42, so repeated calls on the same frame give the same
    partition. Both sizes are printed.

    Returns
    -------
    tuple
        ``(train, test)``.
    """
    train_part, test_part = train_test_split(df, test_size=0.10, random_state=42)

    sizes = (len(train_part), len(test_part))
    print("Train samples {}, test samples {}".format(*sizes))

    return (train_part, test_part)

In [21]:
from sklearn import linear_model
from sklearn.linear_model import Lasso

from sklearn.metrics import max_error, mean_absolute_error, r2_score

def pretty_print_polynomial(poly, model, variable_names):
    """Print the fitted polynomial as a sum of monomials.

    Parameters
    ----------
    poly : object with a 2-D ``powers_`` array
        Row ``t`` holds the exponent of each input variable in term ``t``.
    model : object with a ``coef_`` sequence
        ``coef_[t]`` is the coefficient of term ``t``.
    variable_names : sequence of str
        Name of each input variable, in the column order of ``powers_``.

    Terms whose coefficient is exactly zero are left out. Coefficients are
    rounded to six decimals for display. The result is printed; nothing is
    returned.
    """
    rendered_terms = []

    for row, exponents in enumerate(poly.powers_):
        weight = model.coef_[row]
        if weight == 0:
            continue

        # The rounded coefficient always leads the term.
        factors = ["{}".format(np.around(weight, decimals=6))]

        for column, exponent in enumerate(exponents):
            if exponent == 0:
                continue
            label = variable_names[column]
            if exponent == 1:
                factors.append('{}'.format(label))
            else:
                factors.append('{}^{}'.format(label, exponent))

        rendered_terms.append(" * ".join(factors))

    print(" + ".join(rendered_terms))

In [22]:
def _print_accuracy(title, y_true, y_pred, target):
    """Print max/mean absolute error and R2 of ``y_pred`` against ``y_true``."""
    print(title)
    # Errors are in the units of the fitted target column, not microseconds.
    print("Max absolute error {} (in units of {})".format(max_error(y_true, y_pred), target))
    print("Mean absolute error {} (in units of {})".format(mean_absolute_error(y_true, y_pred), target))
    print("R2 score = {}".format(r2_score(y_true, y_pred)))


def analyze_manual_poly(df, features_description, target, trunc_limit = 0.001, degree = 2):
    """Fit a non-negative Lasso polynomial model and report its accuracy.

    Parameters
    ----------
    df : pandas.DataFrame
        Input rows; not modified (a copy is extended with power columns).
    features_description : iterable of (column name, max power) pairs
        For each pair, the columns ``name^1`` .. ``name^max_power`` are added
        and used as base features.
    target : str
        Name of the column to predict.
    trunc_limit : float
        Fitted coefficients below this value are set to zero after the first
        accuracy report.
    degree : int
        Degree passed to ``PolynomialFeatures`` (``interaction_only=True``,
        no bias column), applied on top of the base features.

    Returns
    -------
    sklearn.linear_model.Lasso
        The fitted model with its ``coef_`` replaced by the truncated
        coefficients.

    The function prints the feature list, the train/test sizes (via
    ``split_df``), the intercept, the train and test scores, the test-set
    accuracy before and after truncation, and the resulting polynomial.
    """
    new_df = df.copy()
    features = []
    for name, max_power in features_description:
        for power in range(1, max_power + 1):
            subname = "{}^{}".format(name, power)
            # Vectorised power instead of a per-element apply(lambda).
            new_df[subname] = new_df[name] ** power
            features.append(subname)

    print(features)

    poly = PolynomialFeatures(degree = degree, interaction_only=True, include_bias = False)

    train, test = split_df(new_df)

    X_train = poly.fit_transform(train[features])
    Y_train = train[target]

    lin = Lasso(alpha=0.0001,precompute=True, max_iter=100000, fit_intercept=False,
                positive=True, random_state=9999, selection='random')
    lin.fit(X_train, Y_train)

    print("Intercept = {}".format(lin.intercept_))

    print("score on training set {}".format(lin.score(X_train, Y_train)))

    # Only transform the test rows: the transformer was fitted on the training
    # rows and must not be refitted on held-out data.
    X_test = poly.transform(test[features])
    Y_test = test[target]

    print("score on test set {}".format(lin.score(X_test, Y_test)))

    _print_accuracy("Model accuracy before manual truncation of coefficients",
                    Y_test, lin.predict(X_test), target)

    # The model is fitted with positive=True, so a plain "< trunc_limit"
    # comparison is enough to remove the negligible coefficients.
    coeffs = lin.coef_.copy()
    coeffs[coeffs < trunc_limit] = 0.0
    lin.coef_ = coeffs

    print("Truncating coefficients lower than {}".format(trunc_limit))
    _print_accuracy("Model accuracy after manual truncation of coefficients",
                    Y_test, lin.predict(X_test), target)

    pretty_print_polynomial(poly, lin, features)

    return lin

In [28]:
def analyze_mnt(df, trunc_limit = 0.001, modulus_power = 6):
    """Fit the polynomial gas model for the ``miller_gas`` column of ``df``.

    ``group_limbs``, ``x_bit_length`` and ``x_hamming_weight`` enter with
    power 1; ``modulus_limbs`` enters with powers 1 .. ``modulus_power``.
    The fit itself is delegated to ``analyze_manual_poly`` with ``degree=2``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the four feature columns and ``miller_gas``.
    trunc_limit : float
        Forwarded to ``analyze_manual_poly``.
    modulus_power : int
        Highest power of ``modulus_limbs`` offered to the model.

    Returns
    -------
    The fitted model returned by ``analyze_manual_poly``.
    """
    # The target is miller_gas, so this fit prices the Miller loop; the old
    # "final exp" wording did not match what is fitted here.
    print("Fitting Miller loop price")
    features_description = [
        ("group_limbs", 1),
        ("x_bit_length", 1),
        ("x_hamming_weight", 1),
        ("modulus_limbs", modulus_power),
    ]
    model = analyze_manual_poly(df, features_description, "miller_gas",
                                trunc_limit = trunc_limit, degree = 2)
    return model

In [29]:
# Fit the MNT4 model on the per-pair gas values; modulus_limbs is offered with powers 1..4.
mnt4_pairs_pairsing = analyze_mnt(mnt4, trunc_limit = 0.001, modulus_power = 4)

Fitting final exp price
['group_limbs^1', 'x_bit_length^1', 'x_hamming_weight^1', 'modulus_limbs^1', 'modulus_limbs^2', 'modulus_limbs^3', 'modulus_limbs^4']
Train samples 18717, test samples 2080
Intercept = 0.0
score on training set 0.9974202767890439
score on test set 0.9975947435835517
Model accuracy before manual truncation of coefficients
Max absolute error 458482.8392652306 microseconds
Mean absolute error 11852.789789820616 microseconds
R2 score = 0.9975947435835517
Truncating coefficients lower than 0.001
Model accuracy after manual truncation of coefficients
Max absolute error 458482.8392652306 microseconds
Mean absolute error 11852.789789820616 microseconds
R2 score = 0.9975947435835517
4.336702 * x_bit_length^1 + 21.697307 * modulus_limbs^2 + 537.061251 * group_limbs^1 * modulus_limbs^1 + 92.448568 * group_limbs^1 * modulus_limbs^2 + 5.798943 * x_bit_length^1 * modulus_limbs^1 + 2.073489 * x_bit_length^1 * modulus_limbs^2 + 7.714144 * x_hamming_weight^1 * modulus_limbs^1 + 

In [30]:
# Fit the MNT6 model on the per-pair gas values; modulus_limbs is offered with powers 1..6.
mnt6_pairs_pairsing = analyze_mnt(mnt6, trunc_limit = 0.001, modulus_power = 6)

Fitting final exp price
['group_limbs^1', 'x_bit_length^1', 'x_hamming_weight^1', 'modulus_limbs^1', 'modulus_limbs^2', 'modulus_limbs^3', 'modulus_limbs^4', 'modulus_limbs^5', 'modulus_limbs^6']
Train samples 18719, test samples 2080
Intercept = 0.0
score on training set 0.9970794146359541
score on test set 0.997512680529535
Model accuracy before manual truncation of coefficients
Max absolute error 928192.471655855 microseconds
Mean absolute error 22691.781754044394 microseconds
R2 score = 0.9975126805295351
Truncating coefficients lower than 0.001
Model accuracy after manual truncation of coefficients
Max absolute error 928192.471655855 microseconds
Mean absolute error 22691.781754044394 microseconds
R2 score = 0.9975126805295351
46.805604 * modulus_limbs^2 + 902.425869 * group_limbs^1 * modulus_limbs^1 + 157.580606 * group_limbs^1 * modulus_limbs^2 + 18.12141 * x_bit_length^1 * modulus_limbs^1 + 3.289248 * x_bit_length^1 * modulus_limbs^2 + 14.653151 * x_hamming_weight^1 * modulus_l

In [33]:
from joblib import dump

def dump_model(model, features_description, name):
    """Write ``model`` and its feature description to ``<name>.joblib``.

    The file holds a dict with two keys: ``"features"`` (the given
    ``features_description``) and ``"model"`` (the given ``model``).
    """
    target_path = "{}.joblib".format(name)
    payload = {"features": features_description, "model": model}
    dump(payload, target_path)

In [34]:
# Persist both fitted models. Each feature description repeats the list that
# analyze_mnt() builds, with the same modulus_power used for the fit above
# (4 for MNT4, 6 for MNT6); keep them in sync when changing either.
dump_model(mnt4_pairs_pairsing, [
        ("group_limbs", 1),
        ("x_bit_length", 1),
        ("x_hamming_weight", 1),
        ("modulus_limbs", 4)], "mnt4_miller")

dump_model(mnt6_pairs_pairsing, [
        ("group_limbs", 1),
        ("x_bit_length", 1),
        ("x_hamming_weight", 1),
        ("modulus_limbs", 6)], "mnt6_miller")