In [6]:
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
import numpy as np
import math

# file_list = [("BLS12", "./bls12/monte_carlo_f_exp_50000.csv"),
#             ("BN", "./bn/monte_carlo_f_exp_50000.csv"),
#             ("MNT4", "./mnt4/monte_carlo_f_exp_50000.csv"),
#             ("MNT6", "./mnt6/monte_carlo_f_exp_50000.csv")]

# (curve name, CSV path) pairs consumed by get_dfs. The list order fixes the
# positions in `dataframes` further down: index 0 is BLS12, index 1 is BN.
file_list = [("BLS12", "./bls12/monte_carlo_f_exp_55000.csv"),
            ("BN", "./bn/monte_carlo_f_exp_55000.csv")]


# Multiplier applied to `run_microseconds` (result rounded up) to obtain the
# `gas` column in get_dfs.
gas_factor = 15

def get_dfs(files, factor=None):
    """Load benchmark CSVs and convert the measured run time into gas.

    For every ``(name, path)`` pair the CSV at ``path`` is read, only rows with
    ``x_is_negative == 1.0`` are kept, and a ``gas`` column is computed as
    ``ceil(run_microseconds * factor)``. The ``x_is_negative`` and
    ``run_microseconds`` columns are removed from the result.

    Parameters
    ----------
    files : iterable of (name, path) pairs
        ``name`` is not used here; ``path`` is handed to ``pd.read_csv``.
    factor : number, optional
        Gas charged per measured microsecond. Defaults to the notebook-level
        ``gas_factor`` when omitted.

    Returns
    -------
    list of pandas.DataFrame
        One frame per entry of ``files``, in the same order.
    """
    if factor is None:
        factor = gas_factor

    results = []
    for _name, path in files:
        raw = pd.read_csv(path)
        # Explicit copy: adding a column to a boolean-filtered frame would
        # otherwise trigger pandas' SettingWithCopyWarning.
        selected = raw.loc[raw["x_is_negative"] == 1.0].copy()
        selected["gas"] = selected["run_microseconds"].apply(
            lambda micros: math.ceil(micros * factor))
        results.append(selected.drop(columns=["x_is_negative", "run_microseconds"]))

    return results

In [7]:
from sklearn import linear_model
from sklearn.linear_model import Lasso
from scipy.optimize import nnls

def factor_out_final_exp(df, group_by = 3, skip_bad_fits = False):
    """Split measured gas into a per-pair slope and a constant intercept.

    The rows of ``df`` are processed in consecutive chunks of ``group_by`` rows
    (by position). For every chunk a positive-constrained Lasso fits
    ``gas = slope * num_pairs + intercept``. The slope is stored as
    ``single_pair_miller_gas`` and the intercept as ``final_exp_gas``; all
    other columns (everything except ``num_pairs`` and ``gas``) are copied from
    the first row of the chunk, whose index label is also reused.

    Chunks whose slope or intercept is ``<= 1`` are dropped. When
    ``skip_bad_fits`` is true, chunks with an R^2 score below 0.85 are dropped
    as well. The lowest score seen over all chunks is printed.

    Returns
    -------
    (df_miller_loops, df_final_exps) : tuple of pandas.DataFrame
    """
    passthrough = [col for col in df.columns if col not in ("num_pairs", "gas")]
    miller_rows = []
    final_exp_rows = []

    min_score = 1.0

    for _, g in df.groupby(np.arange(len(df)) // group_by):
        # Explicit 2-D arrays; indexing a Series with [:, np.newaxis] is no
        # longer supported by pandas.
        X = g["num_pairs"].to_numpy().reshape(-1, 1)
        y = g["gas"].to_numpy().reshape(-1, 1)

        model = Lasso(alpha=0.0001, precompute=True, max_iter=1000,
                      positive=True, random_state=9999, selection='random')
        model.fit(X, y)

        score = model.score(X, y)
        if score < min_score:
            min_score = score

        if score < 0.85 and skip_bad_fits:
            continue

        # ravel() so extraction does not depend on whether sklearn returns the
        # parameters as 0-d, 1-d or 2-d arrays.
        slope = float(np.ravel(model.coef_)[0])
        intercept = float(np.ravel(model.intercept_)[0])

        if slope <= 1 or intercept <= 1:
            continue

        shared = g.iloc[0].drop(labels=["num_pairs", "gas"])

        miller_row = shared.copy()
        miller_row["single_pair_miller_gas"] = slope
        miller_rows.append(miller_row)

        final_exp_row = shared.copy()
        final_exp_row["final_exp_gas"] = intercept
        final_exp_rows.append(final_exp_row)

    print("Minimal final exp fitting score = {}".format(min_score))

    # Build both frames once from the collected rows (DataFrame.append was
    # removed in pandas 2.0 and was quadratic when used in a loop). Each row
    # Series keeps its name, which becomes the row's index label.
    df_miller_loops = pd.DataFrame(
        miller_rows, columns=passthrough + ["single_pair_miller_gas"])
    df_final_exps = pd.DataFrame(
        final_exp_rows, columns=passthrough + ["final_exp_gas"])

    return (df_miller_loops, df_final_exps)

In [8]:
from sklearn import linear_model
from sklearn.linear_model import Lasso
from scipy.optimize import nnls

def factor_out_final_exp_non_negative(df, group_by = 3, skip_bad_fits = False):
    """Split measured gas into slope and intercept using non-negative least squares.

    The rows of ``df`` are processed in consecutive chunks of ``group_by`` rows
    (by position). For every chunk ``scipy.optimize.nnls`` solves
    ``gas = slope * num_pairs + intercept`` with both unknowns constrained to
    be non-negative. The slope is stored as ``single_pair_miller_gas`` and the
    intercept as ``final_exp_gas``; all other columns (everything except
    ``num_pairs`` and ``gas``) are copied from the first row of the chunk,
    whose index label is also reused.

    The R^2 score of every chunk is computed from the NNLS residual and the
    lowest one is printed. When ``skip_bad_fits`` is true, chunks scoring below
    0.85 are dropped (same threshold as ``factor_out_final_exp``).

    Returns
    -------
    (df_miller_loops, df_final_exps) : tuple of pandas.DataFrame
    """
    passthrough = [col for col in df.columns if col not in ("num_pairs", "gas")]
    miller_rows = []
    final_exp_rows = []

    min_score = 1.0

    for _, g in df.groupby(np.arange(len(df)) // group_by):
        pairs = g["num_pairs"].to_numpy(dtype=float)
        gas = g["gas"].to_numpy(dtype=float)

        # Design matrix columns: [num_pairs, 1] -> solution is [slope, intercept].
        design = np.column_stack([pairs, np.ones_like(pairs)])
        solution, residual_norm = nnls(design, gas)

        # nnls returns the 2-norm of the residual; square it for R^2.
        residual_ss = float(residual_norm) ** 2
        total_ss = float(np.sum((gas - gas.mean()) ** 2))
        if total_ss > 0.0:
            score = 1.0 - residual_ss / total_ss
        else:
            # Constant target: perfect only if the fit reproduces it exactly.
            score = 1.0 if residual_ss == 0.0 else 0.0

        if score < min_score:
            min_score = score

        if score < 0.85 and skip_bad_fits:
            continue

        shared = g.iloc[0].drop(labels=["num_pairs", "gas"])

        miller_row = shared.copy()
        miller_row["single_pair_miller_gas"] = solution[0]
        miller_rows.append(miller_row)

        final_exp_row = shared.copy()
        final_exp_row["final_exp_gas"] = solution[1]
        final_exp_rows.append(final_exp_row)

    print("Minimal final exp fitting score = {}".format(min_score))

    # Build both frames once from the collected rows (DataFrame.append was
    # removed in pandas 2.0 and was quadratic when used in a loop). Each row
    # Series keeps its name, which becomes the row's index label.
    df_miller_loops = pd.DataFrame(
        miller_rows, columns=passthrough + ["single_pair_miller_gas"])
    df_final_exps = pd.DataFrame(
        final_exp_rows, columns=passthrough + ["final_exp_gas"])

    return (df_miller_loops, df_final_exps)

In [9]:
# Load one frame per entry of file_list, in the same order.
dataframes = get_dfs(file_list)

In [10]:
# Preview of the frame loaded from the first file_list entry (BLS12).
dataframes[0].head()

Unnamed: 0,x_bit_length,x_hamming_weight,modulus_limbs,group_limbs,num_pairs,gas
0,57,39,6,16,2,331245
1,57,39,6,16,4,383655
2,57,39,6,16,6,550155
3,64,7,7,11,2,238500
4,64,7,7,11,4,339495


In [11]:
# Preview of the frame loaded from the second file_list entry (BN).
dataframes[1].head()

Unnamed: 0,six_u_plus_two_bit_length,six_u_plus_two_hamming,modulus_limbs,group_limbs,num_pairs,x_bit_length,x_hamming_weight,gas
0,51,29,10,2,2,49,29,377610
1,51,29,10,2,4,49,29,430695
2,51,29,10,2,6,49,29,515130
3,5,1,6,4,2,2,2,110895
4,5,1,6,4,4,2,2,130260


In [12]:
from sklearn.model_selection import train_test_split

def split_df(df):
    """Split ``df`` into a 90% training part and a 10% test part.

    The split uses a fixed ``random_state`` of 42, and the size of both parts
    is printed before they are returned as ``(train, test)``.
    """
    train_part, test_part = train_test_split(df, test_size=0.10, random_state=42)

    sizes_message = "Train samples {}, test samples {}".format(
        len(train_part), len(test_part))
    print(sizes_message)

    return (train_part, test_part)

In [22]:
from sklearn import linear_model
from sklearn.linear_model import Lasso

from sklearn.metrics import max_error, mean_absolute_error, r2_score

def pretty_print_polynomial(poly, model, variable_names):
    """Print the model as a human-readable sum of monomials.

    ``poly.powers_`` supplies, per term, the exponent of every input variable
    and ``model.coef_`` supplies the term's coefficient. Terms whose
    coefficient is exactly zero are left out; the remaining coefficients are
    rounded to six decimals. Variables with exponent 1 are printed bare, other
    non-zero exponents as ``name^exponent``.
    """
    def render_factor(name, exponent):
        # Exponent 1 is implied; everything else is spelled out.
        if exponent == 1:
            return '{}'.format(name)
        return '{}^{}'.format(name, exponent)

    rendered_terms = []
    for term_idx, exponents in enumerate(poly.powers_):
        weight = model.coef_[term_idx]
        if weight == 0:
            continue

        pieces = ["{}".format(np.around(weight, decimals=6))]
        pieces.extend(
            render_factor(variable_names[variable_idx], exponent)
            for variable_idx, exponent in enumerate(exponents)
            if exponent != 0
        )
        rendered_terms.append(" * ".join(pieces))

    print(" + ".join(rendered_terms))


In [23]:
def analyze_manual_poly(df, features_description, target, trunc_limit = 0.001, degree = 2):
    """Fit a positive-coefficient Lasso on hand-built power features.

    For every ``(name, max_power)`` in ``features_description`` the columns
    ``name^1 .. name^max_power`` are added to a copy of ``df``. These columns
    are expanded with interaction-only ``PolynomialFeatures`` of the given
    ``degree`` and a Lasso without intercept is fitted on the training part
    returned by ``split_df``. Accuracy on the test part is printed before and
    after zeroing every coefficient smaller than ``trunc_limit``, followed by
    the resulting polynomial.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain every feature named in ``features_description`` and the
        ``target`` column.
    features_description : list of (str, int)
        Column name and highest power to generate for it.
    target : str
        Name of the column to predict.
    trunc_limit : float
        Coefficients below this value are set to zero after fitting.
    degree : int
        Degree passed to ``PolynomialFeatures``.

    Returns
    -------
    sklearn.linear_model.Lasso
        The fitted model with its ``coef_`` replaced by the truncated
        coefficients.
    """
    new_df = df.copy()
    features = []
    for name, max_power in features_description:
        for power in range(1, max_power + 1):
            subname = "{}^{}".format(name, power)
            # Bind the exponent as a default argument so the lambda does not
            # depend on the loop variable's later values.
            new_df[subname] = new_df[name].apply(lambda x, p=power: x**p)
            features.append(subname)

    print(features)

    poly = PolynomialFeatures(degree = degree, interaction_only=True, include_bias = False)

    train, test = split_df(new_df)

    X_train = poly.fit_transform(train[features])
    Y_train = train[target]

    lin = Lasso(alpha=0.0001,precompute=True, max_iter=100000, fit_intercept=False,
                positive=True, random_state=9999, selection='random')
    lin.fit(X_train, Y_train)

    print("Intercept = {}".format(lin.intercept_))

    print("score on training set {}".format(lin.score(X_train, Y_train)))

    # Reuse the transformer fitted on the training split; the test split must
    # only be transformed, never used for fitting.
    X_test = poly.transform(test[features])
    Y_test = test[target]

    print("score on test set {}".format(lin.score(X_test, Y_test)))

    def report_test_accuracy():
        # Errors are expressed in whatever unit the target column uses; the
        # previous hard-coded "microseconds" label was wrong for gas targets.
        y_pred = lin.predict(X_test)
        print("Max absolute error {} (in units of {})".format(max_error(Y_test, y_pred), target))
        print("Mean absolute error {} (in units of {})".format(mean_absolute_error(Y_test, y_pred), target))
        print("R2 score = {}".format(r2_score(Y_test, y_pred)))

    print("Model accuracy before manual truncation of coefficients")
    report_test_accuracy()

    coeffs = lin.coef_.copy()
    coeffs[coeffs < trunc_limit] = 0.0
    lin.coef_ = coeffs

    print("Truncating coefficients lower than {}".format(trunc_limit))
    print("Model accuracy after manual truncation of coefficients")
    report_test_accuracy()

    pretty_print_polynomial(poly, lin, features)

    return lin

In [24]:
def analyze_bls12_alt(df, trunc_limit = 0.001, modulus_power = 12):
    """Fit the two BLS12 price models from one benchmark frame.

    ``df`` is first split by ``factor_out_final_exp`` into a per-pair part and
    a constant part. Each part is then fitted with ``analyze_manual_poly``
    (degree 2), with ``modulus_limbs`` expanded up to ``modulus_power``.

    Returns ``(model_miller, model_final_exp)``.
    """
    miller, final_exp = factor_out_final_exp(df)

    miller_features = [
        ("x_bit_length", 1),
        ("x_hamming_weight", 1),
        ("group_limbs", 1),
        ("modulus_limbs", modulus_power),
    ]
    final_exp_features = [
        ("x_bit_length", 1),
        ("x_hamming_weight", 1),
        ("modulus_limbs", modulus_power),
    ]

    print("Fitting miller loop price")
    model_miller = analyze_manual_poly(
        miller, miller_features, "single_pair_miller_gas",
        trunc_limit = trunc_limit, degree = 2)

    print("Fitting final exp price")
    model_final_exp = analyze_manual_poly(
        final_exp, final_exp_features, "final_exp_gas",
        trunc_limit = trunc_limit, degree = 2)

    return (model_miller, model_final_exp)

In [25]:
# Fit both BLS12 models on the first loaded frame, using the default
# trunc_limit and modulus_power.
(bls_miller, bls_final_exp) = analyze_bls12_alt(dataframes[0])

Minimal final exp fitting score = 0.0
Fitting miller loop price
['x_bit_length^1', 'x_hamming_weight^1', 'group_limbs^1', 'modulus_limbs^1', 'modulus_limbs^2', 'modulus_limbs^3', 'modulus_limbs^4', 'modulus_limbs^5', 'modulus_limbs^6', 'modulus_limbs^7', 'modulus_limbs^8', 'modulus_limbs^9', 'modulus_limbs^10', 'modulus_limbs^11', 'modulus_limbs^12']
Train samples 12463, test samples 1385
Intercept = 0.0
score on training set 0.9733396420490379
score on test set 0.975939626970064
Model accuracy before manual truncation of coefficients
Max absolute error 115436.0662395673 microseconds
Mean absolute error 9358.192465276705 microseconds
R2 score = 0.975939626970064
Truncating coefficients lower than 0.001
Model accuracy after manual truncation of coefficients
Max absolute error 115436.0662395673 microseconds
Mean absolute error 9358.192465276705 microseconds
R2 score = 0.975939626970064
0.001099 * x_bit_length^1 * x_hamming_weight^1 + 8.78378 * x_bit_length^1 * modulus_limbs^1 + 1.993631 

In [26]:
def analyze_bn_alt(df, trunc_limit = 0.001, modulus_power = 12):
    """Fit the two BN price models from one benchmark frame.

    ``df`` is first split by ``factor_out_final_exp`` into a per-pair part and
    a constant part. Each part is then fitted with ``analyze_manual_poly``
    (degree 2), with ``modulus_limbs`` expanded up to ``modulus_power``. The
    per-pair model uses the ``six_u_plus_two_*`` columns, the constant-part
    model uses the ``x_*`` columns.

    Returns ``(model_miller, model_final_exp)``.
    """
    miller, final_exp = factor_out_final_exp(df)

    miller_features = [
        ("six_u_plus_two_bit_length", 1),
        ("six_u_plus_two_hamming", 1),
        ("group_limbs", 1),
        ("modulus_limbs", modulus_power),
    ]
    final_exp_features = [
        ("x_bit_length", 1),
        ("x_hamming_weight", 1),
        ("modulus_limbs", modulus_power),
    ]

    print("Fitting miller loop price")
    model_miller = analyze_manual_poly(
        miller, miller_features, "single_pair_miller_gas",
        trunc_limit = trunc_limit, degree = 2)

    print("Fitting final exp price")
    model_final_exp = analyze_manual_poly(
        final_exp, final_exp_features, "final_exp_gas",
        trunc_limit = trunc_limit, degree = 2)

    return (model_miller, model_final_exp)

In [27]:
# Fit both BN models on the second loaded frame, using the default
# trunc_limit and modulus_power.
(bn_miller, bn_final_exp) = analyze_bn_alt(dataframes[1])

Minimal final exp fitting score = 0.005380826021655238
Fitting miller loop price
['six_u_plus_two_bit_length^1', 'six_u_plus_two_hamming^1', 'group_limbs^1', 'modulus_limbs^1', 'modulus_limbs^2', 'modulus_limbs^3', 'modulus_limbs^4', 'modulus_limbs^5', 'modulus_limbs^6', 'modulus_limbs^7', 'modulus_limbs^8', 'modulus_limbs^9', 'modulus_limbs^10', 'modulus_limbs^11', 'modulus_limbs^12']
Train samples 12294, test samples 1366
Intercept = 0.0
score on training set 0.9747528073508434
score on test set 0.9739903899963259
Model accuracy before manual truncation of coefficients
Max absolute error 151941.08370264742 microseconds
Mean absolute error 9526.231302755285 microseconds
R2 score = 0.9739903899963259
Truncating coefficients lower than 0.001
Model accuracy after manual truncation of coefficients
Max absolute error 151941.08370264742 microseconds
Mean absolute error 9526.231302755285 microseconds
R2 score = 0.9739903899963259
0.140358 * six_u_plus_two_bit_length^1 * six_u_plus_two_hammin

In [28]:
# Scale applied to every fitted coefficient before it is rounded up to an
# integer in process_polynomial_model; also written to the exported JSON under
# the "multiplier" key.
model_multiplication_factor = 1000

In [29]:
import json

def process_polynomial_model(model, features_description, multiplier=None):
    """Convert a fitted polynomial model into integer-coefficient terms.

    The term layout is rebuilt with the same interaction-only, degree-2
    ``PolynomialFeatures`` expansion that was used for fitting, applied to the
    columns ``name^1 .. name^max_power`` generated from
    ``features_description``. Every coefficient is multiplied by
    ``multiplier`` and rounded up; terms whose scaled coefficient is zero are
    dropped. A product of two powers of the same feature is merged into one
    factor with the summed power, and single-factor terms that end up with the
    same ``(feature_index, power)`` have their coefficients added together.

    Parameters
    ----------
    model : object with a ``coef_`` attribute
        The fitted model whose coefficients are exported.
    features_description : list of (str, int)
        Column name and highest power, in the order used for fitting.
    multiplier : number, optional
        Scale applied before rounding up. Defaults to the notebook-level
        ``model_multiplication_factor`` when omitted.

    Returns
    -------
    list of (int, list of (int, int))
        ``(coefficient, factors)`` pairs where every factor is
        ``(feature_index, power)`` and ``feature_index`` is the position of
        the feature in ``features_description``.

    Raises
    ------
    ValueError
        If the number of model coefficients does not match the number of terms
        of the rebuilt expansion.
    AssertionError
        If the expansion contains a power other than 0 or 1, which the
        interaction-only layout is not expected to produce.
    """
    if multiplier is None:
        multiplier = model_multiplication_factor

    # One (feature_index, power) entry per generated input column.
    columns = [
        (feature_index, power)
        for feature_index, (_name, max_power) in enumerate(features_description)
        for power in range(1, max_power + 1)
    ]

    # Fitting on a dummy row is enough to obtain the term layout (powers_).
    poly = PolynomialFeatures(degree = 2, interaction_only=True, include_bias = False)
    poly.fit([[0.0] * len(columns)])

    coefficients = np.ravel(model.coef_)
    if len(coefficients) != poly.powers_.shape[0]:
        # A mismatch means the model was fitted on a different feature layout;
        # pairing coefficients with terms by position would be wrong.
        raise ValueError(
            "model has {} coefficients but the feature description expands to {} terms".format(
                len(coefficients), poly.powers_.shape[0]))

    terms = []
    # (feature_index, power) -> position in `terms` of the single-factor term
    # that already uses it, for O(1) merging of duplicates.
    single_factor_position = {}

    for term_idx, exponents in enumerate(poly.powers_):
        coeff = math.ceil(coefficients[term_idx] * multiplier)
        if coeff == 0:
            continue

        factors = []
        for column_idx, exponent in enumerate(exponents):
            if exponent == 0:
                continue
            if exponent != 1:
                # Explicit raise (not `assert`) so the check survives python -O.
                raise AssertionError("unexpected power {} in interaction-only expansion".format(exponent))
            factors.append(columns[column_idx])

        if len(factors) == 0:
            continue

        # x^a * x^b of the same feature collapses into x^(a+b).
        if len(factors) == 2 and factors[0][0] == factors[1][0]:
            factors = [(factors[0][0], factors[0][1] + factors[1][1])]

        if len(factors) == 1:
            key = factors[0]
            if key in single_factor_position:
                print("Dedup")
                position = single_factor_position[key]
                existing_coeff, existing_factors = terms[position]
                terms[position] = (existing_coeff + coeff, existing_factors)
                continue
            single_factor_position[key] = len(terms)

        terms.append((coeff, factors))

    return terms

def serialize_bn_bls_model(miller_model, miller_features, final_exp_model, final_exp_features, filename):
    """Write both models and their feature descriptions to a JSON file.

    The output object holds the coefficient multiplier, the two feature
    descriptions as given, and the terms produced by
    ``process_polynomial_model`` for each model. ``filename`` is overwritten.
    """
    payload = {
        "multiplier": model_multiplication_factor,
        "miller_features": miller_features,
        "miller": process_polynomial_model(miller_model, miller_features),
        "final_exp_features": final_exp_features,
        "final_exp": process_polynomial_model(final_exp_model, final_exp_features),
    }

    with open(filename, 'w') as sink:
        json.dump(payload, sink)

In [31]:
# Export the BLS12 models to bls12_model.json. process_polynomial_model
# rebuilds the term layout from these feature lists, so they have to match the
# ones used for fitting in analyze_bls12_alt (modulus_power = 12).
serialize_bn_bls_model(bls_miller,[
        ("x_bit_length", 1),
        ("x_hamming_weight", 1),
        ("group_limbs", 1),
        ("modulus_limbs", 12)],
                       bls_final_exp,[
        ("x_bit_length", 1),
        ("x_hamming_weight", 1),
        ("modulus_limbs", 12)], "bls12_model.json")

Dedup


In [32]:
# Export the BN models to bn_model.json. process_polynomial_model rebuilds the
# term layout from these feature lists, so they have to match the ones used
# for fitting in analyze_bn_alt (modulus_power = 12).
serialize_bn_bls_model(bn_miller,[
        ("six_u_plus_two_bit_length", 1),
        ("six_u_plus_two_hamming", 1),
        ("group_limbs", 1),
        ("modulus_limbs", 12)],
                       bn_final_exp,[
        ("x_bit_length", 1),
        ("x_hamming_weight", 1),
        ("modulus_limbs", 12)], "bn_model.json")

Dedup
