Perform a multivariate fit for BLS12 curves. The base parameters are the number of limbs, the number of bits in the `x` parameter that generates the curve, and the Hamming weight of `x`.

In [1]:
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
import numpy as np

# Load the raw measurements and keep only the rows whose `x_is_negative` flag
# equals 1.0. After filtering the flag is constant, so the column is dropped.
# The steps are chained (no `inplace=True`) so that no in-place mutation is
# performed on a filtered slice and re-running the cell always starts from the CSV.
df = (
    pd.read_csv("monte_carlo_f_exp.csv")
    .loc[lambda frame: frame["x_is_negative"] == 1.0]
    .drop(columns="x_is_negative")
)

df.head(5)

Unnamed: 0,x_bit_length,x_hamming_weight,modulus_limbs,num_pairs,run_microseconds
1,128,89,15,2,109252
3,128,89,15,4,164052
5,128,89,15,6,211699
7,78,71,13,2,63639
9,78,71,13,4,91392


In [2]:
from sklearn import linear_model

# Split every measured configuration into two cost components by fitting
#     run_microseconds = slope * num_pairs + intercept
# The slope is stored as the per-pair Miller loop time and the intercept as the
# final exponentiation time.
#
# Rows are grouped positionally in runs of three consecutive rows.
# NOTE(review): this assumes each configuration occupies exactly three
# consecutive rows (one per `num_pairs` value) -- confirm against the generator
# of the CSV. The quality (R^2) of the individual fits is not checked here.
rows_per_configuration = 3
measurement_columns = ["num_pairs", "run_microseconds"]

miller_rows = []
final_exp_rows = []

for _, g in df.groupby(np.arange(len(df)) // rows_per_configuration):
    reg = linear_model.LinearRegression(fit_intercept = True)
    # Plain NumPy arrays are passed: X must be 2-D (n_samples, 1), y is 1-D.
    model = reg.fit(g[["num_pairs"]].to_numpy(), g["run_microseconds"].to_numpy())

    # The first row of the group supplies the configuration parameters; its
    # index label is kept as the label of the resulting row.
    configuration = g.iloc[0].drop(measurement_columns)

    g_miller = configuration.copy()
    g_miller["single_pair_miller_time"] = model.coef_[0]
    miller_rows.append(g_miller)

    g_final_exp = configuration.copy()
    g_final_exp["final_exp_time"] = model.intercept_
    final_exp_rows.append(g_final_exp)

# Build each frame once from the collected rows instead of appending row by row.
df_miller_loops = pd.DataFrame(miller_rows)
df_final_exps = pd.DataFrame(final_exp_rows)

# Every configuration must map to exactly one, uniquely labelled row.
assert df_miller_loops.index.is_unique
assert df_final_exps.index.is_unique

In [3]:
# Preview the first five rows of the per-pair Miller loop table.
df_miller_loops.head(n=5)

Unnamed: 0,x_bit_length,x_hamming_weight,modulus_limbs,single_pair_miller_time
1,128.0,89.0,15.0,25611.75
7,78.0,71.0,13.0,15335.0
13,111.0,76.0,8.0,4508.0
19,108.0,92.0,10.0,6113.75
25,84.0,6.0,13.0,12843.5


In [4]:
# Preview the first five rows of the final exponentiation table.
df_final_exps.head(n=5)

Unnamed: 0,x_bit_length,x_hamming_weight,modulus_limbs,final_exp_time
1,128.0,89.0,15.0,59220.666667
7,78.0,71.0,13.0,31996.666667
13,111.0,76.0,8.0,12711.666667
19,108.0,92.0,10.0,33330.666667
25,84.0,6.0,13.0,27740.0


Fit final exponentiation only

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the final exponentiation rows for testing; the seed is fixed
# so the split is the same on every run.
train, test = train_test_split(df_final_exps, random_state=42, test_size=0.10)

for size_message, subset in (("Train set size {}", train), ("Test set size {}", test)):
    print(size_message.format(len(subset)))

test.head(5)

Train set size 4498
Test set size 500


Unnamed: 0,x_bit_length,x_hamming_weight,modulus_limbs,final_exp_time
505,109.0,1.0,10.0,18808.333333
14821,7.0,4.0,4.0,1846.0
16819,73.0,51.0,6.0,7294.666667
29917,51.0,9.0,16.0,68019.0
29539,119.0,93.0,6.0,9587.333333


In [6]:
from sklearn import linear_model

from sklearn.linear_model import Lasso

# Expand the three base parameters into polynomial terms up to degree 6.
# No bias column is generated and the Lasso is fitted without an intercept.
feature_columns = ["x_bit_length", "x_hamming_weight", "modulus_limbs"]
poly = PolynomialFeatures(degree=6, include_bias = False)

X_train = poly.fit_transform(train[feature_columns])
Y_train = train["final_exp_time"]

# `positive=True` restricts every coefficient to be non-negative.
lin_final_exp = Lasso(alpha=0.0001,precompute=True, max_iter=10000,fit_intercept=False,
            positive=True, random_state=9999, selection='random')
lin_final_exp.fit(X_train, Y_train)

# R^2 on the training set.
print(lin_final_exp.score(X_train, Y_train))

# The transformer was fitted on the training data above; the test set is only
# transformed, never used to re-fit it.
X_test = poly.transform(test[feature_columns])
Y_test = test["final_exp_time"]

# R^2 on the held-out test set.
print(lin_final_exp.score(X_test, Y_test))

0.9356652225936256
0.9326916180481005


In [7]:
# Render the fitted final exponentiation model as a readable polynomial.
# Terms whose coefficient is exactly zero are skipped; the remaining
# coefficients are rounded to three decimals for display only.
variable_names = ["x_bits", "x_hamming", "limbs"]
terms = []

for exponents, raw_coeff in zip(poly.powers_, lin_final_exp.coef_):
    if raw_coeff == 0.0:
        continue
    factors = ["{}".format(np.around(raw_coeff, decimals=3))]
    # One `name^power` factor per variable that actually appears in the term.
    factors.extend(
        '{}^{}'.format(name, exponent)
        for name, exponent in zip(variable_names, exponents)
        if exponent != 0
    )
    terms.append(" * ".join(factors))

polynomial_string = " + ".join(terms)
print(polynomial_string)

0.779 * x_bits^1 * limbs^1 + 2.45 * x_hamming^1 * limbs^1 + 53.569 * limbs^2 + 0.0 * x_bits^1 * x_hamming^1 * limbs^1 + 0.366 * x_bits^1 * limbs^2 + 0.617 * x_hamming^1 * limbs^2 + 4.893 * limbs^3 + 0.0 * x_bits^2 * x_hamming^1 * limbs^1 + 0.0 * x_bits^1 * x_hamming^1 * limbs^2 + 0.009 * x_bits^1 * limbs^3 + 0.042 * limbs^4 + 0.0 * x_bits^3 * x_hamming^1 * limbs^1 + 0.0 * x_bits^2 * x_hamming^1 * limbs^2 + 0.0 * x_bits^4 * x_hamming^1 * limbs^1 + 0.0 * x_bits^3 * x_hamming^1 * limbs^2


In [8]:
from sklearn.metrics import max_error, mean_absolute_error, r2_score

# Evaluate the untouched final exponentiation model on the held-out test set.
y_pred = lin_final_exp.predict(X_test)
y_true = Y_test

print("Model accuracy before manual truncation of coefficients")
# Printed in order: maximum error, mean absolute error, R^2.
for metric in (max_error, mean_absolute_error, r2_score):
    print(metric(y_true, y_pred))

Model accuracy before manual truncation of coefficients
26379.064345265804
2907.0409246951854
0.9326916180481006


Zero out the coefficients below the truncation limit and recheck the model accuracy

In [9]:
# Coefficients whose magnitude is below this limit are set to zero.
trunc_limit = 0.001

# The comparison is made on magnitudes so that only *small* coefficients are
# removed. For this model (fitted with `positive=True`) the result equals a
# plain `coef < trunc_limit` test, but a signed comparison would also wipe out
# any large negative coefficient.
coeffs = np.where(np.abs(lin_final_exp.coef_) < trunc_limit, 0.0, lin_final_exp.coef_)

# Overwrite the fitted coefficients so that `predict` uses the truncated model.
lin_final_exp.coef_ = coeffs

y_true = Y_test
y_pred = lin_final_exp.predict(X_test)

print("Model accuracy after manual truncation of coefficients")
# Printed in order: maximum error, mean absolute error, R^2.
print(max_error(y_true, y_pred))
print(mean_absolute_error(y_true, y_pred))
print(r2_score(y_true, y_pred))

Model accuracy after manual truncation of coefficients
26443.504134705196
2915.5196052459023
0.9327999499182335


In [10]:
# Render the final exponentiation model as a readable polynomial, using the
# coefficients currently stored on `lin_final_exp`. Terms whose coefficient is
# exactly zero are skipped; the rest are rounded to three decimals for display.
variable_names = ["x_bits", "x_hamming", "limbs"]
terms = []

for exponents, raw_coeff in zip(poly.powers_, lin_final_exp.coef_):
    if raw_coeff == 0.0:
        continue
    factors = ["{}".format(np.around(raw_coeff, decimals=3))]
    # One `name^power` factor per variable that actually appears in the term.
    factors.extend(
        '{}^{}'.format(name, exponent)
        for name, exponent in zip(variable_names, exponents)
        if exponent != 0
    )
    terms.append(" * ".join(factors))

polynomial_string = " + ".join(terms)
print(polynomial_string)

0.779 * x_bits^1 * limbs^1 + 2.45 * x_hamming^1 * limbs^1 + 53.569 * limbs^2 + 0.366 * x_bits^1 * limbs^2 + 0.617 * x_hamming^1 * limbs^2 + 4.893 * limbs^3 + 0.009 * x_bits^1 * limbs^3 + 0.042 * limbs^4


Fit Miller loop cost per one pair

In [11]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the Miller loop rows for testing; the seed is fixed so the
# split is the same on every run.
train, test = train_test_split(df_miller_loops, random_state=42, test_size=0.10)

for size_message, subset in (("Train set size {}", train), ("Test set size {}", test)):
    print(size_message.format(len(subset)))

test.head(5)

Train set size 4498
Test set size 500


Unnamed: 0,x_bit_length,x_hamming_weight,modulus_limbs,single_pair_miller_time
505,109.0,1.0,10.0,5760.75
14821,7.0,4.0,4.0,342.25
16819,73.0,51.0,6.0,1591.0
29917,51.0,9.0,16.0,11891.5
29539,119.0,93.0,6.0,2468.5


In [12]:
from sklearn import linear_model

from sklearn.linear_model import Lasso

# Expand the three base parameters into polynomial terms up to degree 6.
# No bias column is generated and the Lasso is fitted without an intercept.
feature_columns = ["x_bit_length", "x_hamming_weight", "modulus_limbs"]
poly = PolynomialFeatures(degree=6, include_bias = False)

X_train = poly.fit_transform(train[feature_columns])
Y_train = train["single_pair_miller_time"]

# `positive=True` restricts every coefficient to be non-negative.
lin_miller = Lasso(alpha=0.0001,precompute=True, max_iter=10000,fit_intercept=False,
            positive=True, random_state=9999, selection='random')
lin_miller.fit(X_train, Y_train)

# R^2 on the training set.
print(lin_miller.score(X_train, Y_train))

# The transformer was fitted on the training data above; the test set is only
# transformed, never used to re-fit it.
X_test = poly.transform(test[feature_columns])
Y_test = test["single_pair_miller_time"]

# R^2 on the held-out test set.
print(lin_miller.score(X_test, Y_test))

0.9648838381664456
0.9661940769037191


In [13]:
from sklearn.metrics import max_error, mean_absolute_error, r2_score

# Evaluate the untouched Miller loop model on the held-out test set.
y_pred = lin_miller.predict(X_test)
y_true = Y_test

print("Model accuracy before manual truncation of coefficients")
# Printed in order: maximum error, mean absolute error, R^2.
for metric in (max_error, mean_absolute_error, r2_score):
    print(metric(y_true, y_pred))

Model accuracy before manual truncation of coefficients
6502.743952440023
806.715092693171
0.9661940769037191


In [14]:
# Render the fitted Miller loop model as a readable polynomial.
# Terms whose coefficient is exactly zero are skipped; the remaining
# coefficients are rounded to three decimals for display only.
variable_names = ["x_bits", "x_hamming", "limbs"]
terms = []

for exponents, raw_coeff in zip(poly.powers_, lin_miller.coef_):
    if raw_coeff == 0:
        continue
    factors = ["{}".format(np.around(raw_coeff, decimals=3))]
    # One `name^power` factor per variable that actually appears in the term.
    factors.extend(
        '{}^{}'.format(name, exponent)
        for name, exponent in zip(variable_names, exponents)
        if exponent != 0
    )
    terms.append(" * ".join(factors))

polynomial_string = " + ".join(terms)
print(polynomial_string)

0.237 * x_bits^1 * limbs^1 + 0.143 * x_hamming^1 * limbs^1 + 22.11 * limbs^2 + 0.0 * x_bits^2 * limbs^1 + 0.0 * x_bits^1 * x_hamming^1 * limbs^1 + 0.162 * x_bits^1 * limbs^2 + 0.002 * x_hamming^2 * limbs^1 + 0.169 * x_hamming^1 * limbs^2 + 2.074 * limbs^3 + 0.023 * limbs^4 + 0.0 * x_bits^6 + 0.0 * x_bits^5 * limbs^1


Filter small coefficients and recheck

In [15]:
# Coefficients whose magnitude is below this limit are set to zero.
trunc_limit = 0.001

# The comparison is made on magnitudes so that only *small* coefficients are
# removed. For this model (fitted with `positive=True`) the result equals a
# plain `coef < trunc_limit` test, but a signed comparison would also wipe out
# any large negative coefficient.
coeffs = np.where(np.abs(lin_miller.coef_) < trunc_limit, 0.0, lin_miller.coef_)

# Overwrite the fitted coefficients so that `predict` uses the truncated model.
lin_miller.coef_ = coeffs

y_true = Y_test
y_pred = lin_miller.predict(X_test)

print("Model accuracy after manual truncation of coefficients")
# Printed in order: maximum error, mean absolute error, R^2.
print(max_error(y_true, y_pred))
print(mean_absolute_error(y_true, y_pred))
print(r2_score(y_true, y_pred))

Model accuracy after manual truncation of coefficients
6502.235071679888
806.4619844764101
0.9661922887203355


In [16]:
# Render the Miller loop model as a readable polynomial, using the coefficients
# currently stored on `lin_miller`. Terms whose coefficient is exactly zero are
# skipped; the rest are rounded to three decimals for display.
variable_names = ["x_bits", "x_hamming", "limbs"]
terms = []

for exponents, raw_coeff in zip(poly.powers_, lin_miller.coef_):
    if raw_coeff == 0:
        continue
    factors = ["{}".format(np.around(raw_coeff, decimals=3))]
    # One `name^power` factor per variable that actually appears in the term.
    factors.extend(
        '{}^{}'.format(name, exponent)
        for name, exponent in zip(variable_names, exponents)
        if exponent != 0
    )
    terms.append(" * ".join(factors))

polynomial_string = " + ".join(terms)
print(polynomial_string)

0.237 * x_bits^1 * limbs^1 + 0.143 * x_hamming^1 * limbs^1 + 22.11 * limbs^2 + 0.162 * x_bits^1 * limbs^2 + 0.002 * x_hamming^2 * limbs^1 + 0.169 * x_hamming^1 * limbs^2 + 2.074 * limbs^3 + 0.023 * limbs^4
