In [35]:
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
import numpy as np

# file_list = [("BLS12", "./bls12/monte_carlo_f_exp_50000.csv"),
#             ("BN", "./bn/monte_carlo_f_exp_50000.csv"),
#             ("MNT4", "./mnt4/monte_carlo_f_exp_50000.csv"),
#             ("MNT6", "./mnt6/monte_carlo_f_exp_50000.csv")]

# Benchmark CSV files to analyse, as (label, path) pairs. The order matters:
# later cells address the loaded frames by position in this list.
file_list = [("BLS12", "./bls12/monte_carlo_f_exp_55000.csv"),
            ("BN", "./bn/monte_carlo_f_exp_55000.csv"),
            ("MNT4", "./mnt4/monte_carlo_f_exp_100000.csv"),
            ("MNT6", "./mnt6/monte_carlo_f_exp_100000.csv")]



def get_dfs(files):
    """Load each benchmark CSV and keep only the rows with ``x_is_negative == 1.0``.

    Parameters
    ----------
    files : iterable of (name, path) tuples
        ``name`` is a label for the data set (not used here); ``path`` is the
        CSV file to read.

    Returns
    -------
    list of pandas.DataFrame
        One frame per entry of ``files``, in the same order, with the
        ``x_is_negative`` column removed. The surviving rows keep the row
        labels they had when read from the file.
    """
    results = []
    for _name, path in files:
        raw = pd.read_csv(path)
        # Build a new frame rather than dropping in place on a filtered
        # slice, which makes pandas emit SettingWithCopyWarning.
        negative_x = raw.loc[raw["x_is_negative"] == 1.0].drop(columns="x_is_negative")
        results.append(negative_x)

    return results

In [43]:
from sklearn import linear_model
from sklearn.linear_model import Lasso
from scipy.optimize import nnls

def factor_out_final_exp(df, group_by = 3, skip_bad_fits = False):
    """Split measured run times into a per-pair slope and a constant intercept.

    Consecutive chunks of ``group_by`` rows are fitted independently with
    ``run_microseconds = slope * num_pairs + intercept`` using a Lasso with
    ``positive=True``. The slope is stored as ``single_pair_miller_time`` and
    the intercept as ``final_exp_time``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``num_pairs`` and ``run_microseconds``; all other columns
        are copied from the first row of each chunk.
    group_by : int
        Number of consecutive rows that form one chunk.
    skip_bad_fits : bool
        When true, chunks whose fit score is below 0.85 are discarded.

    Returns
    -------
    (df_miller_loops, df_final_exps) : tuple of pandas.DataFrame
        Both frames hold the non-measurement columns of ``df`` plus
        ``single_pair_miller_time`` / ``final_exp_time`` respectively, one row
        per retained chunk, labelled with the label of the chunk's first row.

    Notes
    -----
    Chunks with ``slope <= 1`` or ``intercept <= 1`` are always discarded.
    The smallest score seen over all chunks (including discarded ones) is
    printed.
    """
    measurement_columns = ["num_pairs", "run_microseconds"]
    kept_columns = [c for c in df.columns if c not in measurement_columns]

    # Rows are collected in lists and turned into frames once at the end:
    # DataFrame.append was removed in pandas 2.0 and was quadratic in a loop.
    miller_rows = []
    final_exp_rows = []

    min_score = 1.0

    for _, g in df.groupby(np.arange(len(df)) // group_by):
        # Indexing a Series with [:, np.newaxis] is not supported by current
        # pandas; reshape the underlying array instead.
        X = g["num_pairs"].to_numpy().reshape(-1, 1)
        y = g["run_microseconds"].to_numpy()

        model = Lasso(alpha=0.0001, precompute=True, max_iter=1000,
                      positive=True, random_state=9999, selection='random')
        model.fit(X, y)

        score = model.score(X, y)
        if score < min_score:
            min_score = score

        if skip_bad_fits and score < 0.85:
            continue

        # np.ravel makes the extraction independent of whether the estimator
        # exposes these attributes as scalars or as 1-element arrays.
        slope = float(np.ravel(model.coef_)[0])
        intercept = float(np.ravel(model.intercept_)[0])

        # Discard chunks whose fitted slope or intercept is at most 1.
        if slope <= 1 or intercept <= 1:
            continue

        parameters = g.iloc[0].drop(measurement_columns)

        miller_row = parameters.copy()
        miller_row["single_pair_miller_time"] = slope
        miller_rows.append(miller_row)

        final_exp_row = parameters.copy()
        final_exp_row["final_exp_time"] = intercept
        final_exp_rows.append(final_exp_row)

    print("Minimal final exp fitting score = {}".format(min_score))

    df_miller_loops = pd.DataFrame(miller_rows, columns=kept_columns + ["single_pair_miller_time"])
    df_final_exps = pd.DataFrame(final_exp_rows, columns=kept_columns + ["final_exp_time"])

    return (df_miller_loops, df_final_exps)

In [3]:
from sklearn import linear_model
from sklearn.linear_model import Lasso
from scipy.optimize import nnls

def factor_out_final_exp_non_negative(df, group_by = 3, skip_bad_fits = False):
    """Split measured run times into slope and intercept using non-negative least squares.

    Consecutive chunks of ``group_by`` rows are fitted independently with
    ``run_microseconds = slope * num_pairs + intercept`` via
    ``scipy.optimize.nnls``, so both fitted values are >= 0. The slope is
    stored as ``single_pair_miller_time`` and the intercept as
    ``final_exp_time``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``num_pairs`` and ``run_microseconds``; all other columns
        are copied from the first row of each chunk.
    group_by : int
        Number of consecutive rows that form one chunk.
    skip_bad_fits : bool
        When true, chunks whose R^2 is below 0.85 are discarded (the same
        threshold ``factor_out_final_exp`` uses). Defaults to keeping every
        chunk.

    Returns
    -------
    (df_miller_loops, df_final_exps) : tuple of pandas.DataFrame
        One row per retained chunk, labelled with the label of the chunk's
        first row.

    Notes
    -----
    The smallest R^2 seen over all chunks is printed; previously the printed
    value was never updated and was always 1.0.
    """
    measurement_columns = ["num_pairs", "run_microseconds"]
    kept_columns = [c for c in df.columns if c not in measurement_columns]

    # Rows are collected in lists and turned into frames once at the end:
    # DataFrame.append was removed in pandas 2.0 and was quadratic in a loop.
    miller_rows = []
    final_exp_rows = []

    min_score = 1.0

    for _, g in df.groupby(np.arange(len(df)) // group_by):
        pairs = g["num_pairs"].to_numpy(dtype=float)
        runtimes = g["run_microseconds"].to_numpy(dtype=float)
        # Second column of ones lets nnls fit the intercept as a coefficient.
        design = np.column_stack([pairs, np.ones_like(pairs)])

        coefficients, _residual_norm = nnls(design, runtimes)
        slope = coefficients[0]
        intercept = coefficients[1]

        # Coefficient of determination of this chunk's fit.
        residual_ss = float(np.sum((runtimes - design @ coefficients) ** 2))
        total_ss = float(np.sum((runtimes - runtimes.mean()) ** 2))
        if total_ss > 0.0:
            score = 1.0 - residual_ss / total_ss
        else:
            # Constant target: perfect if reproduced exactly, otherwise 0.
            score = 1.0 if residual_ss == 0.0 else 0.0

        if score < min_score:
            min_score = score

        if skip_bad_fits and score < 0.85:
            continue

        parameters = g.iloc[0].drop(measurement_columns)

        miller_row = parameters.copy()
        miller_row["single_pair_miller_time"] = slope
        miller_rows.append(miller_row)

        final_exp_row = parameters.copy()
        final_exp_row["final_exp_time"] = intercept
        final_exp_rows.append(final_exp_row)

    print("Minimal final exp fitting score = {}".format(min_score))

    df_miller_loops = pd.DataFrame(miller_rows, columns=kept_columns + ["single_pair_miller_time"])
    df_final_exps = pd.DataFrame(final_exp_rows, columns=kept_columns + ["final_exp_time"])

    return (df_miller_loops, df_final_exps)

In [4]:
# Load every file in file_list; dataframes[i] corresponds to file_list[i].
dataframes = get_dfs(file_list)

In [5]:
dataframes[0].head()

Unnamed: 0,x_bit_length,x_hamming_weight,modulus_limbs,group_limbs,num_pairs,run_microseconds
0,57,39,6,16,2,22083
1,57,39,6,16,4,25577
2,57,39,6,16,6,36677
3,64,7,7,11,2,15900
4,64,7,7,11,4,22633


In [6]:
dataframes[1].head()

Unnamed: 0,six_u_plus_two_bit_length,six_u_plus_two_hamming,modulus_limbs,group_limbs,num_pairs,x_bit_length,x_hamming_weight,run_microseconds
0,51,29,10,2,2,49,29,25174
1,51,29,10,2,4,49,29,28713
2,51,29,10,2,6,49,29,34342
3,5,1,6,4,2,2,2,7393
4,5,1,6,4,4,2,2,8684


In [18]:
dataframes[2].head(15)

Unnamed: 0,modulus_limbs,group_limbs,num_pairs,x_bit_length,x_hamming_weight,exp_w0_bit_length,exp_w0_hamming,exp_w0_is_negative,exp_w1_bit_length,exp_w1_hamming,run_microseconds
0,4,5,2,890,597,449,446,1,1184,489,18532
1,4,5,4,890,597,449,446,1,1184,489,34374
2,4,5,8,890,597,449,446,1,1184,489,67828
3,4,5,16,890,597,449,446,1,1184,489,136585
4,4,5,24,890,597,449,446,1,1184,489,207514
5,4,5,32,890,597,449,446,1,1184,489,268734
6,9,1,2,480,38,1795,1644,1,1002,748,22266
7,9,1,4,480,38,1795,1644,1,1002,748,39178
8,9,1,8,480,38,1795,1644,1,1002,748,75236
9,9,1,16,480,38,1795,1644,1,1002,748,140665


In [19]:
dataframes[3].head(15)

Unnamed: 0,modulus_limbs,group_limbs,num_pairs,x_bit_length,x_hamming_weight,exp_w0_bit_length,exp_w0_hamming,exp_w0_is_negative,exp_w1_bit_length,exp_w1_hamming,run_microseconds
0,16,8,2,1752,738,86,17,1,1320,669,459967
1,16,8,4,1752,738,86,17,1,1320,669,895641
2,16,8,8,1752,738,86,17,1,1320,669,1769895
3,16,8,16,1752,738,86,17,1,1320,669,3548325
4,16,8,24,1752,738,86,17,1,1320,669,5311679
5,16,8,32,1752,738,86,17,1,1320,669,6918569
6,15,16,2,487,258,511,491,1,1808,1174,229753
7,15,16,4,487,258,511,491,1,1808,1174,437527
8,15,16,8,487,258,511,491,1,1808,1174,842665
9,15,16,16,487,258,511,491,1,1808,1174,1694210


In [9]:
from sklearn.model_selection import train_test_split

def split_df(df):
    """Split ``df`` into a training part and a 10% test part.

    The split uses ``random_state=42``. The sizes of both parts are printed.
    Returns the pair ``(train, test)``.
    """
    parts = train_test_split(df, test_size=0.10, random_state=42)
    train_part, test_part = parts

    print("Train samples {}, test samples {}".format(len(train_part), len(test_part)))

    return (train_part, test_part)

In [10]:
from sklearn import linear_model
from sklearn.linear_model import Lasso

from sklearn.metrics import max_error, mean_absolute_error, r2_score

def pretty_print_polynomial(poly, model, variable_names):
    """Print a fitted polynomial as a sum of ``coeff * var^power * ...`` terms.

    poly           -- object exposing ``powers_`` (one row per term, one
                      column per input variable); nothing else is read.
    model          -- object exposing ``coef_``, indexed by term.
    variable_names -- input variable names, in the column order of ``powers_``.

    Terms whose coefficient is exactly zero are left out; the remaining
    coefficients are rounded to 6 decimals. Nothing is returned.
    """
    rendered_terms = []

    for row_idx, exponents in enumerate(poly.powers_):
        weight = model.coef_[row_idx]
        if weight == 0:
            continue

        # The coefficient comes first, followed by every variable that has a
        # non-zero exponent in this term.
        factors = ["{}".format(np.around(weight, decimals=6))]
        factors.extend(
            '{}^{}'.format(variable_names[col_idx], exponent)
            for col_idx, exponent in enumerate(exponents)
            if exponent != 0
        )
        rendered_terms.append(" * ".join(factors))

    print(" + ".join(rendered_terms))
    


def analyze(train, test, features, target, trunc_limit = 0.001, degree = 6):
    """Fit a polynomial Lasso model on ``train`` and report its accuracy on ``test``.

    The ``features`` columns are expanded with ``PolynomialFeatures`` up to
    ``degree`` (no bias column) and fitted with a Lasso that has
    ``positive=True`` and ``fit_intercept=False``. Accuracy on the test set
    is printed twice: for the fitted coefficients, and again after every
    coefficient smaller than ``trunc_limit`` has been set to zero. Finally
    the truncated polynomial is printed.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Frames containing the ``features`` columns and the ``target`` column.
    features : list of str
        Names of the input columns.
    target : str
        Name of the column to predict.
    trunc_limit : float
        Coefficients strictly below this value are zeroed.
    degree : int
        Maximum total degree of the polynomial expansion.

    Returns
    -------
    The fitted Lasso estimator, carrying the truncated coefficients.
    """
    poly = PolynomialFeatures(degree = degree, include_bias = False)

    X_train = poly.fit_transform(train[features])
    Y_train = train[target]

    lin = Lasso(alpha=0.0001, precompute=True, max_iter=100000, fit_intercept=False,
                positive=True, random_state=9999, selection='random')
    lin.fit(X_train, Y_train)

    print("score on training set {}".format(lin.score(X_train, Y_train)))

    # Reuse the expansion fitted on the training data; the test set must not
    # be used to (re)fit any part of the pipeline.
    X_test = poly.transform(test[features])
    Y_test = test[target]

    print("score on test set {}".format(lin.score(X_test, Y_test)))

    def report_test_accuracy(header):
        """Print ``header`` followed by the error metrics of ``lin`` on the test set."""
        y_pred = lin.predict(X_test)
        print(header)
        print("Max absolute error {} microseconds".format(max_error(Y_test, y_pred)))
        print("Mean absolute error {} microseconds".format(mean_absolute_error(Y_test, y_pred)))
        print("R2 score = {}".format(r2_score(Y_test, y_pred)))

    report_test_accuracy("Model accuracy before manual truncation of coefficients")

    # Zero every coefficient below the threshold in one vectorized step.
    lin.coef_ = np.where(lin.coef_ < trunc_limit, 0.0, lin.coef_)

    print("Truncating coefficients lower than {}".format(trunc_limit))
    report_test_accuracy("Model accuracy after manual truncation of coefficients")

    pretty_print_polynomial(poly, lin, features)

    return lin

In [11]:
def analyze_bls12(df):
    """Fit the Miller-loop and final-exponentiation models for a BLS12 data set.

    ``df`` is first decomposed by ``factor_out_final_exp`` (default chunk
    size); each of the two resulting frames is split into train/test parts
    and fitted with ``analyze``.

    Returns the pair ``(model_miller, model_final_exp)``.
    """
    miller_features = ["x_bit_length", "x_hamming_weight", "modulus_limbs", "group_limbs"]
    final_exp_features = ["x_bit_length", "x_hamming_weight", "modulus_limbs"]

    miller, final_exp = factor_out_final_exp(df)

    miller_train, miller_test = split_df(miller)
    print("Fitting miller loop price")
    model_miller = analyze(miller_train, miller_test, miller_features, "single_pair_miller_time")

    final_exp_train, final_exp_test = split_df(final_exp)
    print("Fitting final exp price")
    model_final_exp = analyze(final_exp_train, final_exp_test, final_exp_features, "final_exp_time")

    return (model_miller, model_final_exp)

In [12]:
# dataframes[0] is loaded from the first file_list entry (labelled "BLS12").
(bls_miller, bls_final_exp) = analyze_bls12(dataframes[0])

Minimal final exp fitting score = 0.0
Train samples 12463, test samples 1385
Fitting miller loop price
score on training set 0.9734097577950763
score on test set 0.9759022280372771
Model accuracy before manual truncation of coefficients
Max absolute error 7614.571415982769 microseconds
Mean absolute error 623.7407086228568 microseconds
R2 score = 0.9759022280372771
Truncating coefficients lower than 0.001
Model accuracy after manual truncation of coefficients
Max absolute error 7657.080830071041 microseconds
Mean absolute error 621.2882814217483 microseconds
R2 score = 0.9759987185889156
0.417709 * x_bit_length^1 * modulus_limbs^1 + 0.017594 * x_hamming_weight^1 * modulus_limbs^1 + 18.306288 * modulus_limbs^1 * group_limbs^1 + 0.002166 * x_bit_length^1 * x_hamming_weight^1 * modulus_limbs^1 + 0.144132 * x_bit_length^1 * modulus_limbs^2 + 0.004641 * x_bit_length^1 * modulus_limbs^1 * group_limbs^1 + 0.174109 * x_hamming_weight^1 * modulus_limbs^2 + 4.846864 * modulus_limbs^2 * group_lim

In [13]:
def analyze_bn(df):
    """Fit the Miller-loop and final-exponentiation models for a BN data set.

    ``df`` is first decomposed by ``factor_out_final_exp`` (default chunk
    size). The Miller-loop model is fitted on the ``six_u_plus_two_*``
    columns, the final-exponentiation model on the ``x_*`` columns.

    Returns the pair ``(model_miller, model_final_exp)``.
    """
    miller_features = ["six_u_plus_two_bit_length", "six_u_plus_two_hamming", "modulus_limbs", "group_limbs"]
    final_exp_features = ["x_bit_length", "x_hamming_weight", "modulus_limbs"]

    miller, final_exp = factor_out_final_exp(df)

    miller_train, miller_test = split_df(miller)
    print("Fitting miller loop price")
    model_miller = analyze(miller_train, miller_test, miller_features, "single_pair_miller_time")

    final_exp_train, final_exp_test = split_df(final_exp)
    print("Fitting final exp price")
    model_final_exp = analyze(final_exp_train, final_exp_test, final_exp_features, "final_exp_time")

    return (model_miller, model_final_exp)

In [14]:
# dataframes[1] is loaded from the second file_list entry (labelled "BN").
(bn_miller, bn_final_exp) = analyze_bn(dataframes[1])

Minimal final exp fitting score = 0.005380826021653018
Train samples 12294, test samples 1366
Fitting miller loop price
score on training set 0.974812367499273
score on test set 0.9740105351972641
Model accuracy before manual truncation of coefficients
Max absolute error 10138.032797187745 microseconds
Mean absolute error 633.5917715631521 microseconds
R2 score = 0.9740105351972641
Truncating coefficients lower than 0.001
Model accuracy after manual truncation of coefficients
Max absolute error 10331.477741027258 microseconds
Mean absolute error 629.5417292546151 microseconds
R2 score = 0.9740552865611236
0.194238 * six_u_plus_two_bit_length^1 * modulus_limbs^1 + 0.221893 * six_u_plus_two_hamming^1 * modulus_limbs^1 + 18.859488 * modulus_limbs^1 * group_limbs^1 + 0.00177 * six_u_plus_two_bit_length^1 * six_u_plus_two_hamming^1 * modulus_limbs^1 + 0.160538 * six_u_plus_two_bit_length^1 * modulus_limbs^2 + 0.154685 * six_u_plus_two_hamming^1 * modulus_limbs^2 + 4.87253 * modulus_limbs^2 

In [48]:
def analyze_mnt(df, trunc_limit = 0.001, skip_bad_fits = False):
    """Fit the Miller-loop and final-exponentiation models for an MNT data set.

    ``df`` is decomposed by ``factor_out_final_exp`` in chunks of 6 rows,
    forwarding ``skip_bad_fits``. Both resulting frames are split into
    train/test parts and fitted with ``analyze`` using ``trunc_limit``.

    Returns the pair ``(model_miller, model_final_exp)``.
    """
    miller_features = ["x_bit_length", "x_hamming_weight", "modulus_limbs", "group_limbs"]
    final_exp_features = ["exp_w0_bit_length", "exp_w0_hamming", "exp_w1_bit_length", "exp_w1_hamming", "modulus_limbs"]

    miller, final_exp = factor_out_final_exp(df, group_by = 6, skip_bad_fits = skip_bad_fits)

    miller_train, miller_test = split_df(miller)
    print("Fitting miller loop price")
    model_miller = analyze(miller_train, miller_test, miller_features,
                           "single_pair_miller_time", trunc_limit = trunc_limit)

    final_exp_train, final_exp_test = split_df(final_exp)
    print("Fitting final exp price")
    model_final_exp = analyze(final_exp_train, final_exp_test, final_exp_features,
                              "final_exp_time", trunc_limit = trunc_limit)

    return (model_miller, model_final_exp)

In [49]:
# Fit the MNT4 data (third file_list entry) twice: once with the 0.001
# truncation threshold and once with trunc_limit = 0.0. The second call
# overwrites the models returned by the first; only the printed reports of
# the first run remain.
(mnt4_miller, mnt4_final_exp) = analyze_mnt(dataframes[2], trunc_limit = 0.001, skip_bad_fits = True)
print("\n\n\n")
(mnt4_miller, mnt4_final_exp) = analyze_mnt(dataframes[2], trunc_limit = 0.0, skip_bad_fits = True)

Minimal final exp fitting score = 0.0
Train samples 29468, test samples 3275
Fitting miller loop price
score on training set 0.9966190639346156
score on test set 0.9963893679680819
Model accuracy before manual truncation of coefficients
Max absolute error 12612.867689504259 microseconds
Mean absolute error 1135.9836354343354 microseconds
R2 score = 0.9963893679680818
Truncating coefficients lower than 0.001
Model accuracy after manual truncation of coefficients
Max absolute error 47540.19537147235 microseconds
Mean absolute error 4395.452396482143 microseconds
R2 score = 0.9170334811543186
0.212922 * x_bit_length^1 + 0.977996 * x_bit_length^1 * modulus_limbs^1 + 0.707895 * x_hamming_weight^1 * modulus_limbs^1 + 43.642287 * modulus_limbs^1 * group_limbs^1 + 0.040712 * x_bit_length^1 * modulus_limbs^2 + 0.001622 * x_bit_length^1 * modulus_limbs^1 * group_limbs^1 + 0.066906 * x_hamming_weight^1 * modulus_limbs^2 + 3.573559 * modulus_limbs^2 * group_limbs^1 + 0.001904 * x_bit_length^1 * mo

In [17]:
# Fit the MNT6 data (fourth file_list entry) twice: once with the 0.001
# truncation threshold and once with trunc_limit = 0.0; the second call
# overwrites the models returned by the first. Unlike the MNT4 cell,
# skip_bad_fits is left at its default (False) here.
# NOTE(review): the recorded output of this cell ends with KeyboardInterrupt,
# so the run appears not to have completed - confirm before relying on
# mnt6_miller / mnt6_final_exp.
(mnt6_miller, mnt6_final_exp) = analyze_mnt(dataframes[3], trunc_limit = 0.001)
print("\n\n\n")
(mnt6_miller, mnt6_final_exp) = analyze_mnt(dataframes[3], trunc_limit = 0.0)

Minimal final exp fitting score = 0.0
Train samples 33059, test samples 3674
Fitting miller loop price
score on training set 0.9881950088343412
score on test set 0.9886002555395369
Model accuracy before manual truncation of coefficients
Max absolute error 41392.449385947344 microseconds
Mean absolute error 3654.072324593691 microseconds
R2 score = 0.9886002555395369
Truncating coefficients lower than 0.001
Model accuracy after manual truncation of coefficients
Max absolute error 40131.02170133125 microseconds
Mean absolute error 3996.5745826919983 microseconds
R2 score = 0.9875753753310927
0.386594 * x_hamming_weight^1 + 0.566254 * x_bit_length^1 * modulus_limbs^1 + 0.077422 * x_bit_length^1 * group_limbs^1 + 0.332178 * x_hamming_weight^1 * modulus_limbs^1 + 0.042431 * x_hamming_weight^1 * group_limbs^1 + 3.699187 * group_limbs^2 + 0.275471 * x_bit_length^1 * modulus_limbs^2 + 0.247976 * x_hamming_weight^1 * modulus_limbs^2 + 12.326227 * modulus_limbs^2 * group_limbs^1 + 0.08341 * modu

KeyboardInterrupt: 

In [22]:
def analyze_manual_poly(df, features_description, target, trunc_limit = 0.001, degree = 3):
    """Fit a Lasso model on hand-built power columns and their interactions.

    For every ``(name, max_power)`` entry of ``features_description`` the
    columns ``name^1 .. name^max_power`` are added to a copy of ``df``. These
    columns are expanded with ``PolynomialFeatures(interaction_only=True)``
    up to ``degree`` and fitted with a Lasso that has ``positive=True`` and
    ``fit_intercept=False``. The copy is split into train/test parts by
    ``split_df``. Accuracy on the test part is printed before and after
    zeroing every coefficient smaller than ``trunc_limit``.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame; it is not modified.
    features_description : iterable of (str, int)
        Column name and highest power to generate for it.
    target : str
        Name of the column to predict.
    trunc_limit : float
        Coefficients strictly below this value are zeroed.
    degree : int
        Maximum number of generated columns multiplied together in one term.

    Returns
    -------
    The fitted Lasso estimator, carrying the truncated coefficients.
    """
    new_df = df.copy()
    features = []
    for name, max_power in features_description:
        # Powers are computed column-wise in floating point (the regression
        # works in floating point anyway) instead of element by element.
        base = new_df[name].astype(float)
        for i in range(1, max_power + 1):
            subname = "{}^{}".format(name, i)
            new_df[subname] = base ** i
            features.append(subname)

    print(features)

    poly = PolynomialFeatures(degree = degree, interaction_only=True, include_bias = False)

    train, test = split_df(new_df)

    X_train = poly.fit_transform(train[features])
    Y_train = train[target]

    lin = Lasso(alpha=0.0001, precompute=True, max_iter=100000, fit_intercept=False,
                positive=True, random_state=9999, selection='random')
    lin.fit(X_train, Y_train)

    print("Intercept = {}".format(lin.intercept_))

    print("score on training set {}".format(lin.score(X_train, Y_train)))

    # Reuse the expansion fitted on the training data; the test set must not
    # be used to (re)fit any part of the pipeline.
    X_test = poly.transform(test[features])
    Y_test = test[target]

    print("score on test set {}".format(lin.score(X_test, Y_test)))

    def report_test_accuracy(header):
        """Print ``header`` followed by the error metrics of ``lin`` on the test set."""
        y_pred = lin.predict(X_test)
        print(header)
        print("Max absolute error {} microseconds".format(max_error(Y_test, y_pred)))
        print("Mean absolute error {} microseconds".format(mean_absolute_error(Y_test, y_pred)))
        print("R2 score = {}".format(r2_score(Y_test, y_pred)))

    report_test_accuracy("Model accuracy before manual truncation of coefficients")

    # Zero every coefficient below the threshold in one vectorized step.
    lin.coef_ = np.where(lin.coef_ < trunc_limit, 0.0, lin.coef_)

    print("Truncating coefficients lower than {}".format(trunc_limit))
    report_test_accuracy("Model accuracy after manual truncation of coefficients")

    pretty_print_polynomial(poly, lin, features)

    return lin

In [23]:
def analyze_mnt_final_exp(df, trunc_limit = 0.001):
    """Fit the MNT final-exponentiation model on hand-built power columns.

    ``df`` is decomposed by ``factor_out_final_exp`` in chunks of 6 rows;
    only the final-exponentiation frame is used. The exponent columns enter
    linearly and ``modulus_limbs`` with powers 1..12, combined by
    ``analyze_manual_poly`` with ``degree = 2``.

    Returns the fitted model.
    """
    _, final_exp = factor_out_final_exp(df, group_by = 6)

    feature_powers = [
        ("exp_w0_bit_length", 1),
        ("exp_w0_hamming", 1),
        ("exp_w1_bit_length", 1),
        ("exp_w1_hamming", 1),
        ("modulus_limbs", 12),
    ]

    print("Fitting final exp price")
    return analyze_manual_poly(final_exp, feature_powers, "final_exp_time",
                               trunc_limit = trunc_limit, degree = 2)

In [24]:
# Alternative MNT4 final-exponentiation fit using the hand-built power columns.
# NOTE(review): the recorded output of this cell is KeyboardInterrupt, so the
# run appears not to have completed.
mnt4_final_exp_alt = analyze_mnt_final_exp(dataframes[2], trunc_limit = 0.001)
# Disabled second run without truncation:
# print("\n\n\n") 
# mnt4_final_exp_alt = analyze_mnt_final_exp(dataframes[2], trunc_limit = 0.0)

KeyboardInterrupt: 

In [25]:
def analyze_manual_poly_separated(df, powered_features, linear_features, target, trunc_limit = 0.001):
    """Fit a Lasso model on products of powered columns with linear columns.

    For every ``(name, max_power)`` entry of ``powered_features`` the columns
    ``name^1 .. name^max_power`` are added to a copy of ``df``. Each of them
    is then multiplied with each column of ``linear_features``, producing the
    regression inputs ``"<name>^<i>*<linear>"``. The copy is split into
    train/test parts by ``split_df`` and fitted with a Lasso that has
    ``positive=True`` and ``fit_intercept=False``. Accuracy on the test part
    is printed before and after zeroing every coefficient smaller than
    ``trunc_limit``.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame; it is not modified.
    powered_features : iterable of (str, int)
        Column name and highest power to generate for it.
    linear_features : list of str
        Columns multiplied with every generated power column.
    target : str
        Name of the column to predict.
    trunc_limit : float
        Coefficients strictly below this value are zeroed.

    Returns
    -------
    The fitted Lasso estimator, carrying the truncated coefficients.
    """
    new_df = df.copy()
    sub_features = []
    for name, max_power in powered_features:
        # Powers are computed column-wise in floating point (the regression
        # works in floating point anyway) instead of element by element.
        base = new_df[name].astype(float)
        for i in range(1, max_power + 1):
            subname = "{}^{}".format(name, i)
            new_df[subname] = base ** i
            sub_features.append(subname)

    features = []

    for subfeature in sub_features:
        for lin_feature in linear_features:
            subname = "{}*{}".format(subfeature, lin_feature)
            # Column-wise product; a row-wise DataFrame.apply here ran a
            # Python-level loop over every row for every generated column.
            new_df[subname] = new_df[subfeature] * new_df[lin_feature]
            features.append(subname)

    print(features)

    train, test = split_df(new_df)

    X_train = train[features]
    Y_train = train[target]

    lin = Lasso(alpha=0.0001, precompute=True, max_iter=100000, fit_intercept=False,
                positive=True, random_state=9999, selection='random')
    lin.fit(X_train, Y_train)

    print("Intercept = {}".format(lin.intercept_))

    print("score on training set {}".format(lin.score(X_train, Y_train)))

    X_test = test[features]
    Y_test = test[target]

    print("score on test set {}".format(lin.score(X_test, Y_test)))

    def report_test_accuracy(header):
        """Print ``header`` followed by the error metrics of ``lin`` on the test set."""
        y_pred = lin.predict(X_test)
        print(header)
        print("Max absolute error {} microseconds".format(max_error(Y_test, y_pred)))
        print("Mean absolute error {} microseconds".format(mean_absolute_error(Y_test, y_pred)))
        print("R2 score = {}".format(r2_score(Y_test, y_pred)))

    report_test_accuracy("Model accuracy before manual truncation of coefficients")

    # Zero every coefficient below the threshold in one vectorized step.
    lin.coef_ = np.where(lin.coef_ < trunc_limit, 0.0, lin.coef_)

    print("Truncating coefficients lower than {}".format(trunc_limit))
    report_test_accuracy("Model accuracy after manual truncation of coefficients")

    # No PolynomialFeatures object exists in this variant, so the fitted
    # polynomial is not pretty-printed here.

    return lin

In [27]:
def analyze_mnt_final_exp_manual(df, trunc_limit = 0.001):
    """Fit the MNT final-exponentiation model on ``modulus_limbs`` powers times exponent columns.

    ``df`` is decomposed by ``factor_out_final_exp`` in chunks of 6 rows with
    ``skip_bad_fits = True``; only the final-exponentiation frame is used and
    its first 25 rows are printed. ``modulus_limbs`` powers 1..12 are
    multiplied with each exponent column by ``analyze_manual_poly_separated``.

    Returns the fitted model.
    """
    # Alternative decomposition that was tried here:
    #     factor_out_final_exp_non_negative(df, skip_bad_fits = True)
    _, final_exp = factor_out_final_exp(df, group_by = 6, skip_bad_fits = True)

    print(final_exp.head(25))

    powered = [("modulus_limbs", 12)]
    linear = ["exp_w0_bit_length", "exp_w0_hamming", "exp_w1_bit_length", "exp_w1_hamming"]

    print("Fitting final exp price")
    return analyze_manual_poly_separated(final_exp, powered, linear,
                                         "final_exp_time", trunc_limit = trunc_limit)

In [28]:
# Alternative MNT4 final-exponentiation fit: modulus_limbs powers multiplied
# with the individual exponent columns. Overwrites any earlier
# mnt4_final_exp_alt.
mnt4_final_exp_alt = analyze_mnt_final_exp_manual(dataframes[2], trunc_limit = 0.001)
# Disabled second run without truncation:
# print("\n\n\n") 
# mnt4_final_exp_alt = analyze_mnt_final_exp(dataframes[2], trunc_limit = 0.0)

Minimal final exp fitting score = 0.0
     modulus_limbs  group_limbs  x_bit_length  x_hamming_weight  \
0              4.0          5.0         890.0             597.0   
6              9.0          1.0         480.0              38.0   
12             4.0          9.0         615.0             226.0   
18             8.0          9.0        1185.0             634.0   
24             9.0         16.0        1156.0             923.0   
36             5.0          4.0        1097.0             288.0   
42            13.0         14.0        1390.0            1011.0   
48            13.0          4.0         470.0               3.0   
60            11.0          5.0        1070.0             358.0   
66            14.0          1.0         192.0              20.0   
78            13.0          5.0        1888.0             161.0   
84            16.0          1.0          24.0              12.0   
96             7.0          7.0         729.0             668.0   
108           10.0      

In [36]:
def mnt_reduce_num_features(df):
    """Collapse the MNT exponent columns into two summed features.

    Adds ``exp_bit_length = exp_w0_bit_length + exp_w1_bit_length`` and
    ``exp_hamming = exp_w0_hamming + exp_w1_hamming``, then removes the four
    source columns together with ``x_bit_length``, ``x_hamming_weight`` and
    ``exp_w0_is_negative``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing all of the columns named above; it is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with the remaining columns in their original order,
        followed by ``exp_bit_length`` and ``exp_hamming``.

    Raises
    ------
    KeyError
        If any of the expected columns is missing.
    """
    removed_columns = [
        "x_bit_length",
        "x_hamming_weight",
        "exp_w0_bit_length",
        "exp_w1_bit_length",
        "exp_w0_hamming",
        "exp_w1_hamming",
        "exp_w0_is_negative",
    ]

    new_df = df.copy()
    # Column-wise sums instead of a row-wise apply, which loops over every
    # row in Python.
    new_df["exp_bit_length"] = new_df["exp_w0_bit_length"] + new_df["exp_w1_bit_length"]
    new_df["exp_hamming"] = new_df["exp_w0_hamming"] + new_df["exp_w1_hamming"]

    return new_df.drop(columns=removed_columns)

In [44]:
def analyze_mnt_final_exp_manual_reduced_features(df, trunc_limit = 0.001):
    """Fit the MNT final-exponentiation model on the reduced feature set.

    ``df`` is decomposed by ``factor_out_final_exp`` in chunks of 6 rows with
    ``skip_bad_fits = True``; the final-exponentiation frame is passed
    through ``mnt_reduce_num_features`` and its first 10 rows are printed.
    ``modulus_limbs`` powers 1..12 are then multiplied with
    ``exp_bit_length``, ``exp_hamming`` and ``group_limbs`` by
    ``analyze_manual_poly_separated``.

    Returns the fitted model.
    """
    # Alternative decomposition that was tried here:
    #     factor_out_final_exp_non_negative(df, skip_bad_fits = True)
    _, final_exp = factor_out_final_exp(df, group_by = 6, skip_bad_fits = True)

    final_exp_reduced = mnt_reduce_num_features(final_exp)
    print(final_exp_reduced.head(10))

    powered = [("modulus_limbs", 12)]
    linear = ["exp_bit_length", "exp_hamming", "group_limbs"]

    print("Fitting final exp price")
    return analyze_manual_poly_separated(final_exp_reduced, powered, linear,
                                         "final_exp_time", trunc_limit = trunc_limit)

In [45]:
# MNT4 final-exponentiation fit on the reduced feature set (summed exponent
# bit lengths / Hamming weights plus group_limbs). Overwrites any earlier
# mnt4_final_exp_alt.
mnt4_final_exp_alt = analyze_mnt_final_exp_manual_reduced_features(dataframes[2], trunc_limit = 0.001)

Minimal final exp fitting score = 0.0
    modulus_limbs  group_limbs  final_exp_time  exp_bit_length  exp_hamming
0             4.0          5.0     1328.070700          1633.0        935.0
6             9.0          1.0     5716.329890          2797.0       2392.0
12            4.0          9.0     2590.268627          2576.0       1164.0
18            8.0          9.0     4567.731398          2591.0       1702.0
24            9.0         16.0    10856.607929          2332.0       1370.0
36            5.0          4.0     1954.445818          1364.0        164.0
42           13.0         14.0    17307.979277          1155.0         62.0
48           13.0          4.0     9104.556091          2085.0       1955.0
60           11.0          5.0     7750.755903          1339.0        120.0
66           14.0          1.0    10698.110285          2808.0        992.0
Fitting final exp price
['modulus_limbs^1*exp_bit_length', 'modulus_limbs^1*exp_hamming', 'modulus_limbs^1*group_limbs', 'modu