In [17]:
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
import numpy as np
import math

# (label, CSV path) pairs consumed by get_dfs; the resulting frames keep this order.
file_list = [
    ("MNT4", "./mnt4/miller_loop_parallel_50.csv"),
    ("MNT6", "./mnt6/miller_loop_parallel_50.csv"),
]

# Multiplier applied to the rounded-up mean run time (run_microseconds) to obtain gas.
gas_factor = 15

def get_dfs(files):
    """Read every benchmark CSV and discard the columns the fit does not use.

    Parameters
    ----------
    files : iterable of (name, path) pairs
        Only ``path`` is used; ``name`` is unpacked and ignored.

    Returns
    -------
    list of pandas.DataFrame
        One frame per entry of ``files``, in the same order.

    Raises
    ------
    KeyError
        If a CSV lacks one of the columns that are dropped.
    """
    unused_columns = [
        "x_is_negative",
        "x_bit_length",
        "x_hamming_weight",
        "exp_w0_bit_length",
        "exp_w0_hamming",
        "exp_w0_is_negative",
        "exp_w1_bit_length",
        "exp_w1_hamming",
    ]
    # "num_pairs" and "group_limbs" are deliberately kept: later cells group by them.
    return [
        pd.read_csv(csv_path).drop(columns=unused_columns)
        for _label, csv_path in files
    ]

In [18]:
# Load the benchmark frames; they follow the order of file_list (index 0 = MNT4, index 1 = MNT6).
dataframes = get_dfs(file_list)

In [19]:
# Preview the first rows of the first loaded frame (the MNT4 entry of file_list).
dataframes[0].head(15)

Unnamed: 0,modulus_limbs,group_limbs,num_pairs,run_microseconds
0,4,1,2,510
1,4,1,4,803
2,4,1,6,1177
3,4,1,8,1617
4,4,1,12,2805
5,4,1,16,3482
6,10,9,2,17094
7,15,7,2,27978
8,4,1,2,565
9,4,1,4,844


In [12]:
# Preview the first rows of the second loaded frame (the MNT6 entry of file_list).
# NOTE(review): this cell's saved execution count is older than its neighbours and the
# saved output shows no num_pairs column, although make_averages groups by num_pairs.
# The output looks stale -- re-run after a kernel restart to confirm.
dataframes[1].head(15)

Unnamed: 0,modulus_limbs,group_limbs,run_microseconds
0,4,1,1000
1,4,1,1716
2,4,1,2569
3,4,1,3229
4,4,1,4708
5,4,1,6025
6,4,1,926
7,4,1,1719
8,4,1,2627
9,4,1,3353


In [33]:
# One-off gas cost per modulus_limbs value; make_averages subtracts it from the
# measured gas to produce gas_corrected.
mnt4_one_off = pd.read_csv("./mnt4/one_off_results.csv")

mnt4_one_off.head(15)

Unnamed: 0,modulus_limbs,gas
0,4,1455
1,5,2250
2,6,3135
3,7,4515
4,8,6435
5,9,8625
6,10,11145
7,11,14130
8,12,18210
9,13,22410


In [34]:
# One-off gas cost per modulus_limbs value; make_averages subtracts it from the
# measured gas to produce gas_corrected.
mnt6_one_off = pd.read_csv("./mnt6/one_off_results.csv")

mnt6_one_off.head(15)

Unnamed: 0,modulus_limbs,gas
0,4,2085
1,5,3855
2,6,4815
3,7,7290
4,8,9855
5,9,14070
6,10,18255
7,11,22095
8,12,29310
9,13,34710


In [147]:
def make_averages(df, one_offs):
    """Average repeated runs and express the result as gas.

    Rows of ``df`` are grouped by (modulus_limbs, group_limbs, num_pairs) and
    averaged. The mean ``run_microseconds`` is rounded up and multiplied by the
    module-level ``gas_factor`` to give ``gas``. ``gas_corrected`` is ``gas``
    minus the ``gas`` value of the first ``one_offs`` row that has the same
    ``modulus_limbs``.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the three grouping columns and ``run_microseconds``.
    one_offs : pandas.DataFrame
        Needs ``modulus_limbs`` and ``gas`` columns.

    Returns
    -------
    pandas.DataFrame
        Columns: the grouping keys, ``gas`` and ``gas_corrected``
        (``run_microseconds`` is removed).

    Raises
    ------
    IndexError
        If some ``modulus_limbs`` value has no matching row in ``one_offs``.
    """
    group_keys = ['modulus_limbs', "group_limbs", "num_pairs"]
    grouped_means = df.groupby(group_keys).mean()

    def to_gas(mean_microseconds):
        # Round the averaged time up before scaling it to gas.
        return gas_factor * math.ceil(mean_microseconds)

    grouped_means["gas"] = grouped_means["run_microseconds"].apply(to_gas)

    # Turn the grouping keys back into ordinary columns and discard the raw timing.
    averaged = grouped_means.reset_index().drop(columns="run_microseconds")

    def subtract_one_off(row):
        same_modulus = one_offs["modulus_limbs"] == row["modulus_limbs"]
        return row["gas"] - one_offs[same_modulus]["gas"].array[0]

    averaged["gas_corrected"] = averaged[["modulus_limbs", "gas"]].apply(subtract_one_off, axis=1)

    return averaged

In [149]:
# Average the first frame per (modulus_limbs, group_limbs, num_pairs) and convert it to
# gas, subtracting the matching one-off cost from mnt4_one_off.
mnt4 = make_averages(dataframes[0], mnt4_one_off)
mnt4.head(25)

Unnamed: 0,modulus_limbs,group_limbs,num_pairs,gas,gas_corrected
0,4,1,2,8595,7140
1,4,1,4,15525,14070
2,4,1,6,23130,21675
3,4,1,8,29520,28065
4,4,1,12,43590,42135
5,4,1,16,57285,55830
6,4,2,2,15285,13830
7,4,2,4,29295,27840
8,4,2,6,42465,41010
9,4,2,8,55935,54480


In [151]:
# Average the second frame per (modulus_limbs, group_limbs, num_pairs) and convert it to
# gas, subtracting the matching one-off cost from mnt6_one_off.
mnt6 = make_averages(dataframes[1], mnt6_one_off)
mnt6.head(25)

Unnamed: 0,modulus_limbs,group_limbs,num_pairs,gas,gas_corrected
0,4,1,2,13950,11865
1,4,1,4,25545,23460
2,4,1,6,37065,34980
3,4,1,8,48405,46320
4,4,1,12,71190,69105
5,4,1,16,94410,92325
6,4,2,2,25530,23445
7,4,2,4,48600,46515
8,4,2,6,71850,69765
9,4,2,8,96165,94080


In [152]:
from sklearn.model_selection import train_test_split

def split_df(df):
    """Split ``df`` into train and test parts and report their sizes.

    Uses ``train_test_split`` with ``test_size=0.10`` and a fixed
    ``random_state`` of 42, so repeated calls on the same frame give the same
    split.

    Returns
    -------
    tuple
        ``(train, test)``.
    """
    train_part, test_part = train_test_split(df, test_size=0.10, random_state=42)

    size_report = "Train samples {}, test samples {}".format(len(train_part), len(test_part))
    print(size_report)

    return (train_part, test_part)

In [153]:
from sklearn import linear_model
from sklearn.linear_model import Lasso

from sklearn.metrics import max_error, mean_absolute_error, r2_score

def pretty_print_polynomial(poly, model, variable_names):
    """Print the fitted polynomial as ``c * var * var^k + ...``.

    Parameters
    ----------
    poly : object exposing ``powers_``
        2-D array; row ``t`` holds the exponent of each input variable in term ``t``.
    model : object exposing ``coef_``
        ``coef_[t]`` is the coefficient of term ``t``.
    variable_names : sequence of str
        Display name of each input variable, indexed like the columns of ``powers_``.

    Terms whose coefficient equals zero are left out. Coefficients are rounded
    to 6 decimals for display. Nothing is returned; the string is printed.
    """
    def render_factor(variable_name, exponent):
        # An exponent of 1 is printed as the bare variable name.
        if exponent == 1:
            return '{}'.format(variable_name)
        return '{}^{}'.format(variable_name, exponent)

    term_exponents = poly.powers_
    rendered_terms = []

    for term_idx, exponents in enumerate(term_exponents):
        raw_coeff = model.coef_[term_idx]
        if raw_coeff == 0:
            continue

        factors = ["{}".format(np.around(raw_coeff, decimals=6))]
        for variable_idx, exponent in enumerate(exponents):
            if exponent == 0:
                # Variable does not take part in this term.
                continue
            factors.append(render_factor(variable_names[variable_idx], exponent))

        rendered_terms.append(" * ".join(factors))

    print(" + ".join(rendered_terms))

In [155]:
def _print_test_metrics(model, X_test, y_true):
    """Print max/mean absolute error and R2 of ``model`` on the held-out samples."""
    y_pred = model.predict(X_test)

    # NOTE(review): the label says "microseconds", but the value is in the units of
    # the target column; the callers in this notebook pass gas-valued targets.
    # The strings are kept unchanged so the output stays comparable with saved runs.
    print("Max absolute error {} microseconds".format(max_error(y_true, y_pred)))
    print("Mean absolute error {} microseconds".format(mean_absolute_error(y_true, y_pred)))
    print("R2 score = {}".format(r2_score(y_true, y_pred)))


def analyze_manual_poly(df, features_description, target, trunc_limit = 0.001, degree = 3):
    """Fit a non-negative Lasso on interaction terms of hand-built power features.

    For every ``(name, max_power)`` in ``features_description`` the columns
    ``name^1 .. name^max_power`` are added to a copy of ``df``. Those columns are
    expanded with ``PolynomialFeatures(interaction_only=True, include_bias=False)``
    and a Lasso without intercept and with non-negative coefficients is fitted
    on the training part returned by ``split_df``. Scores and error metrics on
    the test part are printed before and after coefficients below
    ``trunc_limit`` are zeroed, and the resulting polynomial is printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``target`` and every base column named in ``features_description``.
    features_description : iterable of (str, int)
        Base column name and the highest power to generate for it.
    target : str
        Name of the column to predict.
    trunc_limit : float
        Coefficients smaller than this are set to zero after fitting.
    degree : int
        Passed to ``PolynomialFeatures``.

    Returns
    -------
    sklearn.linear_model.Lasso
        The fitted model, with ``coef_`` replaced by the truncated coefficients.
    """
    new_df = df.copy()
    features = []
    for name, max_power in features_description:
        for power in range(1, max_power + 1):
            subname = "{}^{}".format(name, power)
            # Vectorised power instead of a per-element lambda.
            new_df[subname] = new_df[name] ** power
            features.append(subname)

    print(features)

    poly = PolynomialFeatures(degree = degree, interaction_only=True, include_bias = False)

    train, test = split_df(new_df)

    X_train = poly.fit_transform(train[features])
    Y_train = train[target]

    lin = Lasso(alpha=0.0001,precompute=True, max_iter=100000, fit_intercept=False,
                positive=True, random_state=9999, selection='random')
    lin.fit(X_train, Y_train)

    print("Intercept = {}".format(lin.intercept_))

    print("score on training set {}".format(lin.score(X_train, Y_train)))

    # The transformer was fitted on the training features; held-out data must only
    # be transformed, never used to refit it.
    X_test = poly.transform(test[features])
    Y_test = test[target]

    print("score on test set {}".format(lin.score(X_test, Y_test)))

    print("Model accuracy before manual truncation of coefficients")
    _print_test_metrics(lin, X_test, Y_test)

    # Coefficients are constrained to be non-negative (positive=True), so a plain
    # "<" comparison zeroes exactly the negligible ones.
    coeffs = lin.coef_.copy()
    coeffs[coeffs < trunc_limit] = 0.0
    lin.coef_ = coeffs

    print("Truncating coefficients lower than {}".format(trunc_limit))
    print("Model accuracy after manual truncation of coefficients")
    _print_test_metrics(lin, X_test, Y_test)

    pretty_print_polynomial(poly, lin, features)

    return lin

In [158]:
def analyze_mnt(df, trunc_limit = 0.001, modulus_power = 6):
    """Fit the ``gas_corrected`` column with ``analyze_manual_poly``.

    The features are ``group_limbs`` and ``num_pairs`` (first power only) and
    ``modulus_limbs`` up to ``modulus_power``; interactions are limited to
    degree 2. Returns the fitted model.
    """
    print("Fitting final exp price")

    feature_spec = [
        ("group_limbs", 1),
        ("num_pairs", 1),
        ("modulus_limbs", modulus_power),
    ]
    return analyze_manual_poly(
        df,
        feature_spec,
        "gas_corrected",
        trunc_limit=trunc_limit,
        degree=2,
    )

In [159]:
# Fit gas_corrected for the MNT4 averages, generating modulus_limbs powers 1..4.
mnt4_pairs_pairsing = analyze_mnt(mnt4, trunc_limit = 0.001, modulus_power = 4)

Fitting final exp price
['group_limbs^1', 'num_pairs^1', 'modulus_limbs^1', 'modulus_limbs^2', 'modulus_limbs^3', 'modulus_limbs^4']
Train samples 1123, test samples 125
Intercept = 0.0
score on training set 0.8707607109027689
score on test set 0.8926460706454012
Model accuracy before manual truncation of coefficients
Max absolute error 2041266.0527943857 microseconds
Mean absolute error 330267.37431814586 microseconds
R2 score = 0.8926460706454012
Truncating coefficients lower than 0.001
Model accuracy after manual truncation of coefficients
Max absolute error 2041266.0527943857 microseconds
Mean absolute error 330267.37431814586 microseconds
R2 score = 0.8926460706454012
8778.562588 * group_limbs^1 * num_pairs^1 + 20.657632 * group_limbs^1 * modulus_limbs^3 + 0.345381 * group_limbs^1 * modulus_limbs^4 + 18.153957 * num_pairs^1 * modulus_limbs^3 + 0.91398 * num_pairs^1 * modulus_limbs^4


In [160]:
# Fit gas_corrected for the MNT6 averages, generating modulus_limbs powers 1..6.
mnt6_pairs_pairsing = analyze_mnt(mnt6, trunc_limit = 0.001, modulus_power = 6)

Fitting final exp price
['group_limbs^1', 'num_pairs^1', 'modulus_limbs^1', 'modulus_limbs^2', 'modulus_limbs^3', 'modulus_limbs^4', 'modulus_limbs^5', 'modulus_limbs^6']
Train samples 1123, test samples 125
Intercept = 0.0
score on training set 0.8711012698855227
score on test set 0.893177664692602
Model accuracy before manual truncation of coefficients
Max absolute error 3421202.545458777 microseconds
Mean absolute error 559599.8305442733 microseconds
R2 score = 0.8931776646926021
Truncating coefficients lower than 0.001
Model accuracy after manual truncation of coefficients
Max absolute error 3421202.545458777 microseconds
Mean absolute error 559599.8305442733 microseconds
R2 score = 0.8931776646926021
15093.561272 * group_limbs^1 * num_pairs^1 + 35.313869 * group_limbs^1 * modulus_limbs^3 + 0.523958 * group_limbs^1 * modulus_limbs^4 + 37.433708 * num_pairs^1 * modulus_limbs^3 + 1.037275 * num_pairs^1 * modulus_limbs^4


In [163]:
def analyze_mnt_no_correction(df, trunc_limit = 0.001, modulus_power = 6):
    """Fit the raw ``gas`` column (no one-off subtraction) with ``analyze_manual_poly``.

    The features are ``group_limbs`` and ``num_pairs`` (first power only) and
    ``modulus_limbs`` up to ``modulus_power``; interactions are limited to
    degree 2. Returns the fitted model.
    """
    print("Fitting final exp price")

    feature_spec = [
        ("group_limbs", 1),
        ("num_pairs", 1),
        ("modulus_limbs", modulus_power),
    ]
    return analyze_manual_poly(
        df,
        feature_spec,
        "gas",
        trunc_limit=trunc_limit,
        degree=2,
    )

In [164]:
# Repeat the MNT4 fit against the uncorrected gas column; the returned model is
# assigned to `_` and not used further, only the printed report matters here.
_ = analyze_mnt_no_correction(mnt4, trunc_limit = 0.001, modulus_power = 4)

Fitting final exp price
['group_limbs^1', 'num_pairs^1', 'modulus_limbs^1', 'modulus_limbs^2', 'modulus_limbs^3', 'modulus_limbs^4']
Train samples 1123, test samples 125
Intercept = 0.0
score on training set 0.8746340428789476
score on test set 0.8957888457901405
Model accuracy before manual truncation of coefficients
Max absolute error 2024168.3184586875 microseconds
Mean absolute error 325727.7996191577 microseconds
R2 score = 0.8957888457901405
Truncating coefficients lower than 0.001
Model accuracy after manual truncation of coefficients
Max absolute error 2024168.3184586875 microseconds
Mean absolute error 325727.7996191577 microseconds
R2 score = 0.8957888457901405
8733.393584 * group_limbs^1 * num_pairs^1 + 21.860893 * group_limbs^1 * modulus_limbs^3 + 0.305245 * group_limbs^1 * modulus_limbs^4 + 19.323241 * num_pairs^1 * modulus_limbs^3 + 0.873925 * num_pairs^1 * modulus_limbs^4


In [165]:
# Repeat the MNT6 fit against the uncorrected gas column; the returned model is
# assigned to `_` and not used further, only the printed report matters here.
_ = analyze_mnt_no_correction(mnt6, trunc_limit = 0.001, modulus_power = 6)

Fitting final exp price
['group_limbs^1', 'num_pairs^1', 'modulus_limbs^1', 'modulus_limbs^2', 'modulus_limbs^3', 'modulus_limbs^4', 'modulus_limbs^5', 'modulus_limbs^6']
Train samples 1123, test samples 125
Intercept = 0.0
score on training set 0.8747774968789604
score on test set 0.8961724393034821
Model accuracy before manual truncation of coefficients
Max absolute error 3393357.3555686288 microseconds
Mean absolute error 552208.332648358 microseconds
R2 score = 0.8961724393034821
Truncating coefficients lower than 0.001
Model accuracy after manual truncation of coefficients
Max absolute error 3393357.3555686288 microseconds
Mean absolute error 552208.332648358 microseconds
R2 score = 0.8961724393034821
15026.027661 * group_limbs^1 * num_pairs^1 + 36.704718 * group_limbs^1 * modulus_limbs^3 + 0.496405 * group_limbs^1 * modulus_limbs^4 + 39.535468 * num_pairs^1 * modulus_limbs^3 + 0.957206 * num_pairs^1 * modulus_limbs^4
