In [1]:
# Multiplier applied to rounded-up microsecond timings to express them as gas.
gas_factor = 30

In [2]:
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
import numpy as np
import math

# Seed NumPy's legacy global RNG so repeated runs start from the same state.
np.random.seed(42)

# (label, path) pair describing the benchmark CSV analysed by this notebook.
file_list = ("Arith", "./monte_carlo_arith_deterministic_parallel_unfiltered_200.csv")


def get_df(file):
    """Load the CSV referenced by a ``(name, path)`` pair.

    Parameters
    ----------
    file : tuple
        Two-element ``(name, path)`` pair; only ``path`` is used.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the CSV at ``path``.
    """
    _label, csv_path = file
    return pd.read_csv(csv_path)


data = get_df(file_list)

data.head()

Unnamed: 0,modulus_limbs,group_limbs,num_mul_pairs,a_is_zero,ext_degree,run_microseconds_add,run_microseconds_mul,run_microseconds_multiexp
0,4,1,128,0,1,89,86,1277
1,4,1,128,0,3,136,635,11179
2,4,1,64,0,1,17,69,743
3,4,1,64,0,3,84,567,6558
4,4,1,32,0,1,17,60,392


In [3]:
from sklearn import linear_model
from sklearn.linear_model import Lasso
from scipy.optimize import nnls

def split(df):
    """Partition ``df`` by ``ext_degree`` (1, 2, 3) and ``a_is_zero`` (0, 1).

    Returns
    -------
    tuple of pandas.DataFrame
        Six independent copies, ordered by extension degree and, within each
        degree, with the ``a_is_zero == 0`` rows before the ``a_is_zero == 1``
        rows.
    """
    def _subset(ext_degree, a_is_zero):
        # Filter on the extension degree first, then on the a_is_zero flag.
        by_degree = df[df["ext_degree"] == ext_degree]
        return by_degree[by_degree["a_is_zero"] == a_is_zero].copy()

    return tuple(
        _subset(ext_degree, a_is_zero)
        for ext_degree in (1, 2, 3)
        for a_is_zero in (0, 1)
    )

# Unpack the six (ext_degree, a_is_zero) subsets of the benchmark data.
g1_a_non_zero, g1_a_is_zero, g2_ext_2_a_non_zero, g2_ext_2_a_is_zero, g2_ext_3_a_non_zero, g2_ext_3_a_is_zero = split(data)

g1_a_non_zero.head()


Unnamed: 0,modulus_limbs,group_limbs,num_mul_pairs,a_is_zero,ext_degree,run_microseconds_add,run_microseconds_mul,run_microseconds_multiexp
0,4,1,128,0,1,89,86,1277
2,4,1,64,0,1,17,69,743
4,4,1,32,0,1,17,60,392
6,4,1,16,0,1,9,53,307
8,4,1,8,0,1,9,51,153


In [4]:
def fit_add_and_mul(df):
    """Aggregate addition and multiplication timings of one data subset.

    Returns
    -------
    tuple
        ``(addition, multiplication)`` where ``addition`` is indexed by
        ``modulus_limbs`` and holds ``gas_add`` (mean addition time, rounded
        up and multiplied by ``gas_factor``), and ``multiplication`` is indexed
        by ``(modulus_limbs, group_limbs)`` and holds the mean
        ``run_microseconds_mul``.
    """
    mean_add_us = (
        df[["modulus_limbs", "run_microseconds_add"]]
        .groupby(['modulus_limbs'])
        .mean()
    )
    gas_add = mean_add_us["run_microseconds_add"].apply(
        lambda microseconds: gas_factor * math.ceil(microseconds)
    )
    addition = mean_add_us.assign(gas_add=gas_add).drop('run_microseconds_add', axis=1)

    multiplication = (
        df[["modulus_limbs", "group_limbs", "run_microseconds_mul"]]
        .groupby(['modulus_limbs', 'group_limbs'])
        .mean()
    )

    return (addition, multiplication)


g1_add_is_zero, g1_m_is_zero = fit_add_and_mul(g1_a_is_zero)
g1_add_non_zero, g1_m_non_zero = fit_add_and_mul(g1_a_non_zero)

In [5]:
def merge_max(non_zero, zero):
    """Combine two addition price tables by keeping the larger ``gas_add``.

    Parameters
    ----------
    non_zero, zero : pandas.DataFrame
        Tables with a ``gas_add`` column. ``zero`` is aligned to the index of
        ``non_zero``: labels that exist only in ``zero`` are ignored, and
        labels missing from ``zero`` keep the ``non_zero`` price.

    Returns
    -------
    pandas.DataFrame
        A new frame in which the index of ``non_zero`` has been moved into
        regular column(s) and ``gas_add`` (placed last) holds the row-wise
        maximum. Neither input is modified.
    """
    # Vectorized row-wise maximum (same approach as merge_max_mul) instead of
    # a Python-level apply; this also works when the tables are empty.
    candidates = pd.DataFrame({
        "gas_add_non_zero": non_zero["gas_add"],
        "gas_add_zero": zero["gas_add"].reindex(non_zero.index),
    })

    result = non_zero.drop(columns="gas_add")
    result["gas_add"] = candidates.max(axis=1)
    return result.reset_index()

In [6]:
def merge_max_mul(this, other):
    """Combine two multiplication price tables by keeping the larger prices.

    Parameters
    ----------
    this, other : pandas.DataFrame
        Tables with ``base_gas`` and ``gas_per_group_limb`` columns. When both
        also have a ``modulus_limbs`` column, rows are matched on it (``other``
        must then have unique ``modulus_limbs`` values); otherwise ``other`` is
        aligned to the index of ``this``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the rows and index of ``this``; ``base_gas`` and
        ``gas_per_group_limb`` are placed last and hold the row-wise maximum
        of the two tables. Values missing from ``other`` are ignored. Neither
        input is modified.
    """
    key = "modulus_limbs"
    price_columns = ["base_gas", "gas_per_group_limb"]

    if key in this.columns and key in other.columns:
        # Match rows by modulus size: both tables normally carry a fresh
        # RangeIndex, so index alignment would pair unrelated rows whenever
        # their row order or coverage differs.
        other_by_key = other.set_index(key)
        other_prices = {
            column: this[key].map(other_by_key[column]) for column in price_columns
        }
    else:
        other_prices = {
            column: other[column].reindex(this.index) for column in price_columns
        }

    result = this.drop(columns=price_columns)
    for column in price_columns:
        candidates = pd.DataFrame({"this": this[column], "other": other_prices[column]})
        result[column] = candidates.max(axis=1)

    return result

In [7]:
# G1 addition prices: the larger of the a_is_zero == 0 and a_is_zero == 1 prices.
g1_add = merge_max(non_zero=g1_add_non_zero, zero=g1_add_is_zero)

g1_add.head(15)

Unnamed: 0,modulus_limbs,gas_add
0,4,390
1,5,480
2,6,600
3,7,690
4,8,900
5,9,1020
6,10,1260
7,11,1380
8,12,1590
9,13,1800


In [8]:
def fit_mul(df):
    """Fit, per ``modulus_limbs``, a linear model of multiplication time.

    For every ``modulus_limbs`` group a Lasso regression with non-negative
    coefficients is fitted to ``run_microseconds_mul`` as a function of
    ``group_limbs``, and its R^2 on the fitted data is printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must expose ``modulus_limbs`` (as a column or index level), and
        ``group_limbs`` / ``run_microseconds_mul`` as columns after the index
        has been reset.

    Returns
    -------
    pandas.DataFrame
        One row per ``modulus_limbs`` with ``gas_per_group_limb`` (slope) and
        ``base_gas`` (intercept), each rounded up and multiplied by
        ``gas_factor``.
    """
    results = []
    # Group by a scalar key rather than a one-element list so that each group
    # label is the plain modulus_limbs value, not a 1-tuple (pandas >= 2).
    for limbs, group in df.groupby("modulus_limbs"):
        group = group.reset_index()

        # Build the 2-D column vectors from NumPy arrays: indexing a Series
        # with [:, np.newaxis] is not supported by current pandas.
        x = group["group_limbs"].to_numpy().reshape(-1, 1)
        y = group["run_microseconds_mul"].to_numpy().reshape(-1, 1)

        model = Lasso(alpha=0.0001, precompute=True, max_iter=1000,
                      positive=True, random_state=9999, selection='random')
        model.fit(x, y)

        score = model.score(x, y)
        print("R^2 = {}".format(score))

        # ravel() copes with coef_/intercept_ being 0-d, 1-d or 2-d arrays.
        slope = math.ceil(np.ravel(model.coef_)[0]) * gas_factor
        intercept = math.ceil(np.ravel(model.intercept_)[0]) * gas_factor

        results.append([limbs, slope, intercept])

    return pd.DataFrame(results, columns=["modulus_limbs", "gas_per_group_limb", "base_gas"])

In [9]:
# G1 multiplication prices: the larger of the fits on the two a_is_zero subsets.
results_mul_g1 = merge_max_mul(
    this=fit_mul(g1_m_is_zero),
    other=fit_mul(g1_m_non_zero),
)

results_mul_g1.head(16)

R^2 = 0.9999199936781422
R^2 = 0.9999674526370343
R^2 = 0.9999771345266874
R^2 = 0.9999888222510447
R^2 = 0.9999952424642169
R^2 = 0.9999679461940769
R^2 = 0.9999331062109333
R^2 = 0.9999955793751287
R^2 = 0.9999559773473287
R^2 = 0.999934525706301
R^2 = 0.9999890301237453
R^2 = 0.9998609648502375
R^2 = 0.9987923061402163
R^2 = 0.9999020953454828
R^2 = 0.999976396595164
R^2 = 0.9999676714718104
R^2 = 0.9999923169679507
R^2 = 0.9999940981554523
R^2 = 0.9999635128134116
R^2 = 0.999934797498945
R^2 = 0.9999943017274658
R^2 = 0.9999356231897188
R^2 = 0.9999314031037259
R^2 = 0.9999896109828192
R^2 = 0.9998630205164802
R^2 = 0.9987490335588167


Unnamed: 0,modulus_limbs,base_gas,gas_per_group_limb
0,4,360,1740
1,5,510,2370
2,6,570,3000
3,7,570,3780
4,8,750,4740
5,9,750,6060
6,10,750,7170
7,11,1320,8340
8,12,1320,9810
9,13,1080,11310


In [10]:
# Aggregate addition / multiplication timings for the ext_degree == 2 subsets.
g2_ext2_add_is_zero, g2_ext2_m_is_zero = fit_add_and_mul(g2_ext_2_a_is_zero)
g2_ext2_add_non_zero, g2_ext2_m_non_zero = fit_add_and_mul(g2_ext_2_a_non_zero)

In [11]:
# ext_degree == 2 addition prices: the larger price of the two a_is_zero subsets.
g2_ext2_add = merge_max(non_zero=g2_ext2_add_non_zero, zero=g2_ext2_add_is_zero)

g2_ext2_add.head(15)

Unnamed: 0,modulus_limbs,gas_add
0,4,2340
1,5,3330
2,6,4410
3,7,5670
4,8,7230
5,9,8730
6,10,10650
7,11,12720
8,12,15000
9,13,17340


In [12]:
# ext_degree == 2 multiplication prices: the larger of the two subset fits.
results_mul_g2_ext_2 = merge_max_mul(
    this=fit_mul(g2_ext2_m_is_zero),
    other=fit_mul(g2_ext2_m_non_zero),
)

results_mul_g2_ext_2.head(16)

R^2 = 0.9999032316127509
R^2 = 0.9999681308068268
R^2 = 0.9999733740735604
R^2 = 0.9999899395936023
R^2 = 0.9999858908970376
R^2 = 0.9999913834339217
R^2 = 0.999930919383849
R^2 = 0.9999934949846166
R^2 = 0.9999518208815034
R^2 = 0.9999350716384092
R^2 = 0.999982422198231
R^2 = 0.9998627800060036
R^2 = 0.9987468978642111
R^2 = 0.999907823850796
R^2 = 0.9999723156089176
R^2 = 0.9999595991626546
R^2 = 0.9999868513707869
R^2 = 0.9999915820017319
R^2 = 0.9999789320536776
R^2 = 0.9999439514574017
R^2 = 0.9999951358182605
R^2 = 0.9999603153596818
R^2 = 0.9999200939081062
R^2 = 0.9999861347661765
R^2 = 0.9998592050291066
R^2 = 0.9987399838823933


Unnamed: 0,modulus_limbs,base_gas,gas_per_group_limb
0,4,2070,7920
1,5,3270,10800
2,6,4290,13740
3,7,5130,17160
4,8,6600,21930
5,9,7680,27900
6,10,8160,33180
7,11,12660,38700
8,12,13350,45690
9,13,14340,52560


In [13]:
# Aggregate addition / multiplication timings for the ext_degree == 3 subsets.
g2_ext3_add_is_zero, g2_ext3_m_is_zero = fit_add_and_mul(g2_ext_3_a_is_zero)
g2_ext3_add_non_zero, g2_ext3_m_non_zero = fit_add_and_mul(g2_ext_3_a_non_zero)

In [14]:
# ext_degree == 3 addition prices: the larger price of the two a_is_zero subsets.
g2_ext3_add = merge_max(non_zero=g2_ext3_add_non_zero, zero=g2_ext3_add_is_zero)

g2_ext3_add.head(15)

Unnamed: 0,modulus_limbs,gas_add
0,4,2520
1,5,3570
2,6,4620
3,7,5910
4,8,7590
5,9,9330
6,10,11370
7,11,13590
8,12,15990
9,13,18510


In [15]:
# ext_degree == 3 multiplication prices: the larger of the two subset fits.
results_mul_g2_ext_3 = merge_max_mul(
    this=fit_mul(g2_ext3_m_is_zero),
    other=fit_mul(g2_ext3_m_non_zero),
)

results_mul_g2_ext_3.head(16)

R^2 = 0.9999197718103535
R^2 = 0.9999708628457065
R^2 = 0.9999739351080261
R^2 = 0.9999851677238278
R^2 = 0.9999913880198678
R^2 = 0.9999947988864247
R^2 = 0.9999365947841143
R^2 = 0.999995285460228
R^2 = 0.9999441764076454
R^2 = 0.9999326016516054
R^2 = 0.9999900720944419
R^2 = 0.9998795776766173
R^2 = 0.998773195061207
R^2 = 0.9999102084101484
R^2 = 0.9999758729911811
R^2 = 0.9999807178604547
R^2 = 0.9999913732609275
R^2 = 0.9999811736372592
R^2 = 0.9999911420879548
R^2 = 0.9999386440236944
R^2 = 0.9999935048432042
R^2 = 0.9999308028850838
R^2 = 0.999940693177829
R^2 = 0.9999886302339135
R^2 = 0.9998771773996423
R^2 = 0.9987367295348145


Unnamed: 0,modulus_limbs,base_gas,gas_per_group_limb
0,4,2040,15390
1,5,3660,20640
2,6,4410,26400
3,7,5100,33030
4,8,6840,42390
5,9,8010,54120
6,10,6240,63510
7,11,13470,73650
8,12,13170,86970
9,13,12360,99480


In [16]:
def eval_naive(modulus_limbs, group_limbs, num_pairs, base_costs):
    """Price ``num_pairs`` independent multiplications with no discount.

    The first row of ``base_costs`` whose ``modulus_limbs`` matches supplies
    ``base_gas`` and ``gas_per_group_limb``; the result is
    ``num_pairs * (base_gas + gas_per_group_limb * group_limbs)``.

    Raises
    ------
    IndexError
        If ``base_costs`` has no row for ``modulus_limbs``.
    """
    matching = base_costs[base_costs["modulus_limbs"] == modulus_limbs]
    base = matching["base_gas"].array[0]
    limb_rate = matching["gas_per_group_limb"].array[0]

    return num_pairs * (base + limb_rate * group_limbs)

def estimate_naive(df, base_costs):
    """Compare measured multiexp gas with the undiscounted per-pair estimate.

    Each row's ``run_microseconds_multiexp`` is rounded up and multiplied by
    ``gas_factor``, then divided by ``eval_naive`` for the same row.

    Returns
    -------
    pandas.DataFrame
        Columns ``num_mul_pairs`` and ``gas_factor``: the mean measured/naive
        ratio for every distinct ``num_mul_pairs``.
    """
    priced = df.copy()
    priced["gas"] = priced["run_microseconds_multiexp"].apply(
        lambda microseconds: gas_factor * math.ceil(microseconds)
    )
    priced["gas_naive"] = priced[["modulus_limbs", "group_limbs", "num_mul_pairs"]].apply(
        lambda row: eval_naive(
            row["modulus_limbs"], row["group_limbs"], row["num_mul_pairs"], base_costs
        ),
        axis=1,
    )
    priced["gas_factor"] = priced[["gas_naive", "gas"]].apply(
        lambda row: row["gas"] * 1.0 / (row["gas_naive"] * 1.0),
        axis=1,
    )

    mean_factor = priced.groupby(["num_mul_pairs"])[["gas_factor"]].mean()
    return mean_factor.reset_index()

# multiexp_g1 = estimate_naive(g1_a_non_zero, fit_mul(g1_m_non_zero))

# multiexp_g1.head(15)


In [17]:
# import matplotlib.pyplot as plt

# plt.close('all')

# multiexp_g1.plot(x = "num_mul_pairs", y = "log_factor")

In [18]:
# multiexp_g1.plot(x = "num_mul_pairs", y = "gas_factor")

In [23]:
# Fixed-point scale: discount factors are multiplied by this before being rounded up to integers.
discount_multipiler = 1000

In [24]:
import numpy as np
import scipy

from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score

def interpolate(df):
    """Build an integer discount table for every pair count from 1 to 128.

    The ``gas_factor`` values of ``df`` are linearly interpolated over
    ``num_mul_pairs`` for 2..128 pairs, multiplied by ``discount_multipiler``
    and rounded up. A fixed entry of ``1.2 * discount_multipiler`` is used for
    a single pair.

    Returns
    -------
    pandas.DataFrame
        Columns ``num_pairs`` and ``gas_factor_by_multiplier``.
    """
    # NOTE(review): np.interp expects increasing x values - confirm that df is
    # sorted by num_mul_pairs.
    known_pairs = df["num_mul_pairs"].to_numpy()
    known_factors = df["gas_factor"].to_numpy()
    pair_counts = np.arange(2, 128 + 1, 1)
    factors = np.interp(pair_counts, known_pairs, known_factors)

    rows = [[int(1.0), int(1.2*discount_multipiler)]]
    rows.extend(
        [int(pairs), math.ceil(factor*discount_multipiler)]
        for pairs, factor in zip(pair_counts, factors)
    )

    return pd.DataFrame(rows, columns=["num_pairs", "gas_factor_by_multiplier"])

# t = interpolate(multiexp_g1)

# t.head(15)


In [25]:
def merge_max_multiexp(this, other):
    """Combine two discount-factor tables by keeping the larger ``gas_factor``.

    Parameters
    ----------
    this, other : pandas.DataFrame
        Tables with a ``gas_factor`` column. When both also have a
        ``num_mul_pairs`` column, rows are matched on it (``other`` must then
        have unique ``num_mul_pairs`` values); otherwise ``other`` is aligned
        to the index of ``this``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the rows and index of ``this`` whose ``gas_factor``
        column (placed last) is the row-wise maximum of the two tables. Values
        missing from ``other`` are ignored. Neither input is modified.
    """
    key = "num_mul_pairs"

    if key in this.columns and key in other.columns:
        # Match rows by pair count: both tables normally carry a fresh
        # RangeIndex, so index alignment would pair unrelated rows whenever
        # the two tables cover different pair counts.
        other_factor = this[key].map(other.set_index(key)["gas_factor"])
    else:
        other_factor = other["gas_factor"].reindex(this.index)

    candidates = pd.DataFrame({"this": this["gas_factor"], "other": other_factor})

    result = this.drop(columns="gas_factor")
    result["gas_factor"] = candidates.max(axis=1)
    return result

In [26]:
# Start from the G1 / a_is_zero == 0 factors, then fold in the other five
# subsets, keeping the largest measured/naive ratio for every pair count.
max_multiexp = estimate_naive(df=g1_a_non_zero, base_costs=fit_mul(g1_m_non_zero))
max_multiexp = merge_max_multiexp(
    this=max_multiexp,
    other=estimate_naive(df=g1_a_is_zero, base_costs=fit_mul(g1_m_is_zero)),
)
max_multiexp = merge_max_multiexp(
    this=max_multiexp,
    other=estimate_naive(df=g2_ext_2_a_non_zero, base_costs=fit_mul(g2_ext2_m_non_zero)),
)
max_multiexp = merge_max_multiexp(
    this=max_multiexp,
    other=estimate_naive(df=g2_ext_2_a_is_zero, base_costs=fit_mul(g2_ext2_m_is_zero)),
)
max_multiexp = merge_max_multiexp(
    this=max_multiexp,
    other=estimate_naive(df=g2_ext_3_a_non_zero, base_costs=fit_mul(g2_ext3_m_non_zero)),
)
max_multiexp = merge_max_multiexp(
    this=max_multiexp,
    other=estimate_naive(df=g2_ext_3_a_is_zero, base_costs=fit_mul(g2_ext3_m_is_zero)),
)

# Expand the merged factors into the integer discount table.
t = interpolate(max_multiexp)

t.head(15)

R^2 = 0.9999020953454828
R^2 = 0.999976396595164
R^2 = 0.9999676714718104
R^2 = 0.9999923169679507
R^2 = 0.9999940981554523
R^2 = 0.9999635128134116
R^2 = 0.999934797498945
R^2 = 0.9999943017274658
R^2 = 0.9999356231897188
R^2 = 0.9999314031037259
R^2 = 0.9999896109828192
R^2 = 0.9998630205164802
R^2 = 0.9987490335588167
R^2 = 0.9999199936781422
R^2 = 0.9999674526370343
R^2 = 0.9999771345266874
R^2 = 0.9999888222510447
R^2 = 0.9999952424642169
R^2 = 0.9999679461940769
R^2 = 0.9999331062109333
R^2 = 0.9999955793751287
R^2 = 0.9999559773473287
R^2 = 0.999934525706301
R^2 = 0.9999890301237453
R^2 = 0.9998609648502375
R^2 = 0.9987923061402163
R^2 = 0.999907823850796
R^2 = 0.9999723156089176
R^2 = 0.9999595991626546
R^2 = 0.9999868513707869
R^2 = 0.9999915820017319
R^2 = 0.9999789320536776
R^2 = 0.9999439514574017
R^2 = 0.9999951358182605
R^2 = 0.9999603153596818
R^2 = 0.9999200939081062
R^2 = 0.9999861347661765
R^2 = 0.9998592050291066
R^2 = 0.9987399838823933
R^2 = 0.9999032316127509
R^2 

Unnamed: 0,num_pairs,gas_factor_by_multiplier
0,1,1200
1,2,888
2,3,764
3,4,641
4,5,594
5,6,547
6,7,500
7,8,453
8,9,438
9,10,423


In [27]:
import json

def serialize_addition_model(df, filename):
    """Write an addition price table to ``filename`` as JSON.

    The file holds one object whose ``"price"`` key lists
    ``[modulus_limbs, gas_add]`` integer pairs, one per row of ``df``.
    """
    prices = [
        [int(row["modulus_limbs"]), int(row["gas_add"])]
        for _, row in df.iterrows()
    ]
    with open(filename, 'w') as outfile:
        json.dump({"price": prices}, outfile)


serialize_addition_model(g1_add, "g1_addition.json")
serialize_addition_model(g2_ext2_add, "g2_addition_ext2.json")
serialize_addition_model(g2_ext3_add, "g2_addition_ext3.json")

In [28]:
def serialize_multiplication_model(df, filename):
    """Write a multiplication price table to ``filename`` as JSON.

    The file holds one object with two lists of integer pairs, one entry per
    row of ``df``: ``"base"`` with ``[modulus_limbs, base_gas]`` and
    ``"per_limb"`` with ``[modulus_limbs, gas_per_group_limb]``.
    """
    base_prices = []
    per_limb_prices = []
    for _, row in df.iterrows():
        limbs = int(row["modulus_limbs"])
        base_prices.append([limbs, int(row["base_gas"])])
        per_limb_prices.append([limbs, int(row["gas_per_group_limb"])])

    payload = {"base": base_prices, "per_limb": per_limb_prices}
    with open(filename, 'w') as outfile:
        json.dump(payload, outfile)


serialize_multiplication_model(results_mul_g1, "g1_multiplication.json")
serialize_multiplication_model(results_mul_g2_ext_2, "g2_multiplication_ext2.json")
serialize_multiplication_model(results_mul_g2_ext_3, "g2_multiplication_ext3.json")

In [29]:
def serialize_multiexp_model(df, filename):
    """Write the multiexp discount table to ``filename`` as JSON.

    The file holds one object with:

    * ``"discounts"``: ``[num_pairs, gas_factor_by_multiplier]`` integer pairs,
      one per row of ``df``;
    * ``"max_pairs"``: the largest ``num_pairs`` seen (0 for an empty table);
    * ``"max_discount"``: the smallest factor seen, capped at 2000;
    * ``"discount_multiplier"``: the module-level ``discount_multipiler``.
    """
    discounts = [
        [math.floor(row["num_pairs"]), math.ceil(row["gas_factor_by_multiplier"])]
        for _, row in df.iterrows()
    ]
    # Seed the reductions with the same starting values as a running max/min.
    max_pairs = max([0] + [pairs for pairs, _ in discounts])
    max_discount = min([2000] + [factor for _, factor in discounts])

    payload = {
        "discounts": discounts,
        "max_pairs": max_pairs,
        "max_discount": max_discount,
        "discount_multiplier": discount_multipiler,
    }
    with open(filename, 'w') as outfile:
        json.dump(payload, outfile)


serialize_multiexp_model(t, "multiexp_discounts.json")