In [1]:
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
import numpy as np
import math

# Multiplier applied further down to ceil'ed microsecond timings to obtain gas values.
gas_factor = 15

# (label, CSV path) pair consumed by get_df; despite the name this is one entry, not a list.
file_list = ("Arith", "./monte_carlo_arith_deterministic.csv")

def get_df(file):
    """Read the CSV referenced by a ``(name, path)`` pair into a DataFrame.

    Parameters
    ----------
    file : tuple
        Two-element ``(name, path)`` pair. Only ``path`` is used; the name is
        unpacked and ignored.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV contents.
    """
    _label, csv_path = file
    return pd.read_csv(csv_path)

# Load the timing measurements once; the cells below slice this frame.
data = get_df(file_list)

data.head()

Unnamed: 0,modulus_limbs,group_limbs,num_mul_pairs,a_is_zero,ext_degree,run_microseconds_add,run_microseconds_mul,run_microseconds_multiexp
0,4,1,128,0,1,85,160,2140
1,4,1,64,0,1,60,160,1288
2,4,1,32,0,1,91,157,836
3,4,1,16,0,1,55,187,679
4,4,1,8,0,1,66,171,392


In [2]:
from sklearn import linear_model
from sklearn.linear_model import Lasso
from scipy.optimize import nnls

def split(df):
    """Partition ``df`` by ``ext_degree`` (1, 2, 3) and ``a_is_zero`` (0, 1).

    Returns
    -------
    tuple of pandas.DataFrame
        Six independent copies, ordered by extension degree and, within each
        degree, the ``a_is_zero == 0`` subset before the ``a_is_zero == 1``
        subset.
    """
    subsets = []
    for degree in (1, 2, 3):
        by_degree = df[df["ext_degree"] == degree]
        for flag in (0, 1):
            # .copy() so later column assignments never touch the parent frame.
            subsets.append(by_degree[by_degree["a_is_zero"] == flag].copy())
    return tuple(subsets)

# Unpack the six (ext_degree, a_is_zero) subsets in the order split() returns them.
(g1_a_non_zero, g1_a_is_zero, g2_ext_2_a_non_zero, g2_ext_2_a_is_zero, g2_ext_3_a_non_zero, g2_ext_3_a_is_zero) = split(data)

g1_a_non_zero.head()


Unnamed: 0,modulus_limbs,group_limbs,num_mul_pairs,a_is_zero,ext_degree,run_microseconds_add,run_microseconds_mul,run_microseconds_multiexp
0,4,1,128,0,1,85,160,2140
1,4,1,64,0,1,60,160,1288
2,4,1,32,0,1,91,157,836
3,4,1,16,0,1,55,187,679
4,4,1,8,0,1,66,171,392


In [3]:
def make_diffs(non_zero, is_zero):
    """Return a copy of ``non_zero`` whose timing columns hold non_zero - is_zero.

    The subtraction is positional (row i of ``non_zero`` minus row i of
    ``is_zero``). A plain Series subtraction would align on index labels, and
    the two subsets are slices of one frame with disjoint labels, which turns
    every difference into NaN.

    NOTE(review): positional pairing assumes both frames list the same
    parameter combinations in the same order -- confirm against the data
    generator.

    Parameters
    ----------
    non_zero, is_zero : pandas.DataFrame
        Frames of equal length containing ``run_microseconds_add``,
        ``run_microseconds_mul`` and ``run_microseconds_multiexp``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``non_zero`` (index and other columns unchanged) with the three
        timing columns replaced by the differences.

    Raises
    ------
    ValueError
        If the two frames have different numbers of rows.
    """
    if len(non_zero) != len(is_zero):
        raise ValueError(
            "cannot pair rows: non_zero has {} rows, is_zero has {}".format(
                len(non_zero), len(is_zero)
            )
        )

    diff = non_zero.copy()
    for column in (
        "run_microseconds_add",
        "run_microseconds_mul",
        "run_microseconds_multiexp",
    ):
        # Raw arrays bypass index alignment, so rows pair up by position.
        diff[column] = non_zero[column].to_numpy() - is_zero[column].to_numpy()

    return diff
    
# Timing columns of the ext_degree == 1 subsets: a_is_zero == 0 minus a_is_zero == 1.
# `diff` is not used by any later cell shown in this notebook.
diff = make_diffs(g1_a_non_zero, g1_a_is_zero)

g1_a_is_zero.head()

# diff.head()

Unnamed: 0,modulus_limbs,group_limbs,num_mul_pairs,a_is_zero,ext_degree,run_microseconds_add,run_microseconds_mul,run_microseconds_multiexp
7,4,1,128,1,1,54,156,1916
8,4,1,64,1,1,67,164,1154
9,4,1,32,1,1,57,156,779
10,4,1,16,1,1,52,173,557
11,4,1,8,1,1,43,160,449


In [4]:
def fit_add_and_mul(df, stat="mean", factor=None):
    """Aggregate addition and multiplication timings per limb configuration.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``modulus_limbs``, ``group_limbs``,
        ``run_microseconds_add`` and ``run_microseconds_mul``.
    stat : str or callable, default "mean"
        Aggregation passed to ``GroupBy.agg`` (e.g. ``"median"``). The default
        reproduces the original mean-based behaviour.
    factor : number, optional
        Multiplier applied to the rounded-up addition timing. Defaults to the
        module-level ``gas_factor``.

    Returns
    -------
    (addition, multiplication) : tuple of pandas.DataFrame
        ``addition`` is indexed by ``modulus_limbs`` and holds the aggregated
        ``run_microseconds_add`` plus ``gas_add = factor * ceil(timing)``.
        ``multiplication`` is indexed by ``(modulus_limbs, group_limbs)`` and
        holds the aggregated ``run_microseconds_mul``.
    """
    if factor is None:
        # Resolved at call time so the notebook-level constant stays the default.
        factor = gas_factor

    addition = (
        df[["modulus_limbs", "run_microseconds_add"]]
        .groupby(["modulus_limbs"])
        .agg(stat)
    )
    # Round the timing up before scaling so gas_add is an integer multiple of factor.
    addition["gas_add"] = addition["run_microseconds_add"].apply(
        lambda timing: factor * math.ceil(timing)
    )

    multiplication = (
        df[["modulus_limbs", "group_limbs", "run_microseconds_mul"]]
        .groupby(["modulus_limbs", "group_limbs"])
        .agg(stat)
    )

    return (addition, multiplication)

# Aggregate the ext_degree == 1 subsets; the m_* frames are passed to fit_mul in later cells.
(add_is_zero, m_is_zero) = fit_add_and_mul(g1_a_is_zero)
(add_non_zero, m_non_zero) = fit_add_and_mul(g1_a_non_zero)

add_is_zero.head(20)

Unnamed: 0_level_0,run_microseconds_add,gas_add
modulus_limbs,Unnamed: 1_level_1,Unnamed: 2_level_1
4,55.741071,840
5,59.928571,900
6,74.095238,1125


In [5]:
# Addition timing and gas per modulus_limbs for ext_degree == 1, a_is_zero == 0.
add_non_zero.head(20)

Unnamed: 0_level_0,run_microseconds_add,gas_add
modulus_limbs,Unnamed: 1_level_1,Unnamed: 2_level_1
4,58.205357,885
5,61.651786,930
6,67.333333,1020


In [6]:
# m_is_zero.head(20)

In [7]:
def fit_mul(df):
    """Fit ``run_microseconds_mul ~ group_limbs`` per ``modulus_limbs`` and scale to gas.

    For every ``modulus_limbs`` group a Lasso model with a non-negative
    coefficient is fitted, its R^2 is printed, and the slope and intercept are
    each rounded up and multiplied by the module-level ``gas_factor``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame exposing ``modulus_limbs`` (groupable by that name) and, after
        ``reset_index()``, the columns ``group_limbs`` and
        ``run_microseconds_mul``.

    Returns
    -------
    list of (limbs, (slope, intercept))
        One entry per ``modulus_limbs`` value, in group order.

    Notes
    -----
    ``positive=True`` constrains only the coefficient; the intercept can still
    come out negative.
    """
    results = []
    # Group by the bare label rather than a one-element list so each key is a
    # scalar instead of a 1-tuple on recent pandas versions.
    for limbs, group in df.groupby("modulus_limbs"):
        group = group.reset_index()

        # Multi-dimensional indexing on a Series (``s[:, np.newaxis]``) is no
        # longer supported by pandas, so build the arrays explicitly.
        x = group["group_limbs"].to_numpy().reshape(-1, 1)
        y = group["run_microseconds_mul"].to_numpy()

        model = Lasso(alpha=0.0001, precompute=True, max_iter=1000,
                      positive=True, random_state=9999, selection='random')
        model.fit(x, y)

        score = model.score(x, y)

        print("R^2 = {}".format(score))

        # np.ravel accepts scalar, 0-d and 1-d results alike, so extraction
        # does not depend on how the estimator shapes coef_ / intercept_.
        slope = math.ceil(np.ravel(model.coef_)[0]) * gas_factor
        intercept = math.ceil(np.ravel(model.intercept_)[0]) * gas_factor

        results.append((limbs, (slope, intercept)))

    return results

In [8]:
# Multiplication fit for ext_degree == 1, a_is_zero == 1: (modulus_limbs, (slope, intercept)) pairs.
results = fit_mul(m_is_zero)

print(results)

R^2 = 0.7870310619091309
R^2 = 0.9922992443678759
R^2 = 0.9449873602051204
[(4, (690, 4095)), (5, (1575, 930)), (6, (1965, 525))]


In [9]:
# Multiplication fit for ext_degree == 1, a_is_zero == 0; overwrites the previous `results`.
results = fit_mul(m_non_zero)

print(results)

R^2 = 0.8756400345684758
R^2 = 0.9865431468909722
R^2 = 0.9712371906609466
[(4, (870, 3825)), (5, (1845, 255)), (6, (2115, 1020))]


In [10]:
# Same aggregation for the ext_degree == 2 subsets. This rebinds add_*/m_* from the
# ext_degree == 1 run, so the cells below depend on this cell having been executed.
(add_is_zero, m_is_zero) = fit_add_and_mul(g2_ext_2_a_is_zero)
(add_non_zero, m_non_zero) = fit_add_and_mul(g2_ext_2_a_non_zero)

In [11]:
# Addition timing and gas per modulus_limbs for ext_degree == 2, a_is_zero == 1.
add_is_zero.head(20)

Unnamed: 0_level_0,run_microseconds_add,gas_add
modulus_limbs,Unnamed: 1_level_1,Unnamed: 2_level_1
4,177.044643,2670
5,206.5625,3105
6,237.583333,3570


In [12]:
# Addition timing and gas per modulus_limbs for ext_degree == 2, a_is_zero == 0.
add_non_zero.head(20)

Unnamed: 0_level_0,run_microseconds_add,gas_add
modulus_limbs,Unnamed: 1_level_1,Unnamed: 2_level_1
4,163.714286,2460
5,210.089286,3165
6,240.678571,3615


In [13]:
# Multiplication fit for ext_degree == 2, a_is_zero == 1: (modulus_limbs, (slope, intercept)) pairs.
results = fit_mul(m_is_zero)

print(results)

R^2 = 0.9074077160640172
R^2 = 0.9841701189832694
R^2 = 0.9705921687055733
[(4, (2910, 11565)), (5, (5835, 1215)), (6, (7245, 1740))]


In [14]:
# Multiplication fit for ext_degree == 2, a_is_zero == 0; overwrites the previous `results`.
results = fit_mul(m_non_zero)

print(results)

R^2 = 0.8715589773647988
R^2 = 0.9811950569327342
R^2 = 0.9674323159025369
[(4, (3345, 12825)), (5, (6675, 3210)), (6, (8325, 1980))]


In [15]:
# Same aggregation for the ext_degree == 3 subsets. This rebinds add_*/m_* again,
# so the cells below depend on this cell having been executed.
(add_is_zero, m_is_zero) = fit_add_and_mul(g2_ext_3_a_is_zero)
(add_non_zero, m_non_zero) = fit_add_and_mul(g2_ext_3_a_non_zero)

In [16]:
# Addition timing and gas per modulus_limbs for ext_degree == 3, a_is_zero == 1.
add_is_zero.head(20)

Unnamed: 0_level_0,run_microseconds_add,gas_add
modulus_limbs,Unnamed: 1_level_1,Unnamed: 2_level_1
4,144.625,2175
5,170.678571,2565
6,218.035714,3285


In [17]:
# Addition timing and gas per modulus_limbs for ext_degree == 3, a_is_zero == 0.
add_non_zero.head(20)

Unnamed: 0_level_0,run_microseconds_add,gas_add
modulus_limbs,Unnamed: 1_level_1,Unnamed: 2_level_1
4,147.348214,2220
5,191.491071,2880
6,223.5,3360


In [18]:
# Multiplication fit for ext_degree == 3, a_is_zero == 1: (modulus_limbs, (slope, intercept)) pairs.
results = fit_mul(m_is_zero)

print(results)

R^2 = 0.8754362438121857
R^2 = 0.9783092374406274
R^2 = 0.9845932214359416
[(4, (5640, 17175)), (5, (10290, 2415)), (6, (13470, 2595))]


In [19]:
# Multiplication fit for ext_degree == 3, a_is_zero == 0; overwrites the previous `results`.
# NOTE(review): the recorded output has a negative intercept (-1710) for modulus_limbs == 6;
# confirm whether a negative constant term is acceptable downstream.
results = fit_mul(m_non_zero)

print(results)

R^2 = 0.8805347424606025
R^2 = 0.980997372984332
R^2 = 0.97233718848938
[(4, (6825, 16845)), (5, (11715, 3210)), (6, (15240, -1710))]
