In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.stats import norm

In [2]:
# Load the input table from data.csv (path is relative to the working directory).
# Later cells group it by the `segments` and `intersections` columns and
# aggregate the `time` column.
df = pd.read_csv('data.csv')

In [3]:
# Per-(segments, intersections) statistics of the `time` column.
# The column is selected *before* aggregating so that any other columns in
# `df` are never reduced (non-numeric columns would make a frame-wide
# mean()/std() fail on recent pandas), and a single groupby is shared by
# both statistics.
grouped_time = df.groupby(['segments', 'intersections'])['time']
mean = grouped_time.mean()
# Sample standard deviation (ddof=1); a group with a single row yields NaN.
std = grouped_time.std(ddof=1)

In [4]:
# Display the per-(segments, intersections) mean of `time`.
mean

segments  intersections
1         0                  1.624204
2         0                  1.729992
          1                  1.788181
3         2                  1.983553
          3                  2.189233
4         3                  2.258621
          4                  2.467372
          5                  2.449600
5         4                  2.549433
45        199               10.335832
48        216               10.495238
63        294               11.918963
75        629               20.603448
442       22176            138.704248
Name: time, dtype: float64

In [5]:
# Display the per-(segments, intersections) sample standard deviation of `time`.
std

segments  intersections
1         0                0.487993
2         0                0.467225
          1                0.432492
3         2                0.297811
          3                0.400263
4         3                0.465999
          4                0.509878
          5                0.510565
5         4                0.526478
45        199              1.180913
48        216              1.332894
63        294              1.610612
75        629              2.429758
442       22176            6.179194
Name: time, dtype: float64

In [6]:
def model(x, a, b, c):
    """Linear model in two regressors: a*x[0] + b*x[1] + c."""
    return a*x[0]+b*x[1]+c


# Split the (segments, intersections) index of `mean` into two arrays.
n = np.array([pair[0] for pair in mean.index])
i = np.array([pair[1] for pair in mean.index])
# Observed response: the per-group mean time.
t = mean.to_numpy()

# Regressors n*log(n) and i*log(n); log(n) is evaluated once and reused.
log_n = np.log(n)
nlogn = n*log_n
ilogn = i*log_n

# Weighted least-squares fit. The per-group standard deviations are passed as
# sigma and treated as absolute errors (absolute_sigma=True), so pcov is not
# rescaled by the residuals of the fit.
popt, pcov = curve_fit(
    model,
    (nlogn, ilogn),
    t,
    sigma=std,
    absolute_sigma=True,
)

In [7]:
# R^2-style score: explained sum of squares over total sum of squares, with
# every squared deviation divided by std**2. Both sums are centred on the
# plain (unweighted) mean of t.
t_predict = model((nlogn, ilogn), *popt)
t_bar = t.mean()
explained = ((t_predict - t_bar)**2/std**2).sum()
total = ((t - t_bar)**2/std**2).sum()
rsq = explained / total
rsq

0.999302149774734

In [8]:
# Two-sided z-test of H0: a = 0, where a is the first fitted parameter
# (the coefficient multiplying x[0] in the model) and avar its variance
# taken from the diagonal of pcov.
a = popt[0]
avar = pcov[0][0]
stat = a / np.sqrt(avar)
# Probability mass in both tails of the standard normal beyond |stat|.
pvalue = 2 * norm.cdf(-np.abs(stat))
(a, avar, pvalue)

(0.046515656493048334, 1.5921690464794805e-05, 2.101783314916946e-31)

In [9]:
# Two-sided z-test of H0: b = 0, where b is the second fitted parameter
# (the coefficient multiplying x[1] in the model) and bvar its variance
# taken from the diagonal of pcov.
b = popt[1]
bvar = pcov[1][1]
stat = b / np.sqrt(bvar)
# Probability mass in both tails of the standard normal beyond |stat|.
pvalue = 2 * norm.cdf(-np.abs(stat))
(b, bvar, pvalue)

(8.697045443331355e-05, 8.869881308289724e-09, 0.35577335520032527)

In [10]:
# Two-sided z-test of H0: c = 0, where c is the third fitted parameter
# (the constant term of the model) and cvar its variance taken from the
# diagonal of pcov.
c = popt[2]
cvar = pcov[2][2]
stat = c / np.sqrt(cvar)
# Probability mass in both tails of the standard normal beyond |stat|.
pvalue = 2 * norm.cdf(-np.abs(stat))
(c, cvar, pvalue)

(1.9116052889719206, 0.02194075978013954, 4.1948154591682375e-38)