In [49]:
import pandas as pd
import seaborn as sns
sns.set(context="notebook", palette="Spectral", style = 'darkgrid' ,font_scale = 1.5, color_codes=True)
import warnings
warnings.filterwarnings('ignore')
import os
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
from scipy.special import ndtri
from scipy.stats import chi2
from scipy.stats import t
from scipy.stats import f
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.regression.linear_model import OLS

### Reduced Model

In [19]:
df_reduced = pd.read_csv(r'../data/direct_transitivity/d_.csv')
df_reduced.head()

Unnamed: 0,Index,File,Nodes,Total Nodes,Butterflies,X-vars,C-vars,Total vars,Total constraints,Crossings,Opttime,Status,Nodes visited,Setup Time
0,0,north/g.10.72.graphml,10,12,0,8,7,15,18,0,0.001962,2,0,0.028
1,1,north/g.13.45.graphml,13,13,0,31,2,33,118,0,7.3e-05,2,0,0.004
2,2,north/g.10.11.graphml,10,10,0,15,0,15,40,0,6.7e-05,2,0,0.002
3,3,Rome-Lib/graficon11nodi/grafo233.11,11,18,0,15,12,27,36,0,5.5e-05,2,0,0.002
4,4,Rome-Lib/graficon12nodi/grafo2240.12,12,16,0,12,11,23,28,0,0.011561,2,1,0.001


In [20]:
cols_of_interest = ['Total vars','Total constraints','Total Nodes','Crossings','Opttime']
df_reduced_subset = df_reduced[cols_of_interest]
df_reduced_subset.rename(columns={'Total vars': 'Total_vars','Total constraints': 'Total_constrains','Total Nodes':'Total_nodes'}, inplace=True)
df_reduced_subset.head()


Unnamed: 0,Total_vars,Total_constrains,Total_nodes,Crossings,Opttime
0,15,18,12,0,0.001962
1,33,118,13,0,7.3e-05
2,15,40,10,0,6.7e-05
3,27,36,18,0,5.5e-05
4,23,28,16,0,0.011561


In [21]:
reduced_model = ols(formula=" Opttime ~ Total_vars + Total_constrains + Total_nodes + Crossings", data=df_reduced_subset).fit()

print(reduced_model.summary())

                            OLS Regression Results                            
Dep. Variable:                Opttime   R-squared:                       0.762
Model:                            OLS   Adj. R-squared:                  0.761
Method:                 Least Squares   F-statistic:                     917.4
Date:                Wed, 06 Dec 2023   Prob (F-statistic):               0.00
Time:                        15:36:35   Log-Likelihood:                -4580.9
No. Observations:                1150   AIC:                             9172.
Df Residuals:                    1145   BIC:                             9197.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
Intercept            0.9571      1.023  

In [68]:
sse_r = reduced_model.ssr
df_r = reduced_model.df_resid
print(f"SSE(R) = {sse_r},\nDegrees of freedom = {df_r}")

SSE(R) = 194145.65564334817,
Degrees of freedom = 1145.0


### Full Model

In [34]:
df_full = pd.read_csv(r'../data/direct_transitivity_data.csv')
df_full.head()

Unnamed: 0,Index,File,Nodes,Total Nodes,Butterflies,X-vars,C-vars,Total vars,Total constraints,Crossings,...,config,check_1,check_2,check_3,check_4,check_5,check_6,check_7,check_8,check_9
0,0,north/g.10.72.graphml,10,12,0,8,7,15,20,0,...,d_235689,0,1,1,0,1,1,0,1,1
1,1,north/g.13.45.graphml,13,13,0,6,2,8,8,0,...,d_235689,0,1,1,0,1,1,0,1,1
2,2,north/g.10.11.graphml,10,10,0,15,0,15,40,0,...,d_235689,0,1,1,0,1,1,0,1,1
3,3,Rome-Lib/graficon11nodi/grafo233.11,11,18,0,15,12,27,36,0,...,d_235689,0,1,1,0,1,1,0,1,1
4,4,Rome-Lib/graficon12nodi/grafo2240.12,12,16,0,12,11,23,30,0,...,d_235689,0,1,1,0,1,1,0,1,1


north/g.10.72.graphml                   512
Rome-Lib/graficon62nodi/grafo4566.62    512
Rome-Lib/graficon64nodi/grafo9384.64    512
Rome-Lib/graficon83nodi/grafo6468.83    512
Rome-Lib/graficon79nodi/grafo9381.79    512
                                       ... 
Rome-Lib/graficon38nodi/grafo5499.38    512
Rome-Lib/graficon44nodi/grafo7381.44    512
Rome-Lib/graficon38nodi/grafo6727.38    512
Rome-Lib/graficon58nodi/grafo3343.58    512
Rome-Lib/graficon73nodi/grafo7959.73    512
Name: File, Length: 1150, dtype: int64

In [75]:
cols_of_interest = ['File','Total vars','Total constraints','Total Nodes','Crossings', 'check_1', 'check_2', 'check_3', 'check_4',
 'check_5', 'check_6','check_7', 'check_8', 'check_9','Opttime']
df_full_subset = df_full[cols_of_interest]
df_full_subset.rename(columns={'Total vars': 'Total_vars','Total constraints': 'Total_constrains','Total Nodes':'Total_nodes'}, inplace=True)
df_full_subset.head()

Unnamed: 0,File,Total_vars,Total_constrains,Total_nodes,Crossings,check_1,check_2,check_3,check_4,check_5,check_6,check_7,check_8,check_9,Opttime
0,north/g.10.72.graphml,15,20,12,0,0,1,1,0,1,1,0,1,1,0.000556
1,north/g.13.45.graphml,8,8,13,0,0,1,1,0,1,1,0,1,1,0.000291
2,north/g.10.11.graphml,15,40,10,0,0,1,1,0,1,1,0,1,1,4e-05
3,Rome-Lib/graficon11nodi/grafo233.11,27,36,18,0,0,1,1,0,1,1,0,1,1,0.002283
4,Rome-Lib/graficon12nodi/grafo2240.12,23,30,16,0,0,1,1,0,1,1,0,1,1,0.000364


In [78]:
df_full_subset.groupby(['File']).mean()

Unnamed: 0_level_0,Total_vars,Total_constrains,Total_nodes,Crossings,check_1,check_2,check_3,check_4,check_5,check_6,check_7,check_8,check_9,Opttime
File,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Rome-Lib/graficon100nodi/grafo10372.100,4684.50,21258.500,190.0,50.519531,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,42.888354
Rome-Lib/graficon100nodi/grafo10550.100,4740.75,20871.750,218.0,39.484375,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,38.828083
Rome-Lib/graficon100nodi/grafo10937.100,8550.00,54953.000,203.0,520.349609,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,60.147798
Rome-Lib/graficon100nodi/grafo11613.100,5979.00,31154.750,231.0,65.232422,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,42.766048
Rome-Lib/graficon10nodi/grafo1010.10,46.50,87.500,12.0,0.000000,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.009616
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
north/g.69.3.graphml,601.50,6912.500,69.0,0.000000,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.205033
north/g.69.5.graphml,628.50,7069.000,69.0,0.000000,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.202421
north/g.70.1.graphml,6945.75,42637.500,173.0,95.480469,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,33.518547
north/g.73.8.graphml,4418.25,25772.625,167.0,34.005859,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,12.463597


In [36]:
' + '.join(list(df_full_subset.columns))

'Total_vars + Total_constrains + Total_nodes + Crossings + check_1 + check_2 + check_3 + check_4 + check_5 + check_6 + check_7 + check_8 + check_9 + Opttime'

In [38]:
full_model = ols(formula=" Opttime ~ Total_vars + Total_constrains + Total_nodes + Crossings + check_1 + check_2 + check_3 + check_4 + check_5 + check_6 + check_7 + check_8 + check_9", data=df_full_subset).fit()

print(full_model.summary())

                            OLS Regression Results                            
Dep. Variable:                Opttime   R-squared:                       0.658
Model:                            OLS   Adj. R-squared:                  0.658
Method:                 Least Squares   F-statistic:                 8.696e+04
Date:                Wed, 06 Dec 2023   Prob (F-statistic):               0.00
Time:                        15:47:17   Log-Likelihood:            -2.4328e+06
No. Observations:              588800   AIC:                         4.866e+06
Df Residuals:                  588786   BIC:                         4.866e+06
Df Model:                          13                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
Intercept           -3.4540      0.080  

In [67]:
sse_f = full_model.ssr
df_f = full_model.df_resid
print(f"SSE(F) = {sse_f},\nDegrees of freedom = {df_f}")

SSE(F) = 133763820.01076062,
Degrees of freedom = 588786.0


### F-test

In [73]:
alpha = 0.05
f_star = ((sse_r - sse_f)/(df_r - df_f))/(sse_f/df_f)
f_star

critical_value = f.ppf(1-alpha, df_r - df_f, df_f)

if (f_star <= critical_value):
    print(f'Conclude Null with f_star={f_star} and crtical value={critical_value}')
else:
    print(f'Conclude Alternate with f_star={f_star} and crtical value={critical_value}')

Conclude Alternate with f_star=1.000494234184039 and crtical value=nan
