In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
import json
def save(name):
    """Save the current matplotlib figure as ``figures/<name>.pdf``.

    Parameters
    ----------
    name : str
        File stem of the PDF, without the ``figures/`` prefix or the
        ``.pdf`` extension.
    """
    # Local import so this cell does not depend on another cell for `os`.
    import os

    path = f'figures/{name}.pdf'
    # savefig does not create missing directories; make sure the target
    # folder exists so a fresh checkout can run the notebook end to end.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    plt.savefig(path)

In [None]:
# Load the JSON records. The encoding is given explicitly because open()
# otherwise uses the platform's locale encoding, which is not guaranteed to be
# UTF-8 (the encoding JSON files are expected to use).
with open('data/parameterised_random.json', encoding='utf-8') as infile:
    data = json.load(infile)

In [None]:
# Display the first record to inspect the fields each instance carries.
data[0]

In [None]:
def _instance_row(instance):
    """Flatten one JSON instance record into a single flat row (dict).

    The nested ``params`` groups (``lhs_params``, ``alpha_params``,
    ``beta_params``) are pulled up into top-level columns next to the
    per-instance values that are already stored at the top level.
    """
    params = instance['params']
    lhs = params['lhs_params']
    alpha = params['alpha_params']
    beta = params['beta_params']
    return {
        'param_rho': lhs['density'],
        'param_pv': lhs['pv'],
        'param_pc': lhs['pc'],
        'basis_split': beta['basis_split'],
        # Stored on a log scale. np.log is used directly: the `pd.np` alias
        # was deprecated in pandas 1.0 and no longer exists in pandas 2.x.
        'beta_param': np.log(alpha['beta_param']),
        'frac_violations': alpha['frac_violations'],
        'nonzeros': instance['nonzeros'],
        'var_degree_max': instance['var_degree_max'],
        'cons_degree_max': instance['cons_degree_max'],
        'var_degree_min': instance['var_degree_min'],
        'cons_degree_min': instance['cons_degree_min'],
        'fractional_primal': instance['fractional_primal'],
        'binding_constraints': instance['binding_constraints'],
        'total_fractionality': instance['total_fractionality'],
        'dual_iterations': instance['clp_dual_iterations'],
        'primal_iterations': instance['clp_primal_iterations'],
    }


# One row per instance; column order follows the dict above.
df = pd.DataFrame([_instance_row(instance) for instance in data])

In [None]:
# Overview grid: each input-parameter column (x) against each instance
# property column (y).
parameter_columns = [
    'param_rho', 'param_pv', 'param_pc',
    'basis_split', 'beta_param', 'frac_violations',
]
property_columns = [
    'nonzeros',
    'var_degree_min', 'var_degree_max',
    'cons_degree_min', 'cons_degree_max',
    'fractional_primal', 'binding_constraints', 'total_fractionality',
]
sns.pairplot(df, x_vars=parameter_columns, y_vars=property_columns)

# Controlling min/max/mean

The aim here is to show that `rho` controls the number of nonzeros, while `pv` and `pc` control variable and constraint skew *independently of density*.
We also want the parameters to be independent of size, hence the use of fractional values.

In [None]:
# Number of nonzeros against the density parameter; written to
# figures/density-parameter.pdf via save().
plt.scatter(df['param_rho'], df['nonzeros'])
plt.xlabel('Density Parameter')
plt.ylabel('Number of Nonzeros')
save('density-parameter')

In [None]:
# Quick unlabelled look: density vs. pc parameter, coloured by nonzero count.
plt.scatter(df['param_rho'], df['param_pc'], c=df['nonzeros'])

In [None]:
# Density vs. pc parameter; the colour scale shows the number of nonzeros.
plt.scatter(df['param_rho'], df['param_pc'], c=df['nonzeros'])
cbar = plt.colorbar()
cbar.set_label('Number of Nonzeros')
plt.xlabel('Density Parameter')
plt.ylabel('PC Parameter')

In [None]:
# Density vs. pc parameter, coloured by the spread (max - min) of the
# constraint degree; written to figures/pc-parameter.pdf via save().
cons_degree_spread = df['cons_degree_max'] - df['cons_degree_min']
plt.scatter(df['param_rho'], df['param_pc'], c=cons_degree_spread)
cbar = plt.colorbar()
cbar.set_label('Max - Min Constraint Degree')
plt.xlabel('Density Parameter')
plt.ylabel('PC Parameter')
save('pc-parameter')

In [None]:
# Density vs. pc parameter, coloured by the minimum constraint degree.
plt.scatter(df['param_rho'], df['param_pc'], c=df['cons_degree_min'])
cbar = plt.colorbar()
cbar.set_label('Min Constraint Degree')
plt.xlabel('Density Parameter')
plt.ylabel('PC Parameter')

In [None]:
# Density vs. pv parameter, coloured by the maximum variable degree.
# A colorbar and axis labels are included so the colour encoding can be read,
# mirroring the constraint-degree plots for the pc parameter.
plt.scatter(x=df.param_rho, y=df.param_pv, c=df.var_degree_max)
cbar = plt.colorbar()
cbar.set_label('Max Variable Degree')
plt.xlabel('Density Parameter')
plt.ylabel('PV Parameter')

In [None]:
# Density vs. pv parameter, coloured by the minimum variable degree.
# A colorbar and axis labels are included so the colour encoding can be read,
# mirroring the constraint-degree plots for the pc parameter.
plt.scatter(x=df.param_rho, y=df.param_pv, c=df.var_degree_min)
cbar = plt.colorbar()
cbar.set_label('Min Variable Degree')
plt.xlabel('Density Parameter')
plt.ylabel('PV Parameter')

In [None]:
# Show the first rows of the flattened table as a reminder of the columns.
df.head()

In [None]:
# basis_split vs. frac_violations, coloured by fractional_primal.
plt.scatter(df['basis_split'], df['frac_violations'], c=df['fractional_primal'])

In [None]:
# Product basis_split * frac_violations vs. the (log-scaled) beta_param
# column, coloured by total_fractionality.
split_times_violations = df['basis_split'] * df['frac_violations']
plt.scatter(split_times_violations, df['beta_param'], c=df['total_fractionality'])

In [None]:
# basis_split vs. number of binding constraints; the colour scale shows the
# primal iteration count.
plt.scatter(df['basis_split'], df['binding_constraints'], c=df['primal_iterations'])
plt.colorbar()

In [None]:
# Signed gap between binding constraints and basis_split * 50 (unrounded here)
# vs. primal iterations, coloured by dual iterations.
# NOTE(review): 50 is presumably a fixed instance dimension - confirm.
binding_offset = df['binding_constraints'] - df['basis_split'] * 50
plt.scatter(binding_offset, df['primal_iterations'], c=df['dual_iterations'])

In [None]:
# Absolute gap between the binding-constraint count and the rounded target
# basis_split * 50, plotted against the number of nonzeros.
# NOTE(review): 50 is presumably a fixed instance dimension - confirm.
target_binding = (df['basis_split'] * 50).round()
difference = (df['binding_constraints'] - target_binding).abs()
plt.scatter(df['nonzeros'], difference)
plt.xlabel('Number of NonZeros')
plt.ylabel('Difference between target and actual')
# Cell output: share of instances with a nonzero gap.
(difference > 0).mean()

In [None]:
# Same absolute gap as above the x axis this time, against primal iterations.
# NOTE(review): 50 is presumably a fixed instance dimension - confirm.
target_binding = (df['basis_split'] * 50).round()
difference = (df['binding_constraints'] - target_binding).abs()
plt.scatter(difference, df['primal_iterations'])
plt.xlabel('Difference between target and actual')
plt.ylabel('Primal iterations')
# Cell output: share of instances with a nonzero gap.
(difference > 0).mean()

In [None]:
# Fraction of instances whose binding-constraint count differs from the
# rounded target, per nonzero-count bin (10 equal-width bins from pd.cut,
# each represented by its midpoint).
target_binding = (df['basis_split'] * 50).round()
solution_different = (df['binding_constraints'] - target_binding).abs() > 0
nonzero_bin_mid = pd.cut(df['nonzeros'], bins=10).apply(
    lambda interval: float((interval.left + interval.right) / 2)
)
binned = pd.DataFrame({
    'different': solution_different,
    'nonzeros': nonzero_bin_mid,
})
rate_by_bin = binned.groupby('nonzeros').different.mean().reset_index().astype('float')
# Note: plot_data is bound to whatever .plot() returns, not to a DataFrame.
plot_data = rate_by_bin.set_index('nonzeros').different.plot()
plt.xlabel('Number of Nonzeros')
plt.ylabel('Pr (Solution Different)')

In [None]:
# Fraction of instances whose binding-constraint count differs from the
# rounded target, per distinct nonzero count, smoothed with a centred
# 150-point rolling mean; written to figures/pr-solution-different.pdf.
target_binding = (df['basis_split'] * 50).round()
solution_different = (df['binding_constraints'] - target_binding).abs() > 0
per_instance = pd.DataFrame({
    'different': solution_different,
    'nonzeros': df['nonzeros'],
})
rate_by_nonzeros = per_instance.groupby('nonzeros')['different'].mean().sort_index()
# dropna() removes the window positions the centred rolling mean leaves empty.
smoothed_rate = rate_by_nonzeros.rolling(150, center=True).mean().dropna()
smoothed_rate.plot()
plt.xlabel('Number of Nonzeros')
plt.ylabel('Pr (Solution Different)')
save('pr-solution-different')

In [None]:
import statsmodels.api as sm

In [None]:
# Linear regression of the nonzero count on the three LHS parameters.
# NOTE(review): only the three parameter columns are passed as regressors, so
# the fit has no intercept term - confirm regression through the origin is
# intended.
lhs_regressors = df[['param_rho', 'param_pv', 'param_pc']]
model = sm.OLS(endog=df['nonzeros'], exog=lhs_regressors)
results = model.fit()
results.summary()

In [None]:
# Linear regression of the minimum constraint degree on the three LHS
# parameters.
# NOTE(review): only the three parameter columns are passed as regressors, so
# the fit has no intercept term - confirm regression through the origin is
# intended.
lhs_regressors = df[['param_rho', 'param_pv', 'param_pc']]
model = sm.OLS(endog=df['cons_degree_min'], exog=lhs_regressors)
results = model.fit()
results.summary()

In [None]:
# Linear regression of the maximum constraint degree on the three LHS
# parameters.
# NOTE(review): only the three parameter columns are passed as regressors, so
# the fit has no intercept term - confirm regression through the origin is
# intended.
lhs_regressors = df[['param_rho', 'param_pv', 'param_pc']]
model = sm.OLS(endog=df['cons_degree_max'], exog=lhs_regressors)
results = model.fit()
results.summary()