In [1]:
from plot_funcs import (get_results, get_mean_avg_ranks,
                        get_results_df, plot_avg_ranks, get_ranks_sizes)
from funcs import save_stats_summary
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.formula.api import ols, rlm

Most of the utilities for loading and plotting results are defined in separate modules; these notebooks are for making sense of the results.

First we will consider loading the raw results.

In [None]:
# Relative path to the directory holding the saved experiment results
results_dir = '../exp/results'
results = get_results(results_dir)

The raw results are still in a format that is difficult to work with, so we will start by plotting results for just the elastic net models and just the random parcellations.

In [None]:
# Restrict the plot to elastic net models on random parcellations
plot_avg_ranks(results, model='elastic', random=True)

# Write the current figure to disk
plt.savefig('Figures/simple_example.png',
            bbox_inches='tight', dpi=300)

Next, plot the same results on log-log axes.

In [None]:
# Same subset as before, this time with the log option switched on
plot_avg_ranks(results, model='elastic', random=True, log=True)

# Write the current figure to disk
plt.savefig('Figures/simple_example_log.png',
            bbox_inches='tight', dpi=300)

We can use the following function to get these same results in a dataframe before plotting.

In [None]:
# Fetch the elastic net / random parcellation results as a dataframe
# and order the rows by the 'Size' column in one step.
r_df = get_ranks_sizes(
    results,
    models=['elastic'],
    random=True,
    log=True,
    threshold=False,
).sort_values(by='Size')

# Preview the first few rows
r_df.head()

We next fit a simple linear model of log10 mean rank on log10 size (both columns were log-transformed above).

In [None]:
# Ordinary least squares of Mean_Rank on Size, using the r_df dataframe
fit_spec = ols(formula='Mean_Rank ~ Size', data=r_df)
model = fit_spec.fit()

# Display the regression summary table
model.summary()

Let's add the fitted model to the plot from before.

In [None]:
# Fresh axes shared by the fitted line and the data points
_, ax = plt.subplots(figsize=(12, 8))

# Undo the log10 transform so sizes are back on their original scale
o_size = 10 ** r_df['Size']

# A straight line in log10-log10 space is a power law on the original scale:
#   rank = 10^intercept * size^slope
intercept = model.params.Intercept
slope = model.params.Size
p_fit = (10 ** intercept) * (o_size ** slope)

# Draw the fitted curve, labelled with the model's R^2
ax.plot(o_size, p_fit,
        label=f'R2={model.rsquared:.3f}',
        color='blue', alpha=.8)

# Overlay the data points on the same axes, as in the earlier plot
plot_avg_ranks(results, model='elastic', random=True,
               log=True, ax=ax)

# Write the current figure to disk
plt.savefig('Figures/simple_example_log_with_fit.png',
            bbox_inches='tight', dpi=300)

In [None]:
# Thresholded variant of the fit-and-overlay plot.
#
# The fit is bound to `model_thresh` rather than rebinding `model`: the
# earlier overlay cell reads `model` together with the unthresholded `r_df`,
# so overwriting `model` here would make that cell produce a mismatched
# fit line if it were re-run after this one.
r_df_thresh = get_ranks_sizes(results, models=['elastic'], log=True,
                              threshold=True, random=True).sort_values(by='Size')
model_thresh = ols('Mean_Rank ~ Size', data=r_df_thresh).fit()

# Gen plot
_, ax = plt.subplots(figsize=(12, 8))

# Reverse log10 on size
o_size = 10 ** r_df_thresh['Size']

# Line of pred. fit: a linear fit in log10-log10 space corresponds to
# rank = 10^intercept * size^slope on the original scale
p_fit = (10 ** model_thresh.params.Intercept) * (o_size ** model_thresh.params.Size)

# Plot line of fit, labelled with the thresholded model's R^2
ax.plot(o_size, p_fit, color='blue',
        alpha=.8, label=f'R2={model_thresh.rsquared:.3f}')

# Plot data points as before
# NOTE(review): these points are requested without threshold=True, whereas the
# fit above uses the thresholded dataframe — confirm whether plot_avg_ranks
# should also receive a threshold argument here.
plot_avg_ranks(results, random=True, log=True,
               model='elastic', ax=ax)

# Save stats summary as html
save_stats_summary(model_thresh, 'stats_example')

# Save Fig
plt.savefig('Figures/simple_example_log_with_fit2.png', dpi=300, bbox_inches='tight')

Now, what about averaging?

In [None]:
# No explicit model argument this time; random parcellations, log option on
plot_avg_ranks(results, log=True, random=True)

# Write the current figure to disk
plt.savefig('Figures/all_example_log.png',
            bbox_inches='tight', dpi=300)

Look at some example specific results:

In [None]:
# For each of two random parcellations, print the [0][0] entry of the
# elastic net result for two example targets, followed by their mean.
example_targets = ('anthro_waist_cm', 'cbcl_scr_syn_rulebreak_r')

for idx, parc in enumerate(('random_1000_1', 'random_10_1')):
    # Blank line between the two groups only (none after the last)
    if idx > 0:
        print()

    first, second = (results[f'{parc}---elastic---{target}'][0][0]
                     for target in example_targets)
    print(first)
    print(second)
    print((first + second) / 2)

The point of this example is that target variables exist on different scales of predictability.