In [1]:
import json
import os
import pprint
import sys
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yaml

In [2]:
# Hide DeprecationWarning messages so they do not clutter cell output.
warnings.filterwarnings("ignore", category=DeprecationWarning)

### Metrics to look at:
* Metrics over time
* Comparison of Hyperparameters
* Metrics at the beginning of training:
    * Why does scaling embeddings screw things up so much? Even when it's just the initialization --> must be an issue at the beginning of training, could be something interesting to look into

In [3]:
# `IPython.display` is the public home of `display`/`HTML`; importing `display`
# from `IPython.core.display` is deprecated.
from IPython.display import display, HTML

# Let notebook cells use the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))
# Do not truncate long string values when DataFrames are rendered.
pd.set_option('display.max_colwidth', 999)

In [4]:
def load_yaml(path):
    """Parse the YAML file at ``path`` with ``yaml.safe_load`` and return the result.

    The file is opened explicitly as UTF-8 so that parsing does not depend on
    the platform's default locale encoding.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return yaml.safe_load(f)
def load_json(path):
    """Parse the JSON file at ``path`` and return the decoded Python object.

    The file is opened explicitly as UTF-8 so that parsing does not depend on
    the platform's default locale encoding.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)

In [10]:
model_path = '../models/factorization/2^22/'

In [11]:
os.listdir(model_path)

['checkpoints', 'config.yaml', 'test_1', 'test_2', 'test_3', 'test_4']

In [12]:
# Parallel accumulators filled by find_metrics_in_dir: entry i of every list
# comes from the same run directory.
config_list = []            # parsed config.yaml of each run
test_metric_list = []       # parsed metrics_test.json of each run
oos_metric_list = []        # parsed metrics_oos.json of each run
loss_hist_list = []         # loss_hist.csv of each run, as a DataFrame
test_factor_df_list = []    # pred_df_test.csv of each run, as a DataFrame


def find_metrics_in_dir(base_path):
    """Recursively collect the artifacts of every run directory below ``base_path``.

    An entry of ``base_path`` is treated as a run when it contains a
    ``checkpoints`` directory. For such a run all five artifact files are
    required; when any is missing the run is reported with ``FAILED TO LOAD``
    and skipped as a whole, so the module-level accumulator lists always stay
    index-aligned. Any other sub-directory (except one named ``checkpoints``)
    is searched recursively.

    Entries are visited in sorted order so repeated executions fill the
    accumulators in the same order. ``base_path`` may be given with or without
    a trailing separator.

    Side effects: prints every visited entry name and appends to
    ``config_list``, ``test_metric_list``, ``oos_metric_list``,
    ``loss_hist_list`` and ``test_factor_df_list``.
    """
    skipped_names = {'300s', '.gitignore', 'addition_baselines', 'addition_small_as_possible'}
    for f in sorted(os.listdir(base_path)):
        if f in skipped_names:
            continue
        print(f)
        subdir_path = os.path.join(base_path, f)

        if os.path.isdir(os.path.join(subdir_path, 'checkpoints')):
            config_path = os.path.join(subdir_path, 'config.yaml')
            metrics_path = os.path.join(subdir_path, 'metrics_test.json')
            metrics_oos_path = os.path.join(subdir_path, 'metrics_oos.json')
            loss_hist_path = os.path.join(subdir_path, 'loss_hist.csv')
            pred_df_path = os.path.join(subdir_path, 'pred_df_test.csv')

            required = (config_path, metrics_path, metrics_oos_path, loss_hist_path, pred_df_path)
            if not all(os.path.exists(p) for p in required):
                print(f'FAILED TO LOAD {f}')
                continue

            # Load everything first and append afterwards: if any file fails
            # to parse, no list has been extended for this run.
            config = load_yaml(config_path)
            test_metrics = load_json(metrics_path)
            oos_metrics = load_json(metrics_oos_path)
            loss_hist = pd.read_csv(loss_hist_path)
            pred_df = pd.read_csv(pred_df_path)

            config_list.append(config)
            test_metric_list.append(test_metrics)
            oos_metric_list.append(oos_metrics)
            loss_hist_list.append(loss_hist)
            test_factor_df_list.append(pred_df)

        elif os.path.isdir(subdir_path) and f != 'checkpoints':
            find_metrics_in_dir(subdir_path)

In [13]:
find_metrics_in_dir(model_path)

checkpoints
config.yaml
test_1
FAILED TO LOAD test_1
test_2
FAILED TO LOAD test_2
test_3
FAILED TO LOAD test_3
test_4
FAILED TO LOAD test_4


In [9]:
len(config_list), len(test_metric_list), len(oos_metric_list), len(loss_hist_list), len(test_factor_df_list)

(2, 2, 2, 2, 2)

In [None]:
def flatten_config(config_item):
    """Collapse one nested run config into a flat ``{hyperparameter: value}`` dict.

    Copies the data base and training path, every entry of ``model_args`` and
    of the optimizer's ``opt_args``, the optimizer type, warmup steps, epoch
    count and gradient-norm limit. ``embedding_initialization`` defaults to
    ``'normal'`` and ``weight_decay`` to ``0`` when the config does not set
    them. Batch size and gradient-accumulation steps are replaced by their
    product, ``effective_train_batch_size``.

    Raises ``KeyError`` when a required config entry is missing.
    """
    data_cfg = config_item['data']
    flat = {
        'base': data_cfg['base'],
        'train_data': data_cfg['train_path'],
        'train_batch_size': config_item['loader']['train']['batch_size'],
    }

    flat.update(config_item['model_args'])
    flat.setdefault('embedding_initialization', 'normal')

    optimizer_cfg = config_item['optimizer']
    flat['optimizer'] = optimizer_cfg['type']
    flat.update(optimizer_cfg['opt_args'])
    flat.setdefault('weight_decay', 0)
    flat['gradient_accumulation_steps'] = optimizer_cfg['gradient_accumulation_steps']

    # The warmup length is stored either directly on the scheduler section or
    # inside its `scheduler_args`.
    scheduler_cfg = config_item['scheduler']
    if 'n_warmup_steps' in scheduler_cfg:
        flat['num_warmup_steps'] = scheduler_cfg['n_warmup_steps']
    else:
        flat['num_warmup_steps'] = scheduler_cfg['scheduler_args']['num_warmup_steps']
    flat['nb_epochs'] = scheduler_cfg['nb_epochs']
    flat['max_grad_norm'] = optimizer_cfg['max_grad_norm']

    # Only the product of the two is kept as a hyperparameter.
    batch_size = flat['train_batch_size']
    accumulation_steps = flat['gradient_accumulation_steps']
    flat['effective_train_batch_size'] = batch_size * accumulation_steps
    flat.pop('train_batch_size')
    flat.pop('gradient_accumulation_steps')

    return flat

In [None]:
# Flatten every run's nested config into one row of hyperparameters.
configs = [flatten_config(c) for c in config_list]
config_df = pd.DataFrame.from_dict(configs)

In [None]:
test_metric_list[0]['correct']

In [None]:
def get_nice_metrics(metric_list, suffix = '', just_factorization=True):
    """Turn a list of per-run metric dicts into three single-purpose DataFrames.

    Parameters
    ----------
    metric_list : list of dict
        One dict per run. Each must provide ``['correct']``,
        ``['meta']['n_beams']`` and a loss under ``'loss'`` or ``'test_loss'``.
    suffix : str
        When non-empty, ``_<suffix>`` is appended to every column name.
    just_factorization : bool
        When true only ``correct['correct_factorization']`` is kept; otherwise
        every key of ``correct`` becomes a column.

    Returns
    -------
    list of DataFrame
        ``[correct, n_beams, loss]``, each with one row per run. An empty
        ``metric_list`` yields empty frames instead of raising.
    """
    def get_loss(metric_dict):
        # The loss is stored under 'loss' in some metric files and under
        # 'test_loss' in others.
        if 'loss' in metric_dict:
            return metric_dict['loss']
        return metric_dict['test_loss']

    if just_factorization:
        correct = pd.DataFrame({
            'correct_factorization': [m['correct']['correct_factorization'] for m in metric_list]
        })
    else:
        correct = pd.DataFrame([m['correct'] for m in metric_list])

    # Name the columns at construction time; renaming a frame built from an
    # empty list fails with a length mismatch.
    n_beams = pd.DataFrame({'n_beams': [m['meta']['n_beams'] for m in metric_list]})
    loss_df = pd.DataFrame({'loss': [get_loss(m) for m in metric_list]})

    to_return = [correct, n_beams, loss_df]
    if suffix:
        for frame in to_return:
            frame.columns = [f'{c}_{suffix}' for c in frame.columns]
    return to_return

In [None]:
# One row per run: flattened config, test metrics, OOS metrics, and the run's
# loss-history DataFrame stored as an object in the `loss_hist` column.
merged = pd.concat([config_df] + get_nice_metrics(test_metric_list, 'test') + get_nice_metrics(oos_metric_list, 'oos') + 
                   [pd.DataFrame({'loss_hist' : loss_hist_list})], axis=1)

In [None]:
ignore_cols = ['loss_hist']

In [None]:
merged.iloc[:,:-1]

In [None]:
# Test and OOS evaluation must report the same beam count, so a single
# `n_beams` column is kept.
# NOTE: this cell drops the columns it reads, so it cannot be re-run without
# rebuilding `merged` first.
assert (merged['n_beams_test']==merged['n_beams_oos']).all()
merged['n_beams'] = merged['n_beams_oos']
merged.drop(['n_beams_test', 'n_beams_oos'], axis=1, inplace=True)

In [None]:
# Drop every configuration column (and `n_beams`) that takes a single value
# across all runs: a column without variation cannot explain any difference.
drop_cols = [
    c for c in list(config_df) + ['n_beams']
    if c in merged and merged[c].nunique() == 1
]
metric_df_loss_hist = merged.drop(columns=drop_cols)
# Same table without the per-run loss-history frames.
metric_df = metric_df_loss_hist.drop(columns='loss_hist')

In [None]:
# Metric columns in `metric_df` are named `<metric>_<split>`.
metrics = ['correct_factorization', 'loss']
splits = ['test', 'oos']
metrics_full = [f'{metric}_{split}' for split in splits for metric in metrics]

In [None]:
metrics_full

In [None]:
metric_df

In [None]:
def make_correlation_plot(metric_df, x, y, clip_outliers = True):
    """Scatter-plot column ``y`` against column ``x`` with a least-squares line.

    Parameters
    ----------
    metric_df : DataFrame
        Table holding both columns.
    x, y : str
        Column names for the horizontal and vertical axis.
    clip_outliers : bool
        When true (the default), rows whose ``x`` value lies more than five
        standard deviations from the mean of ``x`` are excluded from both the
        scatter plot and the fit. When false, every row is used.

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    x_col = metric_df[x]
    y_col = metric_df[y]

    if clip_outliers:
        center = x_col.mean()
        spread = x_col.std() * 5
        is_outlier = (x_col > center + spread) | (x_col < center - spread)
    else:
        is_outlier = pd.Series(False, index=metric_df.index)
    x_col = x_col[~is_outlier]
    y_col = y_col[~is_outlier]

    ax = metric_df[~is_outlier].plot.scatter(x=x, y=y)

    # Degree-1 fit: slope m and intercept b of the trend line.
    m, b = np.polyfit(x_col, y_col, 1)
    x_vals = np.array(ax.get_xlim())
    ax.plot(x_vals, m*x_vals + b, color='black', linestyle='--', alpha=.5)
    ax.set_title(f'Correlation between {x} and {y}')
    plt.show()

### Look at some correlations
#### In general:
* Correlation using correct_product is not very strong in many places.
    * I think this makes sense because of the issue where when the model is unable to factor the number, it returns the number because it could be prime

#### Test metrics vs OoS Metrics:
* Test loss is very correlated with Oos. Same for factorization, but not quite as much

#### Is loss correlated with correct factorization?
* Yes! Loss is very correlated with correct factorization

In [None]:
# For each metric, plot its test-split value against its OOS-split value.
for m in metrics:
    make_correlation_plot(metric_df, x=m + f'_{splits[0]}', y=m + f'_{splits[1]}')

In [None]:
# Within each split, plot every non-loss metric against the loss.
for m in metrics:
    if m == 'loss' : continue
    for s in splits:
        make_correlation_plot(metric_df, x=f'loss_{s}', y = f'{m}_{s}')

In [None]:
# Every column that is neither a metric nor explicitly ignored is treated as a
# hyperparameter.
hyperparam_cols = [c for c in metric_df.columns if not c in metrics_full + ignore_cols]

In [None]:
hyperparam_cols

In [None]:
# Metric name tokens for which a smaller value is the better one.
metrics_lower_is_better = {'loss'}

def style_col_axis_1(col, n_highlight=3, col_id_name="id"):
    """Return one CSS background string per entry of ``col``, highlighting the best values.

    The column name is split on ``_``; if any token is listed in
    ``metrics_lower_is_better`` the smallest values count as best, otherwise
    the largest. The best ``n_highlight`` entries get a green background whose
    opacity falls linearly from 1 with rank; all others get opacity 0.
    ``col_id_name`` is accepted but not used.
    """
    name_tokens = set(col.name.split('_'))
    ranking = np.argsort(col.values)
    if not (name_tokens & metrics_lower_is_better):
        # Higher is better: walk the ascending order backwards.
        ranking = ranking[::-1]

    alphas = np.zeros(len(col))
    for rank, position in enumerate(ranking[:n_highlight]):
        alphas[position] = 1 - rank/n_highlight
    return np.array([f'background-color: rgba(0,169,0,{alpha})' for alpha in alphas])

def style_df(df, ablated_hparams = None, n_highlight=3, col_id_name="color"):
    """Build the per-cell CSS array used with ``Styler.apply(..., axis=None)``.

    Parameters
    ----------
    df : DataFrame
        Table being styled; must contain ``col_id_name`` when it has any
        column listed in ``hyperparam_cols``.
    ablated_hparams : list of str, optional
        Columns left unstyled (their cells keep the initial value ``0``).
    n_highlight : int
        Number of best values highlighted in each non-hyperparameter column.
    col_id_name : str
        Column holding the background colour of each row's hyperparameter set.

    Returns
    -------
    numpy.ndarray
        Object array with the same shape as ``df``. Hyperparameter columns are
        painted with the row's colour from ``col_id_name``; every other
        non-ablated column is styled by ``style_col_axis_1``.
    """
    if ablated_hparams is None:
        ablated_hparams = []
    # The `np.object` alias was removed from NumPy; the builtin `object` is
    # the supported dtype spelling.
    to_return = np.zeros_like(df.values, dtype=object)
    for i, this_col in enumerate(df.columns):
        if this_col in ablated_hparams:
            continue
        if this_col in hyperparam_cols:
            to_return[:, i] = np.array([f'background-color: {color}' for color in df[col_id_name]])
        else:
            to_return[:, i] = style_col_axis_1(df.iloc[:, i], n_highlight, col_id_name)
    return to_return

In [None]:
# Values used to fill missing entries of a hyperparameter column.
# NOTE(review): presumably 8 is the head count the model uses when the config
# does not set `num_heads` - confirm against the model code.
default_mapper = {
    'num_heads' : 8
}

# Apply the defaults to both tables so they stay consistent.
for k, v in default_mapper.items():
    if k in metric_df:
        metric_df[k] = metric_df[k].fillna(v)
        metric_df_loss_hist[k] = metric_df_loss_hist[k].fillna(v)

In [None]:
metric_df.sort_values(hyperparam_cols).style.apply(style_col_axis_1, axis=0, subset=metrics_full)

In [None]:
def add_identifier_for_hparam_sets(df, hparam_cols, values, col_name='color'):
    """Label each distinct combination of ``hparam_cols`` with an entry of ``values``.

    ``df`` is modified in place: it is sorted by the combined hyperparameter
    values, re-indexed from 0, and gains the column ``col_name``. Rows with
    the same combination share an identifier. When there are more
    combinations than ``values``, identifiers are reused cyclically and a
    warning is emitted. The same ``df`` object is returned.
    """
    key_col = 'groupby_hps'
    # One string key per row, built from the row's hyperparameter values.
    df[key_col] = df[hparam_cols].apply(lambda row: '_'.join(str(v) for v in row), axis=1)
    df.reset_index(drop=True, inplace=True)

    df[col_name] = values[0]
    df.sort_values(key_col, inplace=True)
    df.reset_index(inplace=True, drop=True)

    # After sorting, equal keys are adjacent: move to the next identifier
    # every time the key changes.
    n_values = len(values)
    group_number = 0
    row_keys = df[key_col].tolist()
    for row_idx in range(1, len(row_keys)):
        if row_keys[row_idx] != row_keys[row_idx - 1]:
            group_number += 1
        df.loc[row_idx, col_name] = values[group_number % n_values]
        if group_number >= n_values:
            warnings.warn(f'More unique combinations of hyperparams found than unique {col_name}; {col_name} will not be unique')
    df.drop(key_col, axis=1, inplace=True)
    return df


In [None]:
def drop_nonunique_hparams(df):
    """Drop, in place, every hyperparameter column of ``df`` that holds a single distinct value.

    Only columns listed in ``hyperparam_cols`` and present in ``df`` are
    considered. The same ``df`` object is returned.
    """
    for col in hyperparam_cols:
        if col in df and df[col].nunique() == 1:
            df.drop(col, axis=1, inplace=True)
    return df

In [None]:
class SelfReturnDict(dict):
    """Dict whose item lookup returns the key itself when the key is absent.

    Only ``d[key]`` is affected; a missing key is not stored, and ``get`` and
    ``in`` behave as for a plain dict.
    """

    def __missing__(self, key):
        # dict.__getitem__ calls this hook for keys that are not present.
        return key

In [None]:
# Semi-transparent row colours used to mark runs that share a hyperparameter
# set. Every entry carries an explicit alpha component.
colors = [
    'rgba(255,0,0,.5)',
    'rgba(0,255,0,.5)',
    'rgba(0,0,255,.5)',
    'rgba(48,213,200,.5)',
    'rgba(159,43,104,.5)',
    'rgba(230,190,138,.5)',
    'rgba(123,255,0,.5)',
]

def ablate_hyperparameter(metric_df, hparams, show_specific_hparams=None, show_all_other_hparams=False, show_if_nonconstant=True, sort_by='other'):
    """Build a styled table of runs that differ only in ``hparams``.

    Runs are grouped by every hyperparameter column not listed in ``hparams``;
    groups with a single run are discarded. Rows of one group share a
    background colour on the displayed hyperparameter columns, and the best
    value of each metric column is highlighted.

    Parameters
    ----------
    metric_df : DataFrame
        One row per run with hyperparameter and metric columns.
    hparams : str or list of str
        Hyperparameter(s) being ablated.
    show_specific_hparams : str or list of str, optional
        Grouping hyperparameters that are always shown.
    show_all_other_hparams : bool
        Show every grouping hyperparameter.
    show_if_nonconstant : bool
        Show grouping hyperparameters that take more than one value among the
        kept runs.
    sort_by : {'other', 'inputs'}
        ``'other'`` keeps the rows ordered by hyperparameter set; ``'inputs'``
        sorts them by ``hparams``.

    Returns
    -------
    pandas Styler for the resulting table.

    Raises
    ------
    ValueError
        If ``sort_by`` is not one of the two accepted values.
    """
    if sort_by not in ('inputs', 'other'):
        raise ValueError('expected sortby to be inputs or other')
    id_col_name = 'color'
    if isinstance(hparams, str):
        hparams = [hparams]

    groupby_hps = [c for c in hyperparam_cols if c not in hparams]
    if groupby_hps:
        grouped = metric_df.groupby(groupby_hps).filter(lambda runs: len(runs) > 1)
    else:
        # Nothing left to group by: all runs form one set. Work on a copy
        # because the identifier helper modifies its argument in place.
        grouped = metric_df.copy()
    grouped = add_identifier_for_hparam_sets(grouped, groupby_hps, colors, id_col_name)

    if show_specific_hparams:
        if isinstance(show_specific_hparams, str):
            show_specific_hparams = [show_specific_hparams]
    else:
        show_specific_hparams = []

    # Always show the ablated hyperparameters, the metrics and the id column;
    # grouping hyperparameters are shown only on request.
    show_cols = [c for c in grouped.columns if c not in groupby_hps or c == id_col_name]
    for c in groupby_hps:
        if show_all_other_hparams or (c in show_specific_hparams) or (show_if_nonconstant and grouped[c].nunique() > 1):
            show_cols.append(c)

    grouped = grouped[show_cols]
    if sort_by == 'inputs':
        # Non-inplace sort: `grouped` is a column selection of another frame.
        grouped = grouped.sort_values(hparams).reset_index(drop=True)

    styler = grouped.style
    # `Styler.hide_columns` was replaced by `Styler.hide`; fall back to the
    # old method on pandas versions that lack the new one.
    if hasattr(styler, 'hide'):
        styler = styler.hide(subset=[id_col_name], axis='columns')
    else:
        styler = styler.hide_columns([id_col_name])
    return styler.apply(style_df, axis=None, ablated_hparams = hparams, n_highlight=1)

In [None]:
# Line markers used to tell hyperparameter sets apart inside one plot.
valid_markers = ['o', 's', '*', 'p', 'D']
def ablate_loss_hist_plot(metric_df, hparams, plot_metric = 'test_loss', max_imgs_per_row = 3, name_mapper = None):
    """Plot loss-history curves of runs that differ only in ``hparams``.

    Runs are grouped by every hyperparameter column not listed in ``hparams``;
    groups with a single run are discarded. One subplot is drawn per remaining
    combination of the other (non-constant) hyperparameters, with one curve
    per run labelled by its ``hparams`` values.

    Parameters
    ----------
    metric_df : DataFrame
        One row per run; must contain a ``loss_hist`` column whose entries are
        DataFrames with a ``step`` column and a ``plot_metric`` column. The
        frame is not modified.
    hparams : str or list of str
        Hyperparameter(s) being ablated.
    plot_metric : str
        Column of each loss-history frame plotted on the y axis.
    max_imgs_per_row : int
        Maximum number of subplots per row.
    name_mapper : dict, optional
        Short display names for hyperparameters in the legend; names without
        an entry are shown unchanged.

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    name_mapper = SelfReturnDict(name_mapper or {})
    if isinstance(hparams, str):
        hparams = [hparams]
    groupby_hps = [c for c in hyperparam_cols if c not in hparams + ['loss_hist']]
    if groupby_hps:
        grouped = metric_df.groupby(groupby_hps).filter(lambda runs: len(runs) > 1).reset_index(drop=True)
    else:
        # Copy: the helpers below add, sort and drop columns in place, which
        # must not leak into the caller's table.
        grouped = metric_df.copy()
    if grouped.empty:
        print(f'No runs found that differ only in {hparams}; nothing to plot')
        return

    grouped = add_identifier_for_hparam_sets(grouped, groupby_hps, valid_markers, col_name='marker')
    grouped.sort_values(hparams, inplace=True)
    grouped.reset_index(inplace=True, drop=True)

    # Build the legend labels before constant columns are dropped: an ablated
    # hyperparameter that happens to be constant among the kept runs would
    # otherwise be missing when the label is formatted.
    grouped['mpl_label'] = grouped[hparams].apply(
        lambda row: ', '.join([f"{name_mapper[c]}: {row[c]}" for c in hparams]), axis=1)

    grouped = drop_nonunique_hparams(grouped)
    grouped.reset_index(drop=True, inplace=True)
    remaining_hparams = [c for c in list(grouped) if c in hyperparam_cols]
    remaining_noninput_hparams = [p for p in remaining_hparams if p not in hparams and p != 'loss_hist']

    # One subplot per distinct combination of the remaining hyperparameters.
    other_hyperparam_values = grouped[remaining_noninput_hparams].apply(lambda x: ', '.join([str(y) for y in x]), axis=1).unique().tolist()
    n_plots = max(1, len(other_hyperparam_values))
    n_cols = max(1, min(max_imgs_per_row, n_plots))
    n_rows = n_plots // n_cols + int(bool(n_plots % n_cols))
    # squeeze=False always yields a 2-D axes array, including the single-row,
    # single-column and single-plot layouts.
    fig, axes = plt.subplots(n_rows, n_cols, squeeze=False)
    idx_to_ax = lambda idx: axes[idx // n_cols, idx % n_cols]

    def add_plot_to_ax(sub_df, ax, title):
        # One curve per run of this hyperparameter combination.
        for j in range(sub_df.shape[0]):
            row = sub_df.iloc[j]
            loss_hist = row['loss_hist']
            loss_hist.plot(x='step', y=plot_metric, ax=ax, label = row['mpl_label'], marker=row['marker'])
        ax.set_ylabel(plot_metric)
        ax.legend()
        ax.set_title(title)

    fig.set_size_inches(6*n_cols,  6* n_rows)
    if len(remaining_noninput_hparams):
        for i, (name, sub_grouped_df) in enumerate(grouped.groupby(remaining_noninput_hparams)):
            # Depending on the pandas version a single grouping key is
            # reported as a scalar or as a 1-tuple.
            if not isinstance(name, tuple):
                name = (name,)
            plot_title = pprint.pformat({hp : val for hp, val in zip(remaining_noninput_hparams, name)})
            add_plot_to_ax(sub_grouped_df, idx_to_ax(i), title=plot_title)
    else:
        add_plot_to_ax(grouped, idx_to_ax(0), ', '.join(hparams))

    fig.tight_layout()
    plt.show()

## "Ablate" some hyperparameters:
* For a given set of hyperparameters, find sets of runs where all other hyperparameters are the same, but these are varied
* Plot a loss history curve for the same collections

In [None]:
metric_df_loss_hist

In [None]:
# display(ablate_hyperparameter(metric_df, 'max_grad_norm', sort_by='other'))
# Loss curves of runs that differ only in `max_grad_norm`: test loss, then OOS loss.
ablate_loss_hist_plot(metric_df_loss_hist, 'max_grad_norm')
ablate_loss_hist_plot(metric_df_loss_hist, 'max_grad_norm', 'oos_loss')


In [None]:
# Raise SystemExit so that a "Run All" stops at this cell.
sys.exit()

In [None]:
# Ablate `base`: table ordered by hyperparameter set, the same table ordered by
# `base`, then the test-loss curves.
display(ablate_hyperparameter(metric_df, 'base', sort_by='other'))
display(ablate_hyperparameter(metric_df, 'base', sort_by='inputs'))
ablate_loss_hist_plot(metric_df_loss_hist, 'base')


# Base

In [None]:
# OOS-loss curves for the `base` ablation.
ablate_loss_hist_plot(metric_df_loss_hist, 'base', plot_metric='oos_loss')

# Encoder/Decoder Layers

In [None]:
# Ablate encoder and decoder depth together: both table orderings, then the
# test-loss and OOS-loss curves.
display(ablate_hyperparameter(metric_df, ['num_encoder_layers', 'num_decoder_layers'], sort_by='inputs'))
display(ablate_hyperparameter(metric_df, ['num_encoder_layers', 'num_decoder_layers'], sort_by='other'))
ablate_loss_hist_plot(metric_df_loss_hist, ['num_encoder_layers', 'num_decoder_layers'])
ablate_loss_hist_plot(metric_df_loss_hist, ['num_encoder_layers', 'num_decoder_layers'], 'oos_loss')

## Positional Encoding

In [None]:
# Ablate the three positional-encoding options jointly. `name_mapper` supplies
# short legend names for them; the two plot calls differ only in the plotted
# metric (test loss, then OOS loss).
display(ablate_hyperparameter(metric_df, ['positional_encoding_type', 'repeat_positional_encoding', 'positional_encoding_query_key_only'], 
                      sort_by='other'))
ablate_loss_hist_plot(metric_df_loss_hist, ['positional_encoding_type', 'repeat_positional_encoding', 'positional_encoding_query_key_only'],
                     name_mapper = {
    'positional_encoding_type' : 'PEType',
    'repeat_positional_encoding' : 'RepeatPE',
    'positional_encoding_query_key_only' : 'PE_QK_Only'
})
ablate_loss_hist_plot(metric_df_loss_hist, ['positional_encoding_type', 'repeat_positional_encoding', 'positional_encoding_query_key_only'], 'oos_loss',
                     name_mapper = {
    'positional_encoding_type' : 'PEType',
    'repeat_positional_encoding' : 'RepeatPE',
    'positional_encoding_query_key_only' : 'PE_QK_Only'
})

# Learn Positional Encoding

In [None]:
# Ablate `learn_positional_encoding`: both table orderings, then the test-loss
# and OOS-loss curves.
display(ablate_hyperparameter(metric_df, ['learn_positional_encoding'], sort_by='inputs'))
display(ablate_hyperparameter(metric_df, ['learn_positional_encoding'], sort_by='other'))
ablate_loss_hist_plot(metric_df_loss_hist, ['learn_positional_encoding'])
ablate_loss_hist_plot(metric_df_loss_hist, ['learn_positional_encoding'], 'oos_loss')

# Shared Embeddings

In [None]:
# Ablate `shared_embeddings`; the table always shows `positional_encoding_type`
# alongside. Then the test-loss and OOS-loss curves.
display(ablate_hyperparameter(metric_df, 'shared_embeddings', show_specific_hparams='positional_encoding_type'))
ablate_loss_hist_plot(metric_df_loss_hist, 'shared_embeddings')
ablate_loss_hist_plot(metric_df_loss_hist, 'shared_embeddings', 'oos_loss')

# Extra positional encodings

In [None]:
# Ablate `extra_positional_encoding_relative_decoder_mha`: table, then the
# test-loss and OOS-loss curves.
display(ablate_hyperparameter(metric_df, ['extra_positional_encoding_relative_decoder_mha'], sort_by='inputs'))
ablate_loss_hist_plot(metric_df_loss_hist, ['extra_positional_encoding_relative_decoder_mha'])
ablate_loss_hist_plot(metric_df_loss_hist, ['extra_positional_encoding_relative_decoder_mha'], 'oos_loss')

# Embed Dim

In [None]:
# Ablate `embed_dim`. The table covers all runs; the loss curves are limited to
# runs with embed_dim < 1024.
display(ablate_hyperparameter(metric_df, ['embed_dim'], sort_by='inputs'))
ablate_loss_hist_plot(metric_df_loss_hist[metric_df_loss_hist['embed_dim'] < 1024], ['embed_dim'])
ablate_loss_hist_plot(metric_df_loss_hist[metric_df_loss_hist['embed_dim'] < 1024], ['embed_dim'], 'oos_loss')


# Dropout

In [None]:
# Ablate `dropout`: table, then the test-loss and OOS-loss curves.
display(ablate_hyperparameter(metric_df, ['dropout'], sort_by='inputs'))
ablate_loss_hist_plot(metric_df_loss_hist, ['dropout'])
ablate_loss_hist_plot(metric_df_loss_hist, ['dropout'], 'oos_loss')

# Norm First

In [None]:
# Ablate `norm_first`: table, then the test-loss and OOS-loss curves.
display(ablate_hyperparameter(metric_df, 'norm_first'))
ablate_loss_hist_plot(metric_df_loss_hist, ['norm_first'])
ablate_loss_hist_plot(metric_df_loss_hist, ['norm_first'], 'oos_loss')

# Weight Decay

In [None]:
# Ablate `weight_decay`: table, then the test-loss and OOS-loss curves.
display(ablate_hyperparameter(metric_df, ['weight_decay']))
ablate_loss_hist_plot(metric_df_loss_hist, ['weight_decay'])
ablate_loss_hist_plot(metric_df_loss_hist, ['weight_decay'], 'oos_loss')

# Dim Feedforward

In [None]:
# Ablate `dim_feedforward`: table, then the test-loss and OOS-loss curves.
display(ablate_hyperparameter(metric_df, ['dim_feedforward'], sort_by='inputs'))
ablate_loss_hist_plot(metric_df_loss_hist, ['dim_feedforward'])
ablate_loss_hist_plot(metric_df_loss_hist, ['dim_feedforward'], 'oos_loss')


# Scale Embeddings

In [None]:
# Ablate `scale_embeddings` and `scale_embeddings_at_init` jointly: table, then
# the test-loss and OOS-loss curves.
display(ablate_hyperparameter(metric_df, ['scale_embeddings', 'scale_embeddings_at_init'], sort_by='inputs'))
ablate_loss_hist_plot(metric_df_loss_hist, ['scale_embeddings', 'scale_embeddings_at_init'])
ablate_loss_hist_plot(metric_df_loss_hist, ['scale_embeddings', 'scale_embeddings_at_init'], 'oos_loss')

# Num Attention Heads

In [None]:
# Ablate `num_heads`: table, then the test-loss and OOS-loss curves.
display(ablate_hyperparameter(metric_df, ['num_heads'], sort_by='other'))
ablate_loss_hist_plot(metric_df_loss_hist, ['num_heads'])
ablate_loss_hist_plot(metric_df_loss_hist, ['num_heads'], 'oos_loss')

# Num warmup steps

In [None]:
# Ablate `num_warmup_steps`: table, then the test-loss and OOS-loss curves.
display(ablate_hyperparameter(metric_df, ['num_warmup_steps'], sort_by='other'))
ablate_loss_hist_plot(metric_df_loss_hist,  ['num_warmup_steps'])
ablate_loss_hist_plot(metric_df_loss_hist,  ['num_warmup_steps'], 'oos_loss')

# Nb Epochs

In [None]:
# Ablate `nb_epochs`: table, then the test-loss and OOS-loss curves.
display(ablate_hyperparameter(metric_df, ['nb_epochs'], sort_by='other'))
ablate_loss_hist_plot(metric_df_loss_hist,  ['nb_epochs'])
ablate_loss_hist_plot(metric_df_loss_hist,  ['nb_epochs'], 'oos_loss')

In [None]:
# Joint ablation of `nb_epochs` and `weight_decay`: the table and both loss
# plots use the same pair of hyperparameters.
display(ablate_hyperparameter(metric_df, ['nb_epochs', 'weight_decay'], sort_by='other'))
ablate_loss_hist_plot(metric_df_loss_hist,  ['nb_epochs', 'weight_decay'])
ablate_loss_hist_plot(metric_df_loss_hist,  ['nb_epochs', 'weight_decay'], 'oos_loss')

In [None]:
# Joint ablation of `nb_epochs` and `dropout`: the table and both loss plots
# use the same pair of hyperparameters.
display(ablate_hyperparameter(metric_df, ['nb_epochs', 'dropout'], sort_by='other'))
ablate_loss_hist_plot(metric_df_loss_hist,  ['nb_epochs', 'dropout'])
ablate_loss_hist_plot(metric_df_loss_hist,  ['nb_epochs', 'dropout'], 'oos_loss')

In [None]:
# Raise SystemExit so that a "Run All" stops at this cell.
sys.exit()

### What are some hard numbers?

In [None]:
# Tag every prediction frame with the position of its run in the accumulator lists.
for run_id, factor_df in enumerate(test_factor_df_list):
    factor_df['id'] = run_id

In [None]:
# Stack the per-run prediction frames into one table.
all_factor_dfs = pd.concat(test_factor_df_list)

In [None]:
def get_top_n_factor_dfs(n, metric = 'loss_oos', descending=False):
    """Return the prediction rows of the ``n`` top-ranked runs.

    Runs are ranked by ``metric_df[metric]``, ascending by default or
    descending when ``descending`` is true. Runs whose metric is missing are
    never selected.

    Assumes that the row position of a run in ``metric_df`` equals the ``id``
    stored in ``all_factor_dfs`` - TODO confirm this still holds if
    ``metric_df`` is ever re-ordered.

    Returns a copy of the matching rows of ``all_factor_dfs``; the global
    table itself is not modified.
    """
    scores = metric_df[metric].to_numpy()
    ordering = np.argsort(scores, kind='stable')
    # Drop runs without a value for this metric before ranking.
    ordering = ordering[~pd.isna(scores[ordering])]
    if descending:
        ordering = ordering[::-1]

    top_ids = ordering[:n]
    return all_factor_dfs[all_factor_dfs['id'].isin(top_ids)].copy()
    
    

In [None]:
get_top_n_factor_dfs(3, 'correct_factorization_oos', descending=True)

In [None]:
# Step 1: per (input_num, run id), was any row flagged as a correct factorization?
# Step 2: per input_num, count the runs for which that was the case.
num_times_correctly_factored = all_factor_dfs.groupby(['input_num', 'id']).agg({'correct_factorization' : 'any'}).groupby(['input_num']).agg({'correct_factorization' : 'sum'})

In [None]:
num_times_correctly_factored['correct_factorization'].hist(bins=16)

In [None]:
num_times_correctly_factored[num_times_correctly_factored['correct_factorization']==0]

In [None]:
num_times_correctly_factored[num_times_correctly_factored['correct_factorization']==0].reset_index()['input_num'].apply(lambda x: x % 2).value_counts()

# This plot looks wrong to me!!!

In [None]:
# Scratch check: print the group key produced when grouping by two columns.
for name, _ in pd.DataFrame({'a' : [1,2,3], 'b' : [4,5,6], 'c' : [7,8,9]}).groupby(['a','b']):
    print(name)