# Exploratory: Save dataframes of all possible combos
Natalia Vélez, April 2022

In [57]:
import os
import pandas as pd
import numpy as np
import teaching_models as teach
from os.path import join as opj

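Generate all combinations of three examples (drawn from the 36 example indices 0–35) and record, for each hypothesis A–D, whether it contains all three examples. Combinations that no hypothesis contains are dropped.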

In [86]:
def example_combos(prob_idx):
    """Enumerate the three-example combinations contained in at least one hypothesis.

    Every sorted triple ``(ex_0, ex_1, ex_2)`` drawn from the example ids
    ``0..35`` is scored against the problem's truth table: a hypothesis column
    gets a 1 when the truth-table entries of all three examples sum to 3 under
    that hypothesis, and a 0 otherwise. Triples with no 1 in any column are
    dropped from the result.

    Parameters
    ----------
    prob_idx : int
        Problem identifier, passed unchanged to ``teach.problem_df``.

    Returns
    -------
    pandas.DataFrame
        Integer 0/1 frame with columns ``A``-``D`` and a three-level
        MultiIndex named ``ex_0``, ``ex_1``, ``ex_2``, in lexicographic order.

    Notes
    -----
    Assumes ``teach.problem_df`` returns a 0/1 table indexed by example id
    with one column per hypothesis -- TODO confirm against ``teaching_models``.
    Example ids that are absent from that table are treated as belonging to no
    hypothesis, so any triple that uses one scores 0 everywhere.
    """
    n_examples = 36
    hypotheses = ['A', 'B', 'C', 'D']

    # all sorted triples of distinct example ids, in lexicographic order
    tuples = [
        (i, j, k)
        for i in range(n_examples)
        for j in range(i + 1, n_examples)
        for k in range(j + 1, n_examples)
    ]
    index = pd.MultiIndex.from_tuples(tuples, names=['ex_0', 'ex_1', 'ex_2'])
    combos = np.array(tuples)

    # Put the truth table on the full 0..35 grid. Missing example ids become
    # all-zero rows, which replaces the per-row `.loc` lookup that raised (and
    # then silently swallowed) KeyError for every triple touching a missing id.
    truth_table = teach.problem_df(prob_idx)
    membership = (
        truth_table
        .reindex(index=range(n_examples), columns=hypotheses, fill_value=0)
        .to_numpy()
    )

    # membership[combos] has shape (n_combos, 3, n_hypotheses); a hypothesis
    # contains a triple when all three of its entries are 1 (i.e. they sum to 3)
    contains_all = membership[combos].sum(axis=1) == combos.shape[1]
    prob_df = pd.DataFrame(contains_all.astype(int), index=index, columns=hypotheses)

    # drop impossible combinations
    return prob_df.loc[prob_df.sum(axis=1) > 0, :]

Let's cache the combos for all 40 problems, one CSV file per problem, under `outputs/combos`:

In [69]:
# Folder that holds the cached combination tables; created if it does not exist yet
out_dir = 'outputs/combos'
os.makedirs(out_dir, exist_ok=True)

# Build and save one table per problem index (0-39), overwriting any existing file
for prob_idx in range(40):
    out_file = os.path.join(out_dir, f'problem-{prob_idx:02}_combos.csv')
    prob_df = example_combos(prob_idx)
    prob_df.to_csv(path_or_buf=out_file)

KeyboardInterrupt: 

In [85]:
# Scratch call on problem 2.
# NOTE(review): the KeyError recorded as this cell's output comes from execution 85,
# i.e. before the function definition above was last run (execution 86) -- presumably
# from a version without the missing-label handling; confirm and clear the stale output.
example_combos(2)

KeyError: "Passing list-likes to .loc or [] with any missing labels is no longer supported. The following labels were missing: Int64Index([5], dtype='int64', name='idx'). See https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike"

In [88]:
# Build the combination table for problem 2 and keep it in `df` for inspection
df = example_combos(2)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,A,B,C,D
ex_0,ex_1,ex_2,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,1,2,0,0,0,1
0,1,3,0,0,0,1
0,1,4,0,0,0,1
0,1,6,0,0,0,1
0,1,7,0,0,0,1
...,...,...,...,...,...,...
24,27,28,0,0,0,1
25,26,27,0,1,0,1
25,26,28,0,1,0,1
25,27,28,0,1,0,1


In [77]:
# NOTE(review): in the visible cells `truth_table` is only assigned inside
# example_combos, so this display relies on a notebook-level variable left over
# from a cell that is not shown; define it explicitly (presumably via
# teach.problem_df -- confirm which problem) so the notebook survives Restart & Run All.
truth_table

hypothesis,A,B,C,D
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0,0,0,1
1,1,0,0,1
2,1,0,0,1
3,1,0,0,1
4,1,0,0,1
5,0,0,0,1
6,0,0,0,1
7,1,0,0,1
8,1,0,0,1
9,1,0,0,1


In [81]:
# Select the combinations whose first example (index level ex_0) is 1;
# the result is indexed by the remaining levels ex_1 and ex_2
df.loc[1]

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C,D
ex_1,ex_2,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,6,0,1,0,0
2,7,0,1,0,0
2,8,0,1,0,0
2,12,0,1,0,0
2,13,0,1,0,0
2,14,0,1,0,0
2,15,0,1,0,0
2,20,0,1,0,0
2,21,0,1,0,0
6,7,0,1,0,0
