In [1]:
import dowhy
from dowhy import CausalModel
import itertools
import glob
import os
from rpy2.robjects import r as R
%load_ext rpy2.ipython
import sqlite3
from cdt.causality.pairwise import CDS
import numpy as np
import pandas as pd
import graphviz
import networkx as nx
import itertools
from graphviz import Source
from spellchecker import SpellChecker
np.set_printoptions(precision=3, suppress=True)
np.random.seed(0)

In [6]:
def pairwise(inp):
    """Yield every unordered pair of entries of ``inp`` as a two-item dict.

    ``inp`` only needs an ``.items()`` method returning ``(key, value)``
    tuples. Pairs are produced in ``itertools.combinations`` order.
    """
    entries = inp.items()
    for first, second in itertools.combinations(entries, 2):
        yield dict((first, second))

In [341]:
# Load a single annotation CSV to walk through the pipeline on one file.
df = pd.read_csv('dfs_new/inception_utensil_color_bias_FISH_SLICE.csv')

In [342]:
df.head()

Unnamed: 0,colors,y,primary_concept,part,shapes
0,brown,0.016713,spoon,handle,rectangular
1,brown,0.016713,spoon,head,round
2,yellow,0.016713,background,,square
3,black,0.211885,background,,
4,silver,0.211885,butter_knife,,


In [343]:
# get_data returns the dummy-encoded frame (`data`) and the trimmed categorical frame (`df1`).
# NOTE(review): the saved df1 preview has no `shapes` column, but get_data as defined in this
# notebook only drops `part`/`id` — the output looks like it came from an older version; re-run to confirm.
data, df1 = get_data(df)

In [344]:
# df1 carries `primary_concept`/`colors` columns, so this call needs the gen_graph variant that
# reads those columns. The notebook defines two functions named gen_graph; whichever cell ran last wins.
G = gen_graph(df1)

In [345]:
df1.head()

Unnamed: 0,colors,y,primary_concept
0,brown,0.016713,spoon
1,brown,0.016713,spoon
2,yellow,0.016713,background
3,black,0.211885,background
4,silver,0.211885,butter_knife


In [299]:
df1[df1.primary_concept=='petri_dish']

Unnamed: 0,colors,y,primary_concept


In [282]:
# NOTE(review): same statement as an earlier cell (get_data on `df`); the non-sequential execution
# counts suggest an out-of-order re-run — one of the two cells can probably be removed.
data, df1 = get_data(df)

In [346]:
data.head()

Unnamed: 0,y,primary_concept_background,primary_concept_butter_knife,primary_concept_fish_knife,primary_concept_fork,primary_concept_guitar_keychain,primary_concept_knife,primary_concept_spatula,primary_concept_spoon,primary_concept_tablespoon,...,color_bronze,color_brown,color_gold,color_golden,color_green,color_grey,color_lightbronze,color_red,color_silver,color_yellow
0,0.016713,0,0,0,0,0,0,0,1,0,...,0,1,0,0,0,0,0,0,0,0
1,0.016713,0,0,0,0,0,0,0,1,0,...,0,1,0,0,0,0,0,0,0,0
2,0.016713,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,0.211885,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0.211885,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [283]:
# NOTE(review): same statement as an earlier cell (gen_graph on `df1`); likely a leftover re-run.
G = gen_graph(df1)

In [347]:
# Serialise the graph to a DOT file; causal_inference passes this path to CausalModel as `graph`.
nx.drawing.nx_pydot.write_dot(G,'test.dot')


In [348]:
# One effect estimate on `y` per node of G (other than `y` itself), returned as a DataFrame.
ans = causal_inference(data,'test.dot',G)

In [349]:
ans.head()

Unnamed: 0,treatment,score
0,color_blue,0.5455377034443711
1,primary_concept_spatula,0.490136485265597
2,primary_concept_guitar_keychain,0.3188574116220842
3,color_gold,0.2848738115795171
4,color_red,0.2382934734443708


In [350]:
# Persist the ranking; index=False keeps the positional index out of the CSV.
ans.to_csv('test.csv',index=False)

In [314]:
# Render the DOT file with graphviz and open it in a viewer; view() returns the rendered
# file's path, which is what the cell displays.
s = Source.from_file('test.dot')
s.view()

'test.dot.pdf'

In [353]:
def gen_graph(df):
    """Build a directed graph tying each row's concept and colour to the outcome ``y``.

    For every row, three edges are added between the lower-cased nodes
    ``primary_concept_<primary_concept>``, ``color_<colors>`` and ``y``:
    concept -> y, colour -> y and concept -> colour.

    Args:
        df: DataFrame with ``primary_concept`` and ``colors`` columns.

    Returns:
        The populated ``nx.DiGraph``.
    """
    outcome = 'y'
    dag = nx.DiGraph()
    for _, record in df.iterrows():
        concept_node = ('primary_concept_' + record['primary_concept']).lower()
        colour_node = ('color_' + record['colors']).lower()
        row_edges = (
            (concept_node, outcome),
            (colour_node, outcome),
            (concept_node, colour_node),
        )
        for source, target in row_edges:
            dag.add_edge(source, target)
    return dag

In [2]:
def get_data(df):
    """One-hot encode the concept / colour / shape annotations.

    Args:
        df: Annotation frame with ``y``, ``primary_concept``, ``colors`` and
            ``shapes`` columns. ``part`` and ``id`` are dropped when present.

    Returns:
        ``(data, df)``: ``data`` is the dummy-encoded frame (prefixes
        ``primary_concept_``, ``color_``, ``shape_``) with ``y`` converted to
        numeric; ``df`` is the input without ``part``/``id``. The caller's
        frame is not modified.
    """
    # Some CSVs have no `id` (or `part`) column; skip missing ones instead of raising KeyError.
    df = df.drop(columns=['part', 'id'], errors='ignore')
    # Turn empty strings into NaN so they are treated as missing when encoding.
    temp = df.replace('', np.nan)
    data = pd.get_dummies(
        temp,
        prefix=['primary_concept', 'color', 'shape'],
        columns=['primary_concept', 'colors', 'shapes'],
    )
    data['y'] = pd.to_numeric(data['y'])
    return data, df

In [3]:
def gen_graph(df):
    """Turn signed pairwise scores into a directed graph.

    Each row supplies two node names (``pair1``, ``pair2``) and a ``causal``
    score. A positive score gives the edge pair1 -> pair2, a negative score
    gives pair2 -> pair1, and rows with any other score add nothing. Whenever
    one endpoint is ``'y'``, the edge is oriented into ``'y'`` instead.

    Args:
        df: DataFrame with ``pair1``, ``pair2`` and ``causal`` columns.

    Returns:
        The populated ``nx.DiGraph``.
    """
    g = nx.DiGraph()
    for _, record in df.iterrows():
        first, second = record['pair1'], record['pair2']
        score = record['causal']
        if score > 0:
            # 'y' listed first must still end up as the edge target.
            edge = (second, first) if first == 'y' else (first, second)
        elif score < 0:
            if second == 'y':
                print('y<0')
                edge = (first, second)
            else:
                edge = (second, first)
        else:
            continue
        g.add_edge(*edge)
    return g

In [4]:
def causal_inference(data, filename_dot, G):
    """Estimate each graph node's effect on ``y`` and rank the nodes by it.

    For every node of ``G`` except the outcome, a dowhy ``CausalModel`` is
    built from ``data`` and the graph stored in ``filename_dot``, and the
    ``backdoor.linear_regression`` estimate for moving the treatment from 0
    to 1 is recorded.

    Args:
        data: Frame handed to ``CausalModel``; must contain the outcome ``y``
            and a column per treatment node.
        filename_dot: Path of the DOT file describing the causal graph.
        G: Graph whose ``nodes()`` are the candidate treatments.

    Returns:
        DataFrame with columns ``treatment`` and ``score``, ordered by score
        from largest to smallest. ``score`` holds the estimate as a string,
        as before; the ordering uses its numeric value. Scores that are not
        numbers (or are NaN) are placed last.
    """
    def _numeric(score_text):
        # Scores are stored as strings; comparing the strings themselves would
        # rank '9e-05' above '0.5' and misplace negative values.
        try:
            value = float(score_text)
        except (TypeError, ValueError):
            return float('-inf')
        return float('-inf') if value != value else value  # NaN -> last

    results = []
    for col in G.nodes():
        # Skip the outcome and the literal '\n' node (presumably an artefact of
        # the DOT round-trip — TODO confirm).
        if col == 'y' or col == '\\n':
            continue
        model = CausalModel(
            data=data,
            treatment=col,
            outcome='y',
            graph=filename_dot,
            missing_nodes_as_confounders=False,
        )
        identified_estimand = model.identify_effect(proceed_when_unidentifiable=True)
        estimate = model.estimate_effect(
            identified_estimand,
            method_name="backdoor.linear_regression",
            control_value=0,
            treatment_value=1,
            confidence_intervals=False,
            test_significance=False,
        )
        results.append({'treatment': col, 'score': str(estimate.value)})

    ranked = sorted(results, key=lambda d: _numeric(d['score']), reverse=True)
    # Explicit columns keep the schema stable when no treatment was evaluated.
    return pd.DataFrame(ranked, columns=['treatment', 'score'])

In [23]:
def causal_mediation(data, filename_dot, G):
    """Split each node's effect on ``y`` into direct and mediated parts.

    For every node of ``G`` except the outcome, the natural direct effect
    (NDE) is estimated with dowhy's ``mediation.two_stage_regression``. If the
    NDE estimand reports mediator variables, the natural indirect effect
    (NIE) is estimated too.

    Args:
        data: Frame handed to ``CausalModel``; must contain ``y`` and a column
            per treatment node.
        filename_dot: Path of the DOT file describing the causal graph.
        G: Graph whose ``nodes()`` are the candidate treatments.

    Returns:
        List of dicts with keys ``primary_concept``, ``nde``, ``mediator``,
        ``nie``, ``total`` and ``mediation_proportion``, ordered by ``total``
        from largest to smallest. Nodes whose NDE estimate is ``None`` are
        left out. Without a mediator, ``total`` equals ``nde`` and the
        mediation fields are NaN.
    """
    def _two_stage_estimate(model, estimand):
        # NDE and NIE use the same estimator configuration.
        estimator = dowhy.causal_estimators.linear_regression_estimator.LinearRegressionEstimator
        return model.estimate_effect(
            estimand,
            method_name="mediation.two_stage_regression",
            confidence_intervals=False,
            test_significance=False,
            method_params={
                'first_stage_model': estimator,
                'second_stage_model': estimator,
            },
        )

    def _rank_key(entry):
        # Non-numeric or NaN totals sort last instead of disturbing the order.
        try:
            total = float(entry['total'])
        except (TypeError, ValueError):
            return float('-inf')
        return float('-inf') if total != total else total

    results = []
    for col in G.nodes():
        # Skip the outcome and the literal '\n' node (presumably an artefact of
        # the DOT round-trip — TODO confirm).
        if col == 'y' or col == '\\n':
            continue
        model = CausalModel(
            data=data,
            treatment=col,
            outcome='y',
            graph=filename_dot,
            missing_nodes_as_confounders=False,
        )
        identified_estimand_nde = model.identify_effect(estimand_type="nonparametric-nde",
                                                        proceed_when_unidentifiable=True)
        identified_estimand_nie = model.identify_effect(estimand_type="nonparametric-nie",
                                                        proceed_when_unidentifiable=True)
        mediator = identified_estimand_nde.get_mediator_variables()

        tmp = {'primary_concept': col}
        tmp['nde'] = _two_stage_estimate(model, identified_estimand_nde).value
        if tmp['nde'] is None:
            continue
        if len(mediator) > 0:
            # Only the first reported mediator is recorded.
            tmp['mediator'] = mediator[0]
            tmp['nie'] = _two_stage_estimate(model, identified_estimand_nie).value
            tmp['total'] = float(tmp['nie']) + float(tmp['nde'])
            # A zero total has no meaningful proportion; avoid ZeroDivisionError.
            if tmp['total'] != 0:
                tmp['mediation_proportion'] = float(tmp['nie']) / tmp['total']
            else:
                tmp['mediation_proportion'] = np.nan
        else:
            tmp['mediator'] = np.nan
            tmp['total'] = tmp['nde']
            tmp['mediation_proportion'] = np.nan
            tmp['nie'] = np.nan
        results.append(tmp)

    # The sorted list used to be computed and then discarded; return it.
    return sorted(results, key=_rank_key, reverse=True)

In [360]:
# Flatten each two-entry dict in `t` (built elsewhere via list(pairwise(...))) into one row:
# both keys plus their values converted to arrays.
tmp = pd.DataFrame(
    [
        (name_a, name_b, np.array(values_a), np.array(values_b))
        for (name_a, values_a), (name_b, values_b) in (list(pair.items())[:2] for pair in t)
    ],
    columns=['pair1', 'pair2', 'A', 'B'],
)

In [7]:
# Batch run: for every concept CSV, score all column pairs with CDS, build a graph from the
# scored pairs and estimate each node's effect on `y`. A file is skipped when its effects CSV
# already exists. Relies on the pair1/pair2/causal variant of gen_graph.
for file_name in glob.glob('class_concepts_images/*.csv'):
    base_name = os.path.basename(file_name)
    graph_file = 'cds_obj_color_shape/graphs/' + os.path.splitext(base_name)[0] + '.dot'
    result_file = 'cds_obj_color_shape/effects/' + base_name
    print(result_file)
    # os.path.exists instead of glob.glob: glob would treat characters such as '[' or '*'
    # in a file name as a pattern and could report an existing file as missing.
    if not os.path.exists(result_file):
        print(graph_file)
        df = pd.read_csv(file_name)
        data, _ = get_data(df)
        t = list(pairwise(data))
        tmp = pd.DataFrame(
            [
                (name_a, name_b, np.array(values_a), np.array(values_b))
                for (name_a, values_a), (name_b, values_b) in (list(pair.items())[:2] for pair in t)
            ],
            columns=['pair1', 'pair2', 'A', 'B'],
        )

        obj = CDS()
        output = obj.predict(tmp[['A', 'B']])
        tmp['causal'] = output
        tmp = tmp[tmp.causal != 0]
        # NOTE(review): this keeps the first and last 25% of rows in pair order; `tmp` is not
        # sorted by `causal`, so these are not the strongest scores — confirm that is intended.
        samples = int(len(tmp) * 0.25)
        sliced_df = pd.concat([tmp.head(samples), tmp.tail(samples)])
        G = gen_graph(sliced_df)
        # NOTE(review): an existing DOT file is reused as-is while the treatment nodes come
        # from the freshly built G; the two can disagree if the inputs changed.
        if not os.path.exists(graph_file):
            os.makedirs(os.path.dirname(graph_file), exist_ok=True)
            nx.drawing.nx_pydot.write_dot(G, graph_file)
        res = causal_inference(data, graph_file, G)
        # Create the output folder first so the finished run is not lost on a missing directory.
        os.makedirs(os.path.dirname(result_file), exist_ok=True)
        res.to_csv(result_file, index=False)
    # result_file_mediation = str('cds_part_obj_color_shape/mediation/'+os.path.basename(file_name))
#     if not glob.glob(result_file_mediation):
#         res_mediation = causal_mediation(data, graph_file, G)
#         mr = pd.DataFrame(res_mediation)
#         mr.replace('',np.nan, inplace=True)

#         mr.to_csv(result_file_mediation,index=False)
    print(graph_file, result_file)

cds_obj_color_shape/effects/squeezenet_utensil_shape_bias_DINNER_CUTLERY.csv
cds_obj_color_shape/graphs/squeezenet_utensil_shape_bias_DINNER_CUTLERY.dot cds_obj_color_shape/effects/squeezenet_utensil_shape_bias_DINNER_CUTLERY.csv
cds_obj_color_shape/effects/inception_imageneta_bias_mantis.csv
cds_obj_color_shape/graphs/inception_imageneta_bias_mantis.dot cds_obj_color_shape/effects/inception_imageneta_bias_mantis.csv
cds_obj_color_shape/effects/inception_utensil_color_bias_FISH_SLICE.csv
cds_obj_color_shape/graphs/inception_utensil_color_bias_FISH_SLICE.dot cds_obj_color_shape/effects/inception_utensil_color_bias_FISH_SLICE.csv
cds_obj_color_shape/effects/squeezenet_utensil_both_bias_DINNER_CUTLERY.csv
cds_obj_color_shape/graphs/squeezenet_utensil_both_bias_DINNER_CUTLERY.dot cds_obj_color_shape/effects/squeezenet_utensil_both_bias_DINNER_CUTLERY.csv
cds_obj_color_shape/effects/squeezenet_utensil_both_bias_FISH_SLICE.csv
cds_obj_color_shape/graphs/squeezenet_utensil_both_bias_FISH_SLIC

In [19]:
df.head()

Unnamed: 0,colors,y,id,primary_concept,part,shapes
0,silver,0.121783,2576,spatula,,
1,green,0.121783,2576,background,,
2,yellow,0.009639,2408,spoon,head,oval
3,yellow,0.009639,2408,spoon,tail,rectangle
4,green,0.009639,2408,background,middle,square


In [73]:
# Rebinds `df` to a different annotation CSV; the cells that follow operate on this frame.
df = pd.read_csv('dfs_new/squeezenet_imageneta_bias_ants.csv')

In [74]:
# Single-file version of the CDS scoring step, run on the `df` loaded in an earlier cell.
# df = pd.read_csv(file_name)
# get_data returns (data, df); only the encoded frame is needed. Binding the whole tuple to
# `data` would make pairwise(data) fail, since a tuple has no .items().
data, _ = get_data(df)
t = list(pairwise(data))
tmp = pd.DataFrame(
    [
        (name_a, name_b, np.array(values_a), np.array(values_b))
        for (name_a, values_a), (name_b, values_b) in (list(pair.items())[:2] for pair in t)
    ],
    columns=['pair1', 'pair2', 'A', 'B'],
)

obj = CDS()
output = obj.predict(tmp[['A', 'B']])
tmp['causal'] = output

In [81]:
tmp.head()

Unnamed: 0,pair1,pair2,A,B,causal
0,y,primary_concept_ant,"[4.869045e-05, 0.0005745088, 0.0005745088, 0.0...","[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...",-0.055824
1,y,primary_concept_ant_statue,"[4.869045e-05, 0.0005745088, 0.0005745088, 0.0...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...",-0.006799
2,y,primary_concept_background,"[4.869045e-05, 0.0005745088, 0.0005745088, 0.0...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",-0.020397
3,y,primary_concept_background_of_photo,"[4.869045e-05, 0.0005745088, 0.0005745088, 0.0...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",-0.006799
4,y,primary_concept_backpack,"[4.869045e-05, 0.0005745088, 0.0005745088, 0.0...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",-0.006799


In [83]:
tmp[tmp.pair1=='primary_concept_petri_dish']

Unnamed: 0,pair1,pair2,A,B,causal
2769,primary_concept_petri_dish,primary_concept_praying_mantis,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.0
2770,primary_concept_petri_dish,primary_concept_side_of_leaf,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.0
2771,primary_concept_petri_dish,primary_concept_side_of_plant,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.0
2772,primary_concept_petri_dish,primary_concept_string_of_tree,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.0
2773,primary_concept_petri_dish,primary_concept_sweater,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.0
2774,primary_concept_petri_dish,primary_concept_top_left_of_can,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.0
2775,primary_concept_petri_dish,primary_concept_top_of_can,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.0
2776,primary_concept_petri_dish,primary_concept_top_of_drinking_fountain,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.0
2777,primary_concept_petri_dish,primary_concept_top_of_prunes,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.0
2778,primary_concept_petri_dish,primary_concept_tree,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.0


In [58]:
# NOTE(review): rebinds `df` without `part`. Scratch cells that read df['part'] afterwards
# fail with KeyError (see the recorded traceback) unless `df` is reloaded first.
df = df.drop(columns='part')

In [None]:
# Per row: [primary_concept, part] when `part` is present, otherwise a one-element list.
# NOTE(review): this cell was never executed, and the fallback reads row['label1'] — no `label1`
# column appears in any frame previewed in this notebook; presumably `primary_concept` was meant. Confirm.
df['label3'] = [[row['primary_concept'], row['part']] if pd.notna(row['part']) else [row['label1']] for idx, row in df.iterrows()]

In [59]:
# Replace missing parts with '' so they can be concatenated as strings.
# NOTE(review): the recorded KeyError shows this ran after `part` had been dropped from `df`.
df['part'] = df['part'].replace(np.nan,'')

KeyError: 'part'

In [47]:
# Fold the part into the concept label: "head" + "spoon" -> "head,spoon" -> "head_of_spoon".
# Rows with an empty part get a leading comma, which the strip removes, leaving the bare concept.
# Assumes NaN parts were already replaced by '' (otherwise the concatenation yields NaN).
df['primary_concept'] =  df['part']+ ','+df['primary_concept']
df['primary_concept'] = df['primary_concept'].str.strip(',')
df['primary_concept'] = df['primary_concept'].replace(',','_of_',regex=True)

In [49]:
# NOTE(review): repeats the comma-strip step from the combined part/concept cell; likely a leftover.
df['primary_concept'] = df['primary_concept'].str.strip(',')

In [53]:
# NOTE(review): repeats the ',' -> '_of_' step from the combined part/concept cell; likely a leftover.
df['primary_concept'] = df['primary_concept'].replace(',','_of_',regex=True)

In [54]:
df.primary_concept.value_counts()

background              28
spoon                   11
fork                     7
spatula                  5
butter_knife             3
knife                    3
tail_of_spoon            2
head_of_fork             2
head_of_knife            2
head_of_spoon            2
middle_of_background     2
teaspoon                 1
cake_lifter              1
tail_of_knife            1
shadow_of_spatula        1
side_of_background       1
bottom_of_utensil        1
bottom_of_knife          1
head_of_butter_knife     1
guitar                   1
top_of_spatula           1
bottom_of_spatula        1
kitchen_spoon            1
handle_of_spatula        1
top_of_background        1
sides_of_background      1
background_of_fork       1
head_of_spatula          1
Name: primary_concept, dtype: int64