In [1]:
%matplotlib inline
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
import seaborn as sns
from pomegranate import BayesianNetwork
import pygraphviz

In [2]:
# Relative path strings used by the notebook.
PROJECT_ROOT_DIR = "."
SURVEY_PATH = 'datasets/survey'

# Image folders; the nested ones are spelled relative to IMAGES_DIR so the
# common prefix is written only once (the resulting strings are unchanged).
IMAGES_DIR = 'images'
HISTOGRAMS_DIR = IMAGES_DIR + '/histograms'
BAYESIAN_DIR = IMAGES_DIR + '/bayesian'
VISUALISATION_DIR = IMAGES_DIR + '/visualisations'

In [3]:
def save_fig(folder, fig_id, tight_layout=True):
    """Save the current matplotlib figure as ``<folder>/<fig_id>.png``.

    Parameters
    ----------
    folder : str
        Directory to write into; created (including parents) if missing.
    fig_id : str
        File name without extension; ``'.png'`` is appended.
    tight_layout : bool, default True
        When true, ``plt.tight_layout()`` is called before saving.

    Raises
    ------
    OSError
        If ``folder`` cannot be created (e.g. the path exists as a file).
    """
    # exist_ok=True replaces the separate isdir() check, so there is no window
    # in which another process can create the folder between check and create.
    os.makedirs(folder, exist_ok=True)

    file_path = os.path.join(folder, fig_id + '.png')
    if tight_layout:
        plt.tight_layout()
    plt.savefig(file_path, format='png', dpi=300)

In [4]:
def load_survey_data(path='All_Responses_Removed.csv'):
    """Read the survey responses CSV into a DataFrame.

    Parameters
    ----------
    path : str or path-like, default 'All_Responses_Removed.csv'
        Location of the CSV file. The default keeps the file name that was
        previously hard-coded, so existing no-argument calls behave the same.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV, exactly as returned by ``pd.read_csv``.
    """
    return pd.read_csv(path)

In [5]:
# Read the survey responses CSV into a DataFrame via load_survey_data().
user_responses = load_survey_data()

In [6]:
# Remove the 'Prolific ID' column (raises KeyError if it is absent, e.g. when
# this cell is run a second time without reloading the data).
user_responses = user_responses.drop(columns=['Prolific ID'])

# Normalise column names. regex=True is passed explicitly: pandas >= 2.0
# defaults to regex=False, which would treat these patterns as literal text
# and silently leave every column name unchanged.
# 1) collapse each run of whitespace into a single hyphen
user_responses.columns = user_responses.columns.str.replace(r'[\s\n\t ]+', '-', regex=True)
# 2) remove one lower-case letter a-d that sits directly before a hyphen
#    (presumably a question sub-part suffix -- TODO confirm against the CSV header)
user_responses.columns = user_responses.columns.str.replace(r'[a-d]-', '-', regex=True)

# The first eight columns and the remaining columns are each sorted by column
# name, then glued back together side by side.
demographics_data = user_responses.iloc[:, :8]
demographics_user_responses = demographics_data.reindex(sorted(demographics_data.columns), axis=1)
question_subset = user_responses.reindex(sorted(user_responses.columns[8:]), axis=1)
reordered_user_responses = pd.concat([demographics_user_responses, question_subset], axis=1)

demographics_column_indexes = ['Age', 'Gender', 'IUIPC-Awareness', 'IUIPC-Collection', 'IUIPC-Control',
                               'Online-Presence', 'Personal-Stability', 'Reciprocity']
# Independent copy so later changes to relevant_indexes do not touch the list above.
relevant_indexes = list(demographics_column_indexes)


In [7]:
def _bin_codes(values, upper_edges, open_ended):
    """Map each value to the ordinal index of the right-closed bin it falls in.

    Bin 0 is ``value <= upper_edges[0]``; bin i is
    ``upper_edges[i-1] < value <= upper_edges[i]``. When ``open_ended`` is true
    an extra last bin catches everything above ``upper_edges[-1]``.
    Values that fall in no bin (including NaN) are returned unchanged.
    The result keeps the dtype of ``values``.
    """
    edges = [-np.inf] + list(upper_edges)
    if open_ended:
        edges.append(np.inf)
    codes = pd.cut(values, bins=edges, labels=False, include_lowest=True)
    return codes.where(codes.notna(), values).astype(values.dtype)


# Codes are computed from the untouched columns of `user_responses` rather than
# by overwriting `reordered_user_responses` step by step. The previous in-place
# masks turned every code into 0 when the cell was executed a second time
# (all codes are <= 17 and <= 5); this version gives the same result on every run.
_AGE_UPPER_EDGES = [17, 24, 34, 44, 54, 64]          # codes 0..6, last bin is > 64
_ONLINE_PRESENCE_UPPER_EDGES = [5, 10, 15, 20, 25]   # codes 0..4

reordered_user_responses['Age'] = _bin_codes(
    user_responses['Age'], _AGE_UPPER_EDGES, open_ended=True)

# NOTE(review): Online-Presence values above 25 are left unbinned, exactly as
# before -- confirm the scale cannot exceed 25.
reordered_user_responses['Online-Presence'] = _bin_codes(
    user_responses['Online-Presence'], _ONLINE_PRESENCE_UPPER_EDGES, open_ended=False)

# Every 4th column from position 10 up to (not including) 207.
# Presumably one column per question -- TODO confirm against the column layout.
labels = reordered_user_responses.iloc[:, 10:207:4]

In [None]:
# Learn a Bayesian network structure over the selected label columns and draw it.
sub_labels = labels.iloc[:, 0:51]
column_names = list(sub_labels.columns.values)

fig = plt.figure(figsize=(30, 30), dpi=300)
bayesian_net_models = {}

# Single source of truth for the structure-learning algorithm, so the figure
# title cannot disagree with what was actually run (the title previously read
# "Exact" while the network was learned with the greedy algorithm).
structure_algorithm = 'greedy'

model = BayesianNetwork.from_samples(sub_labels, state_names=column_names,
                                     algorithm=structure_algorithm, n_jobs=-1)
plt.title(structure_algorithm.capitalize() + ' \n Bayesian Network \n' + '(Truthfulness)', fontsize=20)
model.plot(with_labels=True)

