# Convert CSV to a graph

In [19]:
import itertools

import networkx as nx
import pandas as pd

In [20]:
def import_csv(filename):
    """Read a CSV source into a pandas DataFrame.

    Args:
        filename: anything ``pd.read_csv`` accepts as its first argument
            (the notebook passes a file path).

    Returns:
        The DataFrame produced by ``pd.read_csv`` with default options.
    """
    frame = pd.read_csv(filename)
    return frame

In [23]:
def preprocessing(filename):
    """Load the survey CSV and reshape it for the graph-building steps.

    The 'Timestamp' column is discarded and the 'Name' column becomes the
    (unnamed) row index, so every remaining column is one survey question
    and every row label is one respondent.

    Args:
        filename: CSV source, forwarded unchanged to ``import_csv``.

    Returns:
        DataFrame indexed by respondent name, without the timestamp column.

    Raises:
        KeyError: if the 'Timestamp' or 'Name' column is missing.
    """
    frame = import_csv(filename)
    frame = frame.drop(columns='Timestamp')
    frame = frame.set_index('Name')
    # Clear the index label so it is not shown as a header above the names.
    frame.index.name = None
    return frame

In [25]:
# Load and clean the survey responses; the trailing bare expression makes the
# notebook display the first three rows.
data = preprocessing("Survey.csv")
data.head(3)

Unnamed: 0,Is a hot dog a sandwich?,Is a Klein bottle with filling a ravioli?,Is it possible for one person to produce both a ghost and a zombie when they die?,Is spam good?,Should pineapple go on pizza?
Ghost,No,No,No,Yes,Yes
Zombie,Yes,No,Yes,Yes,No
Turkey,No,No,Yes,Yes,Yes


In [28]:
def initialize_graph(data):
    """Build an edgeless graph with the name/identifiers as nodes.

    Args:
        data: pandas DataFrame whose row index holds one name/identifier per
            respondent.

    Returns:
        A new ``networkx.Graph`` containing one node per distinct row label
        and no edges.
    """
    G = nx.Graph()
    # The row labels are the only thing needed here, so read them from the
    # index directly instead of materialising every row with iterrows().
    G.add_nodes_from(data.index)
    return G

In [31]:
def build_graph(data, graph=None):
    """Connect every pair of respondents who gave the same answer to a question.

    Iterates through all question columns and adds an edge between two
    respondents as soon as they agree on at least one question.

    Args:
        data: pandas DataFrame with one row per respondent (row label = name)
            and one column per question.
        graph: optional graph to add the edges to. It is modified in place.
            When omitted, a fresh graph is created with
            ``initialize_graph(data)`` so the function no longer depends on
            notebook-global state.

    Returns:
        The graph that received the edges (``graph`` itself when one was
        passed in).
    """
    if graph is None:
        graph = initialize_graph(data)

    names = list(data.index)
    # DataFrame.items() replaces iteritems(), which was removed in pandas 2.0.
    for _question, answers in data.items():
        labelled_answers = zip(names, answers)
        # Each unordered pair is visited once; the graph is undirected, so the
        # mirrored pair would only re-add the same edge.
        for (name_a, answer_a), (name_b, answer_b) in itertools.combinations(
            labelled_answers, 2
        ):
            # Plain == comparison: a missing answer (NaN) never equals
            # anything, so it never counts as agreement. The name check keeps
            # repeated row labels from producing a self-loop.
            if answer_a == answer_b and name_a != name_b:
                graph.add_edge(name_a, name_b)
    return graph

In [32]:
# NOTE(review): G is not assigned in any cell shown here — presumably it holds
# the graph returned by build_graph(data) from an earlier cell; confirm.
print(G.edges)

[('Ghost', 'Turkey'), ('Ghost', 'Cactus'), ('Ghost', 'Zombie'), ('Ghost', 'Santa Claus'), ('Ghost', 'Cat'), ('Zombie', 'Santa Claus'), ('Zombie', 'Cat'), ('Zombie', 'Turkey'), ('Zombie', 'Cactus'), ('Turkey', 'Cactus'), ('Turkey', 'Santa Claus'), ('Turkey', 'Cat'), ('Santa Claus', 'Cat'), ('Santa Claus', 'Cactus'), ('Cat', 'Cactus')]
