## Import Libraries & Data

In [1]:
import re
from collections import Counter
from itertools import combinations

import networkx as nx
import numpy as np
import pandas as pd

In [None]:
# Load the three source tables into dataframes; the CSV files must already be
# present in the local "data" directory.
dfMenu, dfMenuPage, dfMenuItem = map(
    pd.read_csv,
    ("data/Menu.csv", "data/MenuPage.csv", "data/MenuItem.csv"),
)

## Cleaning the Data
Only the first 100 sponsors with a dinner menu are added to the dictionary, because the source files are very large.

In [None]:
# Build menuDict: sponsor -> flat list of dish IDs found on that sponsor's
# dinner menus. A sponsor with several dinner menus gets all of their dishes
# concatenated, so the same dish ID may appear more than once.

MAX_SPONSORS = 100  # cap on distinct sponsors; the full files are too large to process entirely
DINNER_PATTERN = re.compile(r'\bDINNER\b')  # whole-word, case-sensitive match, compiled once

menuDict = {}

for _, row in dfMenu.iterrows():
    # Stop scanning as soon as the sponsor cap has been reached
    if len(menuDict) == MAX_SPONSORS:
        break

    # Columns are read by position. `.iloc` makes that explicit: plain `row[0]`
    # on a label-indexed Series relies on a deprecated positional fallback.
    menuId = row.iloc[0]   # menu ID
    sponsor = row.iloc[2]  # sponsor name
    event = row.iloc[3]    # event description (presumably the "event" column -- verify against Menu.csv)

    # Only menus whose event mentions DINNER are of interest
    if not DINNER_PATTERN.search(str(event)):
        continue

    # MenuPage.id is what MenuItem.menu_page_id refers to, so first collect
    # the page IDs belonging to this menu...
    seriesPageID = dfMenuPage.loc[dfMenuPage['menu_id'] == menuId, 'id']

    # ...then gather the dish IDs on each page, skipping items without a dish
    dishList = []
    for menuPageID in seriesPageID:
        seriesDishID = dfMenuItem.loc[
            dfMenuItem['menu_page_id'] == menuPageID, 'dish_id'
        ].dropna()
        dishList.extend(int(dishID) for dishID in seriesDishID)

    # Append to the sponsor's existing dishes, creating the entry on first sight
    menuDict.setdefault(sponsor, []).extend(dishList)

## Creating the network

In [None]:
# Empty undirected graph; later cells add one node per sponsor and weighted edges between them
g = nx.Graph()

In [None]:
# Visual sanity check: show every sponsor followed by its list of dish IDs
for sponsor1, dishID1 in menuDict.items():
    print(sponsor1, dishID1, sep="\n")

In [None]:
# Build the sponsor network: one node per sponsor, and an edge between two
# sponsors whenever their dinner menus share at least one dish ID.
#
# The comparison runs exactly once over every unordered pair of sponsors.
# Nesting it inside a loop over all sponsors would repeat the whole pass once
# per sponsor and multiply every edge weight by the number of sponsors.

# One (node name, dish multiset) entry per sponsor, in menuDict order
sponsorDishCounts = [
    (str(sponsor), Counter(dishList)) for sponsor, dishList in menuDict.items()
]

# Nodes enter the graph in this order: the first sponsor, then sponsors in the
# order of their first edge, then sponsors without any edge. Later cells rank
# nodes with a stable sort, so this insertion order decides how ties are broken.
if sponsorDishCounts:
    g.add_node(sponsorDishCounts[0][0])

for (sponsor1, counts1), (sponsor2, counts2) in combinations(sponsorDishCounts, 2):
    # Edge weight = number of (dish on menu 1, dish on menu 2) pairs with the
    # same ID. A dish listed several times counts once per pair of occurrences,
    # which is what comparing the two raw lists element by element yields.
    sharedPairs = sum(
        counts1[dishID] * counts2[dishID]
        for dishID in counts1.keys() & counts2.keys()
    )
    if sharedPairs:
        # The weight is assigned, not incremented, so re-running this cell
        # does not inflate it
        g.add_edge(sponsor1, sponsor2, weight=sharedPairs)

# Sponsors that share no dish with anyone still get a node
g.add_nodes_from(name for name, _ in sponsorDishCounts)

Print the number of nodes and edges. This is useful for double-checking, especially to make sure the graph has been imported correctly into Gephi.

In [None]:
# Report the size of the graph (node count, then edge count)
print(f"Number of nodes: {len(g.nodes)}")
print(f"Number of edges: {len(g.edges)}")

Write the graph to a GraphML file, which can be opened with Gephi.

In [None]:
# Save the graph as GraphML in the working directory so it can be viewed in an external tool
nx.write_graphml(g, "menu.graphml")

### Calculate the top and bottom three for Degree Centrality

In [None]:
# Degree centrality of every node in the graph, keyed by node name
centrality_degree = nx.degree_centrality(g)

# Three most central nodes: sort descending by centrality and keep the head
top_three = sorted(centrality_degree, key=centrality_degree.get, reverse=True)[:3]
for rank, node in enumerate(top_three, start=1):
    print(f"{rank}. {node} has a centrality of {centrality_degree[node]}")

print("===================================================")

# Three least central nodes: same ranking, ascending order
bottom_three = sorted(centrality_degree, key=centrality_degree.get)[:3]
for rank, node in enumerate(bottom_three, start=1):
    print(f"{rank}. {node} has a centrality of {centrality_degree[node]}")

Bias check: see the Medium post for more information about this.

In [None]:
# Compare how many dish entries were collected for two groups of sponsors;
# the groups are printed one after the other, separated by a rule.
first_group = ['CUNARD LINE', 'HOTEL SAVOY', 'MAXWELL HOUSE']
for name in first_group:
    print(f"Length: {len(menuDict[name])}")

print("===================================================")

amerique = 'LEGATION DES ETAT-UNIS D\'AMERIQUE'
second_group = ['MR. S.R.BLOOMFIELD', amerique, 'TIMEO HOTEL']
for name in second_group:
    print(f"Length: {len(menuDict[name])}")