In [1]:
import networkx as nx
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
!pip install rdflib



In [3]:
import rdflib
from rdflib.extras.external_graph_libs import rdflib_to_networkx_digraph
from rdflib.extras.external_graph_libs import rdflib_to_networkx_multidigraph

# Creation of the graph

In [6]:
g = rdflib.Graph()

categories = open("categories.ttl")
ingredients = open("ingredients.ttl")
countries = open("countries.ttl")
nutriscores = open("nutriscores.ttl")
products = open("products.ttl")

g.parse(categories, format='ttl')
g.parse(ingredients, format='ttl')
g.parse(countries, format='ttl')
g.parse(nutriscores, format='ttl')
g.parse(products, format='ttl')

BadSyntax: at line 1 of <>:
Bad syntax (expected directive or statement) at ^ in:
"b''^b'\xc3\xaf\xc2\xbb\xc2\xbf@prefix : <http://mapping.example.com/> .\n@prefix d2rq'..."

# Converting rdflib.Graph to networkx.Graph

In [6]:
mdg = rdflib_to_networkx_multidigraph(g)
dg = rdflib_to_networkx_digraph(g)
# gx = rdflib_to_networkx_graph(g)

In [7]:
mdg.number_of_nodes()

0

In [8]:
mdg.number_of_edges()

0

In [9]:
degrees = [val for (node, val) in mdg.degree()]
sum_degrees = sum(degrees)
avg_degree_g = sum_degrees / mdg.number_of_nodes()
avg_degree_g

12.921215981187077

# Centrality Measures

#### Degree Centrality

A measure of the "connectedness" of a node to all other nodes in a network. "For particular noden it is the fraction of nodes in the graph that it is connected to".

In [10]:
results = nx.degree_centrality(mdg)
ind_rank = {}

for node_id, rank in sorted(results.items(), key=lambda item: item[1], reverse=True)[:10]:
    print("{:6.3f} {}".format(rank, node_id))

 0.307 https://www.bbc.co.uk/ontologies/fo/Ingredient
 0.176 https://schema.org/Product
 0.112 0
 0.106 https://w3id.org/um/ken4256/country/france
 0.105 4.0
 0.066 1
 0.054 https://w3id.org/um/ken4256/ingredient/sel
 0.053 https://w3id.org/um/ken4256/category/plant-based-foods-and-beverages
 0.049 5
 0.048 d


In [11]:
results = nx.degree_centrality(mdg)
ind_rank = {}
topn = 0
for node_id, rank in sorted(results.items(), key=lambda item: item[1], reverse=True):
    if node_id.startswith('https://w3id.org/um/ken4256/ingredient/'):
        print("{:6.5f} {}".format(rank, node_id))
        topn += 1
        if topn > 10:
            break

0.05391 https://w3id.org/um/ken4256/ingredient/sel
0.04094 https://w3id.org/um/ken4256/ingredient/sucre
0.03887 https://w3id.org/um/ken4256/ingredient/eau
0.01820 https://w3id.org/um/ken4256/ingredient/farine_de_bl%C3%A9
0.01400 https://w3id.org/um/ken4256/ingredient/dextrose
0.01208 https://w3id.org/um/ken4256/ingredient/huile_de_tournesol
0.01122 https://w3id.org/um/ken4256/ingredient/conservateur
0.01109 https://w3id.org/um/ken4256/ingredient/acide_citrique
0.01106 https://w3id.org/um/ken4256/ingredient/%C3%A9mulsifiant
0.01031 https://w3id.org/um/ken4256/ingredient/lait
0.01028 https://w3id.org/um/ken4256/ingredient/acidifiant


#### Closeness Centrality

In [12]:
results = nx.closeness_centrality(mdg)
ind_rank = {}

for node_id, rank in sorted(results.items(), key=lambda item: item[1], reverse=True)[:10]:
    print("{:6.3f} {}".format(rank, node_id))

 0.354 https://www.bbc.co.uk/ontologies/fo/Ingredient
 0.176 https://schema.org/Product
 0.108 https://www.bbc.co.uk/ontologies/fo/ShoppingCategory
 0.106 https://w3id.org/um/ken4256/country/france
 0.103 4.0
 0.088 https://schema.org/Country
 0.054 https://w3id.org/um/ken4256/ingredient/sel
 0.053 https://w3id.org/um/ken4256/category/plant-based-foods-and-beverages
 0.053 France
 0.051 1


In [13]:
print("EIGENVECTOR CENTRALITY")
results = nx.eigenvector_centrality(dg)
ind_rank = {}

for node_id, rank in sorted(results.items(), key=lambda item: item[1], reverse=True)[:10]:
    print("{:6.3f} {}".format(rank, node_id))

EIGENVECTOR CENTRALITY
 0.905 https://www.bbc.co.uk/ontologies/fo/Ingredient
 0.408 https://www.bbc.co.uk/ontologies/fo/ShoppingCategory
 0.078 https://schema.org/Country
 0.041 France
 0.034 https://schema.org/Product
 0.021 sel
 0.021 Plant-based foods and beverages
 0.021 https://w3id.org/um/ken4256/country/france
 0.020 4.0
 0.018 Plant-based foods


#### Ranking with PageRank

In [14]:
page_rank = nx.pagerank(dg)

for node_id, rank in sorted(page_rank.items(), key=lambda item: item[1], reverse=True)[:10]:
    print("{:6.3f} {}".format(rank, node_id))

 0.095 https://www.bbc.co.uk/ontologies/fo/Ingredient
 0.020 https://www.bbc.co.uk/ontologies/fo/ShoppingCategory
 0.003 https://schema.org/Product
 0.003 https://schema.org/Country
 0.002 0
 0.002 https://w3id.org/um/ken4256/country/france
 0.001 4.0
 0.001 France
 0.001 1
 0.001 https://w3id.org/um/ken4256/category/plant-based-foods-and-beverages


# Clustering

In [15]:
cl = nx.clustering(dg, weight=10)
print("CLUSTERS")
print("=============")
print("The graph has {} clusters".format(len(cl)))
for i,c in enumerate(cl):
    print("Cluster {} has {} nodes".format(i, len(c)))
print()

KeyboardInterrupt: 

In [None]:
def transform_str(x):
    return str(x)

In [None]:
dg = rdflib_to_network_digraph(g, transform_s=transform_str, transform_o=transform_str)
G = dg.to_undirected()

In [None]:
preds= nx.jaccard_coefficient(G,[(u,v)])
for u, v, p in preds:
    print(f"({u}, {v}) -> {p:.8f}")

In [None]:
preds= nx.adamic_adar_index(G,[(u,v)])
for u, v, p in preds:
    print(f"({u}, {v}) -> {p:.8f}")

## Using network similarity, find the top 10 most similar ingredients to "golden zucchini"

In [None]:
u = 'https://w3id.org/um/ken4256/ingredient/golden_zucchini'
pairs =[]
for v in G.nodes:
    if v.startswith('https://w3id.org/um/ken4256/ingredient/') and v != u: 
        pairs.append((u,v))
pairs