# Practice Session 03: Management of networks data

Author: <font color="blue">Nil Tomas Plans</font>

E-mail: <font color="blue">nil.tomas01@estudiant.upf.edu</font>

Date: <font color="blue">4/10/2023</font>

# 1. The flavors bi-partite graph

## 1.1. Read the bipartite graph in a dataframe


In [5]:
# Feel free to add imports if you need them

import io
import csv
import pandas as pd
import networkx as nx

from networkx.algorithms import bipartite

import numpy as np
import matplotlib
import scipy

import itertools

from IPython.display import Image

In [16]:
# Leave this code as-is

# Names of the three tab-separated input files; they are read with pandas in the next cell.
INPUT_INGR_FILENAME = "ingredients.tsv"  # ingredient table
INPUT_COMP_FILENAME = "compounds.tsv"  # compound table
INPUT_INGR_COMP_FILENAME = "ingredient-compound.tsv"  # ingredient_id / compound_id pairs

In [17]:
# Leave this code as-is

# Load the ingredient table from its tab-separated file and preview the first 3 rows.
ingredients = pd.read_csv(INPUT_INGR_FILENAME, sep="\t")
display(ingredients.head(3))

# Load the compound table the same way and preview it.
compounds = pd.read_csv(INPUT_COMP_FILENAME, sep="\t")
display(compounds.head(3))

# Load the ingredient-compound table (one ingredient_id / compound_id pair per row) and preview it.
ingr_comp = pd.read_csv(INPUT_INGR_COMP_FILENAME, sep="\t")
display(ingr_comp.head(3))


Unnamed: 0,ingredient_id,ingredient_name,ingredient_category
0,0,magnolia_tripetala,flower
1,1,calyptranthes_parriculata,plant
2,2,chamaecyparis_pisifera_oil,plant derivative


Unnamed: 0,compound_id,compound_name,compound_code
0,0,jasmone,488-10-8
1,1,5-methylhexanoic_acid,628-46-6
2,2,l-glutamine,56-85-9


Unnamed: 0,ingredient_id,compound_id
0,1392,906
1,1259,861
2,1079,673


## 1.2. Create the flavors bipartite network

In [57]:
# Flavors: attach to every ingredient-compound pair the ingredient and compound attributes.
# Step 1 joins ingredients with the pairs on ingredient_id; step 2 re-indexes the
# result by compound_id and joins the compound attributes. Both joins are inner
# joins, so rows without a match on either side are dropped.
result_flavors = (
    ingredients
    .set_index('ingredient_id')
    .join(ingr_comp.set_index('ingredient_id'), how='inner')
    .set_index("compound_id")
    .join(compounds.set_index("compound_id"), how='inner')
)
display(result_flavors.head(20))

Unnamed: 0_level_0,ingredient_name,ingredient_category,compound_name,compound_code
compound_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,red_bean,vegetable,jasmone,488-10-8
0,jasmine_tea,plant derivative,jasmone,488-10-8
0,jasmine,flower,jasmone,488-10-8
0,soybean,vegetable,jasmone,488-10-8
0,dried_black_tea,plant derivative,jasmone,488-10-8
0,ceylon_tea,plant derivative,jasmone,488-10-8
0,pittosporum_glabratum,plant,jasmone,488-10-8
0,mung_bean,vegetable,jasmone,488-10-8
0,fermented_tea,plant derivative,jasmone,488-10-8
0,fermented_russian_black_tea,plant derivative,jasmone,488-10-8


In [58]:
# Preview the joined table with a plain 0..n-1 index instead of the compound_id index.
#
# The previous version of this cell also assigned A to a column-dropped frame and
# to a sorted frame, but each of those results was overwritten before being read,
# so the displayed table was never sorted or reduced. Those dead assignments are
# removed; the value of A and the displayed output are unchanged.

# NOTE(review): B is not read by any cell visible in this notebook; it is kept so
# that the notebook namespace stays the same. Remove it if nothing else needs it.
B = result_flavors.drop(columns=['compound_name'])

# drop=True discards the old compound_id index instead of turning it into a column.
A = result_flavors.reset_index(drop=True)
display(A.head(20))


Unnamed: 0,ingredient_name,ingredient_category,compound_name,compound_code
0,red_bean,vegetable,jasmone,488-10-8
1,jasmine_tea,plant derivative,jasmone,488-10-8
2,jasmine,flower,jasmone,488-10-8
3,soybean,vegetable,jasmone,488-10-8
4,dried_black_tea,plant derivative,jasmone,488-10-8
5,ceylon_tea,plant derivative,jasmone,488-10-8
6,pittosporum_glabratum,plant,jasmone,488-10-8
7,mung_bean,vegetable,jasmone,488-10-8
8,fermented_tea,plant derivative,jasmone,488-10-8
9,fermented_russian_black_tea,plant derivative,jasmone,488-10-8


In [59]:
# Build the flavors table and show its first 20 rows.
# compound_code is not needed downstream, so that column is left out.
A = result_flavors.drop('compound_code', axis='columns')

# Write the table as tab-separated text; the compound_id index is written as the first column.
A.to_csv('flavors.tsv', sep="\t")

display(A.head(20))

Unnamed: 0_level_0,ingredient_name,ingredient_category,compound_name
compound_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,red_bean,vegetable,jasmone
0,jasmine_tea,plant derivative,jasmone
0,jasmine,flower,jasmone
0,soybean,vegetable,jasmone
0,dried_black_tea,plant derivative,jasmone
0,ceylon_tea,plant derivative,jasmone
0,pittosporum_glabratum,plant,jasmone
0,mung_bean,vegetable,jasmone
0,fermented_tea,plant derivative,jasmone
0,fermented_russian_black_tea,plant derivative,jasmone


In [52]:
# Read the saved flavors file back in to check its contents.
name_file = "flavors.tsv"

# index_col=0 uses the first column of the file as the index; without it that
# column would be loaded as a regular data column next to a fresh default index.
create_tab_file = pd.read_csv(
    name_file,
    delimiter="\t",
    index_col=0,
)

display(create_tab_file.head(20))

Unnamed: 0_level_0,ingredient_name,ingredient_category,compound_name
compound_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,red_bean,vegetable,jasmone
0,jasmine_tea,plant derivative,jasmone
0,jasmine,flower,jasmone
0,soybean,vegetable,jasmone
0,dried_black_tea,plant derivative,jasmone
0,ceylon_tea,plant derivative,jasmone
0,pittosporum_glabratum,plant,jasmone
0,mung_bean,vegetable,jasmone
0,fermented_tea,plant derivative,jasmone
0,fermented_russian_black_tea,plant derivative,jasmone


## 1.3. Open this bi-partite network in Cytoscape


In [19]:
# KEEP THIS CELL AS-IS
# The image already includes the legend.

# Just adjust width/height if necessary

Image(url="flavors.png", width=1200,height=1200)


In [18]:
# KEEP THIS CELL AS-IS

# Embed the "compounds-in-common.png" picture by URL, with width=1200.
Image(url="compounds-in-common.png", width=1200)

<font size="+1">The two nodes have 63 neighboring nodes (compounds) in common, and approximately 18 of those contain some type of sulfur. Examples: propyl_thioacetate, dimethyl_trisulfide, and diallyl_trisulfide.</font>

# 2. The ingredient-ingredient graph

## 2.1. Create an ingredient-ingredient.csv file


In [53]:
# Collect every ingredient name into the NumPy array ingredients_array.
ingredients_array = np.asarray(ingredients.loc[:, 'ingredient_name'])

print("ingredients array: ", ingredients_array)
# Report how many ingredient names were collected.
print("\nnumber of ingredients: ", ingredients_array.shape[0])

ingredients array:  ['magnolia_tripetala' 'calyptranthes_parriculata'
 'chamaecyparis_pisifera_oil' ... 'green_tea' 'artemisia_porrecta_oil'
 'munster_cheese']

number of ingredients:  1530


In [61]:
# Create the dictionary ingredient_to_compounds: ingredient name -> set of the
# compound names paired with that ingredient in result_flavors.
# Print the number of keys of this dictionary. It should be less than or equal to
# the number of ingredients, because only ingredients that appear in
# result_flavors get a key.

ingredients_array = np.asarray(ingredients['ingredient_name'])
print("There are %d ingredients" % (len(ingredients_array)))

ingredient_to_compounds = {}  # ingredient name -> set of compound names

# Walk the two name columns in parallel rather than using DataFrame.iterrows(),
# which builds a full Series for every row just to read two fields.
for key, value in zip(result_flavors['ingredient_name'], result_flavors['compound_name']):
    # setdefault creates the empty set the first time an ingredient is seen.
    ingredient_to_compounds.setdefault(key, set()).add(value)

# Number of distinct ingredients that have at least one compound in result_flavors.
print("Hay %d ingredientes" % len(ingredient_to_compounds))
    


There are 1530 ingredients
Hay 1525 ingredientes


In [103]:
# Minimum number of shared compounds for two ingredients to be linked.
MIN_COMMON_COMPOUNDS = 70

# Undirected graph: nodes are ingredient names, edges link ingredients that share
# at least MIN_COMMON_COMPOUNDS compounds.
ingredient_ingredient = nx.Graph()

# Only names that have a compound set can form an edge. Filtering them once, in
# their original order, yields the same pairs in the same order as testing
# membership inside the pair loop.
candidates = [name for name in ingredients_array if name in ingredient_to_compounds]

for u, v in itertools.combinations(candidates, 2):
    # Number of compounds the two ingredients have in common.
    weight = len(ingredient_to_compounds[u] & ingredient_to_compounds[v])
    if weight < MIN_COMMON_COMPOUNDS:
        continue
    # The count is stored on the edge under the attribute name "w".
    ingredient_ingredient.add_edge(u, v, w=weight)
        

In [101]:
# Leave as-is
# Report the size of the ingredient-ingredient graph: its node count and its edge count.
print("The ingredient-ingredient graph has %d nodes and %d edges" %
      (ingredient_ingredient.number_of_nodes(), ingredient_ingredient.number_of_edges()))

The ingredient-ingredient graph has 165 nodes and 2153 edges


In [104]:
# Path of the GML file that the ingredient-ingredient graph is written to.
OUTPUT_INGR_INGR_FILENAME = "ingredient-ingredient.gml"


In [108]:
# Save the ingredient-ingredient graph in GML format. stringizer=None means no
# custom conversion is applied to non-string values.
nx.write_gml(
    ingredient_ingredient,
    OUTPUT_INGR_INGR_FILENAME,
    stringizer=None,
)

## 2.2. Work with this file in Cytoscape

In [109]:
# Change width if necessary

# Show the ingredient-ingredient network picture followed by its legend.
# Change the widths if necessary.
display(
    Image(url="ingr-ingr.png", width=1200),
    Image(url="ingr-ingr-legend.gif", width=400),
)

<font size="+1">Una bona combinació d'aliments és entre la carn i els làctics: un cop tens la carn acabada de fer, hi pots afegir algun formatge per sobre, i així es desfà (ex.: fried beef i cheddar).
La segona combinació és entre la fruita i els fruits secs (per exemple: mango i peanuts), una combinació molt bona sobretot abans de realitzar qualsevol sessió d'exercici físic, ja que aquests dos aliments aporten moltes vitamines i la fibra necessària perquè l'energia que aporten es consumeixi a poc a poc i no en pics.</font>

<font size="+2" color="#003300">I hereby declare that, except for the code provided by the course instructors, all of my code, report, and figures were produced by myself.</font>
