In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import igraph as ig


import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for file_name in files:
        full_path = os.path.join(folder, file_name)
        print(full_path)

# Creating a sample graph from scratch

In [None]:
g = ig.Graph()

The above statement created an undirected graph with no vertices or edges and assigned it to the variable g. We called igraph's built-in class Graph() to build graph g. To confirm that it’s really an igraph graph, we can print it:

In [None]:
g

This tells us that g is an instance of igraph’s Graph class and that it is currently living at the memory address 0x7f9dd05d4650 (the exact output will almost surely be different for your platform). To obtain a more user-friendly output, we can print the graph using Python’s print() function:

In [None]:
print(g)

Above we can find some important information about the graph:

* Four character long code, where 'U' means undirected graph
* First number - the number of nodes
* Second number - the number of edges in the graph

In [None]:
# Append three new vertices to the graph g (which has no vertices so far).
g.add_vertices(3)
# The new vertices get the IDs 0, 1 and 2.

The above statement accesses the add_vertices() method of the Graph class through the object g, and it adds the given number of vertices to the graph.

In [None]:
print(g)

Now our graph has three vertices but no edges. Let’s add some edges as well! We can add edges by calling add_edges() method of the Graph class using the object g - but in order to add edges, we have to refer to existing vertices somehow. igraph uses integer vertex IDs starting from zero, thus the first vertex of your graph has index zero, the second vertex has index 1 and so on. Edges are specified by pairs of integers, so [(0,1), (1,2)] denotes a list of two edges: one between the first and the second, and the other one between the second and the third vertices of the graph. Passing this list to add_edges() method adds these two edges to our graph:

In [None]:
# Connect vertex 0 with vertex 1 and vertex 1 with vertex 2, then print the summary.
first_edges = [(0, 1), (1, 2)]
g.add_edges(first_edges)
print(g)

Above we can find some important information about the graph:

* Four character long code, where 'U' means undirected graph
* First number - the number of nodes
* Second number - the number of edges in the graph
* List of edges

In [None]:
# Grow the graph g: three extra vertices (IDs 3, 4 and 5) and five more edges.
# The edges are listed in the order they should receive their IDs.
g.add_vertices(3)
more_edges = [(2, 0), (2, 3), (3, 4), (4, 5), (5, 3)]
g.add_edges(more_edges)
print(g)

Note: Edges also have IDs, similarly to vertices; they also start from zero and edges that were added later have higher IDs than edges that were added earlier. Vertex and edge IDs are always continuous, and a direct consequence of this fact is that if we happen to delete an edge, chances are that some (or all) of the edges will be renumbered. Moreover, if we delete a vertex, even the vertex IDs will change.

In [None]:
# Look up the ID of the edge between vertices 2 and 3 using the get_eid() method
g.get_eid(2, 3)

Edges can be deleted by delete_edges() method and it requires a list of edge IDs to be deleted (or a single edge ID). Vertices can be deleted by delete_vertices() and it requires a list of vertex IDs to be deleted (or a single vertex ID).

In [None]:
# Remove the edge whose ID is 3 and print the updated summary.
edge_to_drop = 3
g.delete_edges(edge_to_drop)
print(g)

In [None]:
#Let's visualize our graph g
ig.plot(g,bbox=(200,200))

## Creating graphs

In [None]:
# Build an undirected graph on the vertices 0, 1 and 2 with three edges
# in a single constructor call, then print and draw it.
triangle_edges = [(0, 1), (1, 2), (2, 0)]
g1 = ig.Graph(n=3, edges=triangle_edges, directed=False)
print(g1)
ig.plot(g1, bbox=(200, 200))

On the plot above we have 3 nodes (0, 1, 2) and undirected edges that connect the nodes in the following way: 0--1, 1--2, 2--0.

In [None]:
#save graph as png
ig.plot(g1,"g1.png", bbox=(300,300))

In [None]:
# Another graph: eight vertices and seven edges, printed and then drawn.
g2_edges = [(0, 1), (1, 2), (2, 0), (3, 4), (7, 4), (3, 6), (1, 5)]
g2 = ig.Graph(n=8, edges=g2_edges)
print(g2)
ig.plot(g2, bbox=(200, 200))

Above is an example of a disconnected graph.

Having no names of the nodes gives limited information, so let us name them:

In [None]:
# Same three-vertex graph as before, drawn with a name next to each vertex.
g3_edges = [(0, 1), (1, 2), (2, 0)]
g3_labels = ["Jim", "John", "Jill"]
g3 = ig.Graph(n=3, edges=g3_edges, directed=False)
ig.plot(g3, bbox=(200, 200), vertex_label=g3_labels)

# Edge, vertex, and network attributes

### Let us assume that our graph is a social network where vertices represent people and edges represent social connections between them.

In [None]:
# A small imaginary social network: each pair is one connection between two people.
social_ties = [
    (0, 1), (0, 2), (2, 3), (3, 4), (4, 2),
    (2, 5), (5, 0), (6, 3), (5, 6),
]
g = ig.Graph(social_ties)

Now, let us assume that we want to store the names, ages and genders of people in this network as vertex attributes, and for every connection, we want to store whether this is an informal friendship tie or a formal tie. Every Graph object contains two special members called vs and es, standing for the sequence of all vertices and all edges, respectively. If you try to use vs or es as a Python dictionary, you will manipulate the attribute storage area of the graph:

In [None]:
g.vs

In [None]:
# Vertex attributes: one value per vertex, listed in vertex-ID order.
person_attributes = {
    "name": ["Alice", "Bob", "Claire", "Dennis", "Esther", "Frank", "George"],
    "age": [25, 31, 18, 47, 22, 23, 50],
    "gender": ["f", "m", "f", "m", "f", "m", "m"],
}
for attr_name, attr_values in person_attributes.items():
    g.vs[attr_name] = attr_values

# Edge attribute: one flag per edge, listed in edge-ID order.
formal_flags = [False, False, True, True, True, False, True, False, False]
g.es["is_formal"] = formal_flags

In [None]:
print(g)

Now we can see some additional information in summary:

* D or U, for a directed or undirected graph
* N for a named graph (where nodes have a name attribute)
* W for a weighted graph (where edges have a weight attribute)
* B for a bipartite (two-mode) graph (where nodes have a type attribute)

The two numbers that follow (7 9) refer to the number of nodes and edges in the graph. The description also lists node & edge attributes, for example:

* (g) - graph-level attribute
* (v) - vertex-level attribute
* (e) - edge-level attribute

In [None]:
#Change attribute value at specific index
g.es[0]

In [None]:
g.es[0].attributes()

In [None]:
# Mark the first edge (ID 0) as a formal tie and display that edge.
first_edge = g.es[0]
first_edge["is_formal"] = True
first_edge

# Graph properties and representations

### 1. **Vertex degree:** The degree of a vertex equals the number of edges adjacent to it. In case of directed networks, we can also define in-degree (the number of edges pointing towards the vertex) and out-degree (the number of edges originating from the vertex). igraph is able to calculate all of them using a simple syntax:

In [None]:
# Undirected version of the example network; the last line shows the degree of every vertex.
undirected_ties = [
    (0, 1), (0, 2), (2, 3), (3, 4), (4, 2),
    (2, 5), (5, 0), (6, 3), (5, 6),
]
g_undirected = ig.Graph(undirected_ties, directed=False)
g_undirected.degree()

In [None]:
# Name the vertices of the undirected graph and draw it with those names as labels.
undirected_names = ["Alice", "Bob", "Claire", "Dennis", "Esther", "Frank", "George"]
g_undirected.vs["name"] = undirected_names
ig.plot(
    g_undirected,
    bbox=(200, 200),
    vertex_label=g_undirected.vs["name"],
)

If the graph was directed, we would have been able to calculate the in- and out-degrees separately using g.degree(mode="in") and g.degree(mode="out").

In [None]:
# Directed version of the example network: each pair is read as (source, target).
directed_ties = [
    (0, 1), (0, 2), (2, 3), (3, 4), (4, 2),
    (2, 5), (5, 0), (6, 3), (5, 6),
]
g_directed = ig.Graph(directed_ties, directed=True)

In [None]:
g_directed.degree(mode="in")

In [None]:
g_directed.degree(mode="out")

In [None]:
# Name the vertices of the directed graph and draw it with those names as labels.
directed_names = ["Alice", "Bob", "Claire", "Dennis", "Esther", "Frank", "George"]
g_directed.vs["name"] = directed_names
ig.plot(
    g_directed,
    bbox=(200, 200),
    vertex_label=g_directed.vs["name"],
)

In [None]:
print(g_directed)

In [None]:
print(g_undirected)

### 2. **Edge list:** An edge list is a data structure used to represent a graph as a list of its edges. An edge is defined by its start and end vertex, so each edge may be represented by two numbers. The entire edge list may be represented as a two-column matrix. igraph is able to calculate it using a simple syntax:

In [None]:
print(g_directed.get_edgelist())

### 3. **Adjacency matrix:** An adjacency matrix is a square matrix used to represent a finite graph. The elements of the matrix indicate whether pairs of vertices are adjacent or not in the graph. igraph is able to calculate it using a simple syntax:

In [None]:
print(g_directed.get_adjacency())

### 4. **Adjacency list:** An adjacency list represents a finite graph as a collection of lists, one per vertex, where each list contains the vertices adjacent to that vertex. igraph can compute it with `Graph.get_adjlist()`.

In [None]:
# Display a PNG file from the notebook's input directory inline.
# NOTE(review): presumably an illustration of the graph representations above — confirm.
from IPython.display import Image
Image("../input/graphimage/Graphs.png")

# Network Analysis

Now that we have learned the basic functions for working with graphs, we will apply them in practice. To be more precise, we will study a dataset to understand how different media organizations are related to each other. That will let us know which marketing channel is better to use for advertising.

We will start by loading the data:

In [None]:
# Load the node and edge tables of the media network from CSV files.
# Later cells read the columns 'id' and 'type.label' from nodes and
# 'from', 'to', 'type' and 'weight' from edges.
nodes = pd.read_csv('../input/network-analysis-data-from-various-sources/InputFileNodes.csv')
edges = pd.read_csv('../input/network-analysis-data-from-various-sources/InputFileEdges.csv')

In [None]:
nodes.head()

In [None]:
nodes.info()

In [None]:
edges.head()

In [None]:
edges.info()

In [None]:
# Build a directed graph from the edge table: the first two columns of each
# row identify the endpoints, every remaining column becomes an edge attribute.
edge_rows = edges.values
edge_attr_names = edges.columns[2:]
g = ig.Graph.TupleList(edge_rows, directed=True, edge_attrs=edge_attr_names)

In [None]:
print(g)

In [None]:
# Copy every column of the node table onto the graph as a vertex attribute.
# Index the node table by 'id' so rows can be looked up by vertex name.
nodes_df = nodes.set_index(['id'])
# Align the table with the graph once: row i now belongs to vertex i of g.
vertex_rows = nodes_df.loc[g.vs['name']]
for column in vertex_rows.columns:
    # Pass a plain list rather than a pandas Series, so the values are
    # assigned by position regardless of how the Series index is labelled.
    g.vs[column] = vertex_rows[column].tolist()

In [None]:
# Index the node table by its 'id' column and preview the first three rows.
nodes_df = nodes.set_index('id')
nodes_df.head(3)

In [None]:
print(g)

In [None]:
# Build a directed igraph object from the node and edge tables.
# Vertices are identified by the 'id' field of each node record; the
# endpoints of each edge are taken from its 'from' and 'to' fields.
vertex_records = nodes.to_dict('records')
edge_records = edges.to_dict('records')
G = ig.Graph.DictList(
    vertices=vertex_records,
    edges=edge_records,
    directed=True,
    vertex_name_attr='id',
    edge_foreign_keys=('from', 'to'),
)

As you can analyse, the total number of edges is bigger than the number of unique (from, to) combinations. This shows us that some pairs of nodes are connected by two or more edges.

We will collapse all links of the same type between the same two nodes by summing their weights:

In [None]:
# Collapse edges that share source, target and type into one row whose
# weight is the sum of the individual weights, then compare table sizes.
grouped_edges = edges.groupby(['from', 'to', 'type'])
summed_weights = grouped_edges['weight'].sum()
links = summed_weights.reset_index()
print(edges.shape, '\n', links.shape)

In [None]:
# Rebuild the directed graph from the collapsed edge table: the first two
# columns of each row identify the endpoints, the rest become edge attributes.
g = ig.Graph.TupleList(links.values, directed=True,
                       edge_attrs=links.columns[2:])

# Copy every node-table column onto the graph as a vertex attribute.
# Align the table with the graph once: row i now belongs to vertex i of g.
vertex_rows = nodes_df.loc[g.vs['name']]
for column in vertex_rows.columns:
    # Pass a plain list rather than a pandas Series, so the values are
    # assigned by position regardless of how the Series index is labelled.
    g.vs[column] = vertex_rows[column].tolist()

In [None]:
ig.plot(g, "Graph.png", bbox=(300,300), vertex_label=g.vs["name"])

# Layouts and plotting

In [None]:
# Compute vertex positions with igraph's Kamada-Kawai layout routine.
layout1 = g.layout_kamada_kawai()

# Colour lookups: vertices are coloured by their "type.label" attribute,
# edges by their "type" attribute.
vertex_type_dict = {"Newspaper": "blue", "TV": "pink", "Online": "red"}
edge_type_dict = {"hyperlink": "black", "mention": "gray"}

vertex_colors = [vertex_type_dict[label] for label in g.vs["type.label"]]
vertex_names = g.vs["name"]
# Edge width is one tenth of the edge weight.
edge_widths = [w/10 for w in g.es['weight']]
edge_colors = [edge_type_dict[kind] for kind in g.es["type"]]

# All drawing options collected in one dict and unpacked into ig.plot().
visual_style = {
    "vertex_size": 30,
    "vertex_color": vertex_colors,
    "vertex_label": vertex_names,
    "edge_width": edge_widths,
    "edge_color": edge_colors,
    "layout": layout1,
    "bbox": (400, 400),
    "margin": 20,
}
ig.plot(g, "Graph_color.png", **visual_style)