# 1. Small-world network example

Before you start, read this short (2.5-page) paper:

"Emergence of Scaling in Random Networks", Albert-László Barabási and Réka Albert, https://web.archive.org/web/20120417112354/http://www.nd.edu/~networks/Publication%20Categories/03%20Journal%20Articles/Physics/EmergenceRandom_Science%20286,%20509-512%20(1999).pdf


In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Location of the ITAS 2009-2017 spreadsheet, relative to the working directory.
fname = Path('./2022/data/itas_2009-2017_v8.xls')

# Load the spreadsheet into a DataFrame.
itas_all = pd.read_excel(io=fname)

# Preview the first five rows.
itas_all.head(n=5)

In [None]:
# Collapse the raw table to one row per distinct (title, author) pair; the
# remaining columns hold per-pair counts and are not used below.
title_author = itas_all.groupby(by=['title', 'author_id_new']).count().reset_index()

# `authors` must hold the author ids and `titles` the paper titles. The two
# were previously swapped, which made every downstream matrix describe
# papers linked by shared authors rather than authors linked by shared papers.
authors, titles = title_author.author_id_new, title_author.title
title_author.head(5)

### Build Incidence matrix

In [None]:
# Cross-tabulate the two series: one row per distinct value of `authors`,
# one column per distinct value of `titles`, cells count co-occurrences.
inc_table = pd.crosstab(index=authors, columns=titles)

# Show only the top-left 5x5 corner; the full table is too wide to display.
inc_table.iloc[:5, :5]

### Build Adjacency matrix

(very sparse)

In [None]:
# One-mode projection of the incidence table: entry (i, j) is the dot product
# of rows i and j of `inc_table`, i.e. (for a 0/1 table) the number of columns
# the two rows share.
adjacency_matrix = inc_table.values.dot(inc_table.values.T)

# The diagonal holds each row's dot product with itself. `np.diag` returns a
# view into `adjacency_matrix`, so take a copy *before* the diagonal is
# cleared; otherwise `n_papers` would silently become all zeros.
n_papers = np.diag(adjacency_matrix).copy()

# Remove self-loops.
np.fill_diagonal(adjacency_matrix, 0)

In [None]:
# Render the adjacency matrix as an image with a qualitative colour map and
# attach a colour bar for the cell values.
heatmap = plt.imshow(adjacency_matrix, cmap='Paired')
plt.colorbar(heatmap);

In [None]:
# Binarize in place: any entry larger than 1 is reduced to 1, so the matrix
# records only whether a link exists, not how strong it is.
repeated_links = adjacency_matrix > 1
adjacency_matrix[repeated_links] = 1

### Plot degree distribution

In [None]:
# Row sums of the adjacency matrix (vertex degrees when the matrix is 0/1).
row_sums = adjacency_matrix.sum(axis=1)

# `bins` is the (counts, bin_edges) pair returned by np.histogram, 20 bins.
bins = np.histogram(row_sums, bins=20)

In [None]:
# Histogram on linear axes: each count is plotted against the upper edge of
# its bin (the first edge is dropped so both arrays have equal length).
hist_counts, hist_edges = bins
upper_edges = hist_edges[1:]

plt.scatter(upper_edges, hist_counts)
plt.plot(upper_edges, hist_counts);

In [None]:
# Same histogram on log-log axes. Counts go through log1p so that empty bins
# map to 0 instead of -inf.
log_upper_edges = np.log(bins[1][1:])
log1p_counts = np.log1p(bins[0])

plt.scatter(log_upper_edges, log1p_counts)
plt.plot(log_upper_edges, log1p_counts);

### Fitting a line to estimate `alpha`

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:

# Log-log coordinates of the histogram. Counts go through log1p so empty bins
# become 0 rather than -inf; the same transformed values are used for both the
# fit and the plot, so the plotted points are exactly what the line was
# fitted to.
log_degree = np.log(bins[1][1:])
log_count = np.log1p(bins[0])

# Ordinary least-squares line through the log-log points.
model = LinearRegression()
model.fit(log_degree.reshape(-1, 1), log_count)

# Evaluate the fitted line over the observed x-range instead of hard-coded
# limits, so the overlay stays aligned with whatever data was loaded.
x = np.linspace(log_degree.min(), log_degree.max(), 100).reshape(-1, 1)
y = model.predict(x)


plt.scatter(log_degree, log_count)
plt.plot(log_degree, log_count)

# The legend reports the fitted slope; for a power law P(k) ~ k^(-a) the
# slope of the log-log line is -a.
plt.plot(x, y, c='r', label=f'alpha = {np.round(model.coef_[0], 3)}')
plt.legend();

# Home reading

"Scale-free networks are rare", Anna D. Broido, Aaron Clauset https://www.nature.com/articles/s41467-019-08746-5

# 2. Build graphs in 3 popular libraries

# 2.1 `networkx`

In [None]:
import networkx as nx

In [None]:
# Toy graph: two triangles, {1, 2, 3} and {4, 5, 6}, joined by the edge 1-4.
edges = (
    [(1, 2), (1, 3), (2, 3)]
    + [(4, 5), (4, 6), (5, 6)]
    + [(1, 4)]
)

# Edge list as a two-column table, one row per edge.
df_edges = pd.DataFrame(edges, columns=['source', 'target'])

In [None]:
# Build an undirected networkx graph from the edge table, naming the endpoint
# columns explicitly.
G = nx.from_pandas_edgelist(df_edges, source='source', target='target')

In [None]:
# dir(G)

In [None]:
# Dense NumPy copy of the graph's (sparse) adjacency matrix.
sparse_adjacency = nx.adjacency_matrix(G)
adjacency = np.array(sparse_adjacency.todense())

# Force-directed (Fruchterman-Reingold) node positions for drawing.
pos = nx.fruchterman_reingold_layout(G)

In [None]:
# Draw the graph with the node positions stored in `pos`.
nx.draw(G, pos)

# 2.2 `igraph`

In [None]:
# !pip install igraph

In [None]:
from igraph import Graph, ADJ_MAX

In [None]:
# Show the dense adjacency matrix as an image, with a colour bar for the values.
adjacency_image = plt.imshow(adjacency)
plt.colorbar(adjacency_image);

In [None]:
# igraph takes the matrix as nested Python lists; entries are stored in the
# 'weight' edge attribute and ADJ_MAX selects how the two triangles are
# combined.
weight_rows = adjacency.tolist()
g0 = Graph.Weighted_Adjacency(weight_rows, mode=ADJ_MAX, attr='weight')

In [None]:
# dir(g0)

In [None]:
# Force-directed layout computed by igraph; `.coords` holds one coordinate
# pair per igraph vertex, in vertex-index order.
layout = g0.layout_fruchterman_reingold()

# networkx expects a {node: position} mapping. `g0` was built from the
# adjacency matrix of `G`, whose rows follow `G.nodes()` order, so pair the
# coordinates with the actual node labels instead of a hard-coded range(1, 7).
pos = dict(zip(G.nodes(), layout.coords))

### `igraph` was originally designed for the R language, so for plotting let's use `networkx` routines

In [None]:
# Draw the networkx graph using the positions currently stored in `pos`.
nx.draw(G, pos)

# 2.3 `pytorch-geometric`

In [None]:
# !pip install torch-geometric

In [None]:
import torch
from torch_geometric.data import Data
import torch_geometric

In [None]:
# Build the edge index tensor from the networkx graph.

# COO format exposes the non-zero entries as parallel row / column index arrays.
adj = nx.to_scipy_sparse_array(G).tocoo()

# Convert both index arrays to int64 (torch.long) tensors.
row, col = (torch.from_numpy(idx.astype(np.int64)) for idx in (adj.row, adj.col))

# Shape (2, n_entries): first row holds source indices, second row targets.
edge_index = torch.stack((row, col))

In [None]:
# pytorch-geometric allows features both on vertices and on edges.
# Here each of the six vertices gets a single scalar feature (shape (6, 1)).
node_values = (-1, 0, 1, 1, -1, 0)
x = torch.tensor([[value] for value in node_values], dtype=torch.float)

# Bundle node features and connectivity into one graph object.
data = Data(edge_index=edge_index, x=x)

In [None]:
# dir(data)

In [None]:
# Convert the PyG data object back to an undirected networkx graph and draw it
# with networkx's default layout.
to_nx = torch_geometric.utils.to_networkx
g = to_nx(data, to_undirected=True)
nx.draw(g)

# 2.4 Tensorflow GNN

Probably the best choice for production scenarios; very shallow learning curve.

https://github.com/tensorflow/gnn

In [None]:
#!pip install tensorflow_gnn

# 3. Домашнее задание

## 1. Визуализировать граф соавторств ИТИСа:
1. выделить гигантскую компоненту связности
2. визуализировать GCC используя любой метод укладки на выбор (методы igraph работают быстрее):
    - `dir(nx.layout)`
    - `[d for d in dir(igraph.Graph) if d.startswith('layout_')]`
3. на изображении подписать 10 вершин (авторов) с наибольшей степенью вершины

## 2. Имплементировать модель Барабаши — Альберт:

```
def generate_barabsi_albert(n: int) -> np.ndarray:
    """Generates a graph with `n` nodes via a preferential attachment process.
    Returns its binary 0/1 adjacency matrix."""
    ...
```

## 3. Проанализировать граф ИТИСа
- Оцените параметр $\lambda$, насколько граф ИТИСа соответствует Power law distribution?

## 4. Home reading
https://math.bme.hu/~gabor/oktatas/SztoM/Newman_Networks.pdf стр. 167-185 Centrality Measures