# Sport

This notebook shows how to use scikit-network to analyse sport data.

Here we consider the results of tennis matches of the [ATP Tour](https://en.wikipedia.org/wiki/ATP_Tour) over the period 2001–2016.

In [None]:
from IPython.display import SVG

In [None]:
import numpy as np
import pandas as pd
from scipy import sparse

In [None]:
from sknetwork.data import from_edge_list
from sknetwork.ranking import PageRank, top_k
from sknetwork.topology import get_core_decomposition
from sknetwork.utils import directed2undirected
from sknetwork.embedding import Spectral
from sknetwork.visualization import visualize_graph

## Load data

In [None]:
# Relative path of the input CSV file; it is read with ';' as separator in the next cell.
filename = 'atp.csv'

In [None]:
# Load the table from disk; fields in the file are separated by semicolons.
df = pd.read_csv(
    filename,
    sep=';',
)

In [None]:
# Preview the first five rows (five is the pandas default for head).
df.head(n=5)

In [None]:
# Keep only the rows whose 'Comment' column is exactly 'Completed'.
df = df.loc[df['Comment'].eq('Completed')]

In [None]:
# Number of rows currently in the table.
df.shape[0]

## Build graph

In [None]:
# One plain (winner, loser) tuple per row of the table.
edge_list = list(zip(df['Winner'], df['Loser']))

In [None]:
# Number of (winner, loser) pairs extracted from the table.
len(edge_list)

In [None]:
# Build a directed graph from the (winner, loser) pairs.
# NOTE(review): presumably each tuple is read as (source, target), so edges
# point from winner to loser — confirm against the from_edge_list documentation.
graph = from_edge_list(edge_list, directed=True)

In [None]:
# Pull the adjacency matrix and the node names out of the loaded graph object.
adjacency, names = graph.adjacency, graph.names

In [None]:
# Display the adjacency matrix object as the cell output.
adjacency

In [None]:
# Number of node names in the graph.
len(names)

## Ranking

In [None]:
# Ten players with the most wins: multiplying the adjacency matrix by a
# vector of ones sums each row, i.e. the total weight of outgoing edges.
out_weights = adjacency @ np.ones(len(names))
print(names[top_k(out_weights, 10)])

In [None]:
# Ten players with the highest PageRank scores.
# PageRank is run on the transposed adjacency matrix, i.e. with every edge reversed.
adjacency_transpose = sparse.csr_matrix(adjacency.T)
pagerank = PageRank()
scores = pagerank.fit_predict(adjacency_transpose)
print(names[top_k(scores, 10)])

In [None]:
# Indices of the ten highest PageRank scores, then the sub-matrix of the
# adjacency restricted to those rows and columns.
index = top_k(scores, 10)
sub_adjacency = adjacency[index, :][:, index]

In [None]:
# Render the sub-graph of the selected players, passing their names and PageRank scores.
SVG(
    visualize_graph(
        sub_adjacency,
        names=names[index],
        scores=scores[index],
    )
)

## Core decomposition

In [None]:
# Undirected version of the graph, used as input to the core decomposition.
adjacency_sym = directed2undirected(adjacency)

In [None]:
# Core decomposition of the undirected graph.
# NOTE(review): `values` is later compared against its maximum and used as a
# mask over `names`, so it is presumably one core value per node — confirm.
values = get_core_decomposition(adjacency_sym)

In [None]:
# Names of the nodes whose core value equals the maximum.
print(names[values == values.max()])

## Embedding

In [None]:
# Two-component spectral embedding, with normalization switched off.
spectral = Spectral(n_components=2, normalized=False)

In [None]:
# Fit the spectral embedding and get the coordinates of the nodes.
# NOTE(review): this is fitted on the directed `adjacency`, not on
# `adjacency_sym` — confirm this is intended.
embedding = spectral.fit_transform(adjacency)

In [None]:
# Boolean mask selecting the nodes whose core value equals the maximum.
mask = values == values.max()

In [None]:
# Draw only the nodes selected by `mask`, placed at their embedding coordinates.
# No adjacency matrix is passed to visualize_graph here.
SVG(
    visualize_graph(
        position=embedding[mask],
        names=names[mask],
        scores=scores[mask],
        node_size=5,
        width=400,
        height=1000,
    )
)
