In [1]:
import pandas as pd
import networkx as nx
import numpy as np
import seaborn as sns; sns.set(style='white')
import matplotlib.pyplot as plt

In [2]:
from unidecode import unidecode

In [3]:
# Load the pre-processed contracts table; start_date is parsed as a datetime column.
cnts = pd.read_csv(
    "../data/pre-process/contratos_4.csv",
    parse_dates=["start_date"],
)

## One last dedup

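Normalize buyer and supplier names by stripping surrounding whitespace, lower-casing, and removing accents (via `unidecode`), so that spelling variants of the same name collapse into a single value.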

In [4]:
def normalize(x):
    """Return `x` trimmed of surrounding whitespace, lower-cased, and passed through `unidecode`."""
    cleaned = x.strip().lower()
    return unidecode(cleaned)

In [5]:
# Distinct supplier names before normalization.
cnts['supplier'].nunique()

188264

In [6]:
# Distinct buyer names before normalization.
cnts['buyer'].nunique()

2222

In [7]:
# Overwrite the buyer column with its normalized form (element-wise call to normalize).
cnts['buyer'] = cnts['buyer'].apply(lambda name: normalize(name))

In [8]:
# Overwrite the supplier column with its normalized form (element-wise call to normalize).
cnts['supplier'] = cnts['supplier'].apply(lambda name: normalize(name))

In [9]:
# Distinct supplier names after normalization.
cnts['supplier'].nunique()

187955

In [10]:
# Distinct buyer names after normalization.
cnts['buyer'].nunique()

2162

In [12]:
# Calendar year of each contract's start_date, stored as its own column.
cnts['year'] = cnts['start_date'].dt.year

In [19]:
# Mean of single_bid within each code_b group.
cnts.groupby('code_b')['single_bid'].mean()

code_b
AGU    0.492917
BCN    0.757317
BCS    0.800767
CAM    0.684893
CHH    0.739047
CHP    0.647841
CMX    0.842328
COA    0.781114
COL    0.681426
DUR    0.560444
GRO    0.667820
GUA    0.720745
HID    0.750607
JAL    0.799299
MEX    0.679860
MIC    0.730522
MOR    0.750482
NAY    0.742209
NLE    0.704028
OAX    0.556886
PUE    0.740341
QUE    0.753479
ROO    0.563850
SIN    0.697845
SLP    0.792062
SON    0.859716
TAB    0.599946
TAM    0.658175
TLA    0.654615
VER    0.749477
YUC    0.705478
ZAC    0.612449
Name: single_bid, dtype: float64

In [None]:
# One row per distinct (code_b, code_s, buyer, supplier, year) combination:
# `weight` is the summed amount and `cri` the mean CRI of the grouped contracts.
net = (
    cnts
    .groupby(['code_b', 'code_s', 'buyer', 'supplier', 'year'])
    .agg(
        weight=pd.NamedAgg(column='amount', aggfunc='sum'),
        cri=pd.NamedAgg(column='CRI', aggfunc='mean'),
    )
    .reset_index()
)

In [None]:
# Distinct year values present in the aggregated edge table.
years = net['year'].unique()

In [None]:
# Build one graph per year, in ascending year order, linking buyers to suppliers
# and carrying the `cri` column as the edge attribute.
Gs = []
for year in sorted(years):
    yearly_edges = net.loc[net['year'] == year]
    yearly_graph = nx.from_pandas_edgelist(
        yearly_edges,
        source='buyer',
        target='supplier',
        edge_attr='cri',
    )
    Gs.append(yearly_graph)

In [None]:
# Stray tab-completion fragment (`nx.con`) removed: networkx exposes no attribute
# named `con`, so the expression raised AttributeError and stopped a Restart & Run All.

In [None]:
# Per-year structural summary of the graphs in Gs.
# Gs was filled by iterating sorted(years), so zipping the two sequences pairs every
# graph with its actual year instead of assuming the data starts in 2011 with no gaps.
data = []
for year, G in zip(sorted(years), Gs):
    year = int(year)  # plain int, like the previously hard-coded labels
    nodes = len(G)
    edges = len(G.edges)
    density = nx.density(G)
    # Node count of the largest connected component.
    largest_component = len(max(nx.connected_components(G), key=len))
    number_components = nx.number_connected_components(G)
    degrees = list(dict(G.degree).values())
    mean_degree = np.mean(degrees)
    max_degree = np.max(degrees)
    min_degree = np.min(degrees)
    std_degree = np.std(degrees)
    data.append([year, nodes, edges, density, largest_component, number_components, mean_degree,
                max_degree, min_degree, std_degree])
df = pd.DataFrame(data, columns=['year', 'nodes', 'edges', 'density', 'largest_component',
                           'number_components', 'mean_degree', 'max_degree', 'min_degree',
                           'std_degree'])

In [None]:
# Show the per-year network summary table assembled in the previous cell.
df

In [None]:
# Histogram of node core numbers, one panel per yearly graph (log-scaled counts).
n_cols = 3
# Enough rows for every graph; a fixed 3x3 grid raised IndexError past nine graphs.
n_rows = max(1, (len(Gs) + n_cols - 1) // n_cols)
fig, axs = plt.subplots(n_rows, n_cols, figsize=(12, 3 * n_rows),
                        sharex=True, sharey=True, squeeze=False)
# Gs was built by iterating sorted(years), so zipping recovers each graph's real year
# rather than assuming the series starts in 2011 with no gaps.
for ax, year, G in zip(axs.flat, sorted(years), Gs):
    # Axes.hist replaces the deprecated sns.distplot(kde=False); alpha=0.4 mirrors
    # distplot's default histogram transparency.
    ax.hist(list(nx.core_number(G).values()), bins=20, alpha=0.4)
    ax.set_title(f'Year {int(year)}')
    ax.set_yscale('log')

In [None]:
# Histogram of node degrees, one panel per yearly graph (log-scaled counts).
n_cols = 3
# Enough rows for every graph; a fixed 3x3 grid raised IndexError past nine graphs.
n_rows = max(1, (len(Gs) + n_cols - 1) // n_cols)
fig, axs = plt.subplots(n_rows, n_cols, figsize=(12, 3 * n_rows),
                        sharex=True, sharey=True, squeeze=False)
# Gs was built by iterating sorted(years), so zipping recovers each graph's real year
# rather than assuming the series starts in 2011 with no gaps.
for ax, year, G in zip(axs.flat, sorted(years), Gs):
    # Axes.hist replaces the deprecated sns.distplot(kde=False); alpha=0.4 mirrors
    # distplot's default histogram transparency.
    ax.hist(list(dict(G.degree()).values()), bins=20, alpha=0.4)
    ax.set_title(f'Year {int(year)}')
    ax.set_yscale('log')