# Case Study: Panama Papers

[Dataset download](https://github.com/ICIJ/offshoreleaks-data-packages/tree/main/raw-data)

In [4]:
import csv
import pickle
import itertools
from collections import Counter
import networkx as nx
from networkx.drawing.nx_agraph import graphviz_layout 
import matplotlib.pyplot as plt

In [5]:
import dzcnapy_plotlib as dzcnapy

In [6]:
EDGES = "beneficiary"
NODES = (("./csv_panama_papers.2018-02-14/panama_papers.nodes.entity.csv", "jurisdiction", "name"),
         ("./csv_panama_papers.2018-02-14/panama_papers.nodes.officer.csv", "country_codes", "name"),
         ("./csv_panama_papers.2018-02-14/panama_papers.nodes.intermediary.csv", "country_codes", "name"))

In [7]:
panama = nx.Graph()

with open("./csv_panama_papers.2018-02-14/panama_papers.edges.csv") as infile:
    data = csv.DictReader(infile)
    panama.add_edges_from((link["START_ID"], link["END_ID"])
                          for link in data
                          if link["TYPE"].lower().startswith(EDGES))

In [8]:
nodes = set(panama.nodes())
relabel = {}

In [9]:
for f, cc, name in NODES:
    with open(f) as infile:
        kind = f.split(".")[0]
        data = csv.DictReader(infile)
        names_countries = {node["node_id"] :
                           (node[name].strip().upper(), node[cc])
                           for node in data
                           if node["node_id"] in nodes}
    names =     {nid: values[0] for nid, values in names_countries.items()}
    countries = {nid: values[1] for nid, values in names_countries.items()}
    kinds =     {nid: kind      for nid, _      in names_countries.items()}
    nx.set_node_attributes(panama, countries, "country")
    nx.set_node_attributes(panama, kinds, "kind")
    relabel.update(names)

nx.relabel_nodes(panama, relabel, copy=False)

<networkx.classes.graph.Graph at 0x7fc8bdbb4e80>

In [10]:
if "ISSUES OF:" in panama:
    panama.remove_node("ISSUES OF:")

if "" in panama:
    panama.remove_node("")

print(nx.number_of_nodes(panama), nx.number_of_edges(panama))

0 0


In [11]:
components = [p.nodes() for p in nx.connected_components(panama)
              if nx.number_of_nodes(p) >= 20
              or nx.number_of_edges(p) >= 20]
panama0 = panama.subgraph(itertools.chain.from_iterable(components))

print(nx.number_of_nodes(panama0), nx.number_of_edges(panama0))

0 0


In [12]:
with open("panama-beneficiary.pickle", "wb") as outfile:
    pickle.dump(panama, outfile)

In [14]:
! pip install pygraphviz

Collecting pygraphviz
  Using cached pygraphviz-1.9.zip (119 kB)
Building wheels for collected packages: pygraphviz
  Building wheel for pygraphviz (setup.py) ... [?25lerror
[31m  ERROR: Command errored out with exit status 1:
   command: /Users/piyush/opt/anaconda3/bin/python -u -c 'import io, os, sys, setuptools, tokenize; sys.argv[0] = '"'"'/private/var/folders/cr/cspt5tz93w7cqfslq2qjbg_c0000gn/T/pip-install-9sdmbkv3/pygraphviz_9f9b2a2ed09b486a82fd460d62f6086e/setup.py'"'"'; __file__='"'"'/private/var/folders/cr/cspt5tz93w7cqfslq2qjbg_c0000gn/T/pip-install-9sdmbkv3/pygraphviz_9f9b2a2ed09b486a82fd460d62f6086e/setup.py'"'"';f = getattr(tokenize, '"'"'open'"'"', open)(__file__) if os.path.exists(__file__) else io.StringIO('"'"'from setuptools import setup; setup()'"'"');code = f.read().replace('"'"'\r\n'"'"', '"'"'\n'"'"');f.close();exec(compile(code, __file__, '"'"'exec'"'"'))' bdist_wheel -d /private/var/folders/cr/cspt5tz93w7cqfslq2qjbg_c0000gn/T/pip-wheel-sig1tzya
       cwd: /pr

In [13]:
panama0 = panama
cdict = {"Entities": "pink", "Officers": "blue", 
         "Intermediaries" : "green"}
c = [cdict[panama0.nodes[n]["kind"]] for n in panama0]
dzcnapy.small_attrs["node_color"] = c
pos = graphviz_layout(panama0)
nx.draw_networkx(panama0, pos=pos, with_labels=False, **dzcnapy.small_attrs)
dzcnapy.set_extent(pos, plt)
dzcnapy.plot("panama0")

ImportError: requires pygraphviz http://pygraphviz.github.io/

In [None]:
import pandas as pd

In [None]:
edg = pd.read_csv('./csv_panama_papers.2018-02-14/panama_papers.edges.csv', low_memory=False)

In [None]:
edg.head()

Unnamed: 0,START_ID,TYPE,END_ID,link,start_date,end_date,sourceID,valid_until
0,10000035,registered_address,14095990,registered address,,,Panama Papers,
1,10000044,registered_address,14091035,registered address,,,Panama Papers,
2,10000055,registered_address,14095990,registered address,,,Panama Papers,
3,10000064,registered_address,14091429,registered address,,,Panama Papers,
4,10000089,registered_address,14098253,registered address,,,Panama Papers,


In [None]:
edg.TYPE.unique()

array(['registered_address', 'intermediary_of', 'officer_of'],
      dtype=object)