In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import requests
from bs4 import BeautifulSoup
import folium
import json
import dateutil.parser
import datetime as dt
from datetime import datetime
from scipy.interpolate import interp1d
from functools import reduce
import pycountry

from utils import preprocessing

In [2]:
# Location of the Entities table, relative to the notebook's working directory.
# Kept under its own name so `entities` only ever refers to the DataFrame.
ENTITIES_CSV = r'./panama_csv/Entities.csv'

# Load the table indexed by the `name` column.
entities = pd.read_csv(ENTITIES_CSV, index_col='name', header=0, low_memory=False)

# Column renames used by the rest of the notebook:
#   countries                -> Country
#   jurisdiction             -> abbr_jurisd
#   jurisdiction_description -> jurisdiction
entities = entities.rename(columns={
    'countries': 'Country',
    'jurisdiction': 'abbr_jurisd',
    'jurisdiction_description': 'jurisdiction',
})

In [6]:
def parse_year_of_date(row, column, from_year, to_year):
    """Return the year of the date text stored in ``row[column]``.

    Parameters
    ----------
    row : mapping-like
        Any object indexable by ``column`` (for example a DataFrame row).
    column : hashable
        Key of the field that holds the date text.
    from_year, to_year : int
        Inclusive lower and upper bounds of the accepted years.

    Returns
    -------
    int or float
        The parsed year when ``from_year <= year <= to_year``. ``np.nan`` when
        the value is not a string, cannot be parsed as a date, or its year
        falls outside the bounds.
    """
    value = row[column]
    if not isinstance(value, str):
        return np.nan

    try:
        year = dateutil.parser.parse(value).year
    except (ValueError, OverflowError):
        # dateutil reports unparseable text with ValueError (ParserError is a
        # subclass) and oversized components with OverflowError. Treat both as
        # "no usable date" so a single malformed cell does not abort a whole
        # row-wise apply.
        return np.nan

    return year if from_year <= year <= to_year else np.nan
    
def parse_dates(dataframe, from_year, to_year):
    """Overwrite the date columns of ``dataframe`` with bounded years.

    Each of ``incorporation_date``, ``inactivation_date``, ``struck_off_date``
    and ``dorm_date`` is replaced, row by row, with the result of
    ``parse_year_of_date`` for that column and the given year bounds.

    The frame is modified in place; the same object is returned for chaining.
    """
    date_columns = (
        "incorporation_date",
        "inactivation_date",
        "struck_off_date",
        "dorm_date",
    )

    for column_name in date_columns:
        def year_of(record, column_name=column_name):
            # Bind the current column explicitly so each pass parses its own field.
            return parse_year_of_date(record, column_name, from_year, to_year)

        dataframe[column_name] = dataframe.apply(year_of, axis=1)

    return dataframe

In [34]:
# Numeric codes assigned by hand for names that are not looked up through
# pycountry (same values the lookup chain used before).
_COUNTRY_CODE_OVERRIDES = {
    'Russia': 643,
    'Isle Of Man': 833,
    'British Virgin Islands': 92,
}
_JURISDICTION_CODE_OVERRIDES = {
    'British Anguilla': 660,
    'British Virgin Islands': 92,
    'Nevada': 840,
    'Wyoming': 840,
    'Ras Al Khaimah': 784,
    'Isle Of Man': 833,
    'United States Of America': 840,
    'Dubai': 784,
}
_GROUP_KEYS = ['Country', 'jurisdiction', 'date']


def _count_events_by_year(leak, date_column, count_label):
    """Count rows per (Country, jurisdiction, year of ``date_column``).

    Returns a one-column frame named ``count_label`` (the non-null ``node_id``
    count of each group) indexed by Country / jurisdiction / date.
    """
    counts = leak.groupby(['Country', 'jurisdiction', date_column]).count()
    counts = (counts.reset_index()
                    .rename(columns={date_column: 'date', 'node_id': count_label})
                    .set_index(_GROUP_KEYS))
    return counts.loc[:, [count_label]]


def _numeric_code(name, overrides):
    """Return the integer numeric code for ``name``, or ``None`` if unknown.

    ``overrides`` is consulted first; otherwise the name is looked up with
    ``pycountry.countries.get(name=...)``.
    """
    if name in overrides:
        return overrides[name]
    try:
        match = pycountry.countries.get(name=name)
    except LookupError:
        # Some pycountry releases raise KeyError for unknown names instead of
        # returning None.
        match = None
    return None if match is None else int(match.numeric)


def process_countries_with_code(first_involved_countries, analisys_on='jurisdiction', from_year=1990, to_year=2017):
    """Build a wide table of active offshores per country, jurisdiction and year.

    For every value in ``first_involved_countries`` the rows of the module-level
    ``entities`` frame whose ``Country`` equals that value are selected, their
    date columns are reduced to years with ``parse_dates``, and for every
    (Country, jurisdiction, year) that has at least one incorporation,
    inactivation or strike-off, the number of "active" entities is computed:
    incorporated in or before that year and either never inactivated or
    inactivated in a later year.

    Parameters
    ----------
    first_involved_countries : iterable
        Values of the ``Country`` column to process. Must not be empty.
    analisys_on : str
        Not used by the computation; kept so existing calls remain valid.
    from_year, to_year : int
        Inclusive year bounds forwarded to ``parse_dates``.

    Returns
    -------
    pandas.DataFrame
        One row per (Country, jurisdiction) pair, excluding the jurisdiction
        ``'Undetermined'``. Columns: ``Country`` and ``jurisdiction`` (numeric
        codes, NaN when no code could be resolved), one column per year with
        the active-offshore count, and ``Country_name`` with the original
        ``Country`` text.

    Notes
    -----
    Prints one ``Country done:`` line per processed country, and prints the
    Country / jurisdiction of every row whose code could not be resolved.
    """
    per_country = []
    for involved_country in first_involved_countries:
        # Boolean indexing already yields a new frame; the explicit copy keeps
        # parse_dates' in-place column writes away from `entities`.
        involved_leak = entities[entities['Country'].isin([involved_country])].copy()
        involved_leak = parse_dates(involved_leak, from_year, to_year)

        incorporation = _count_events_by_year(involved_leak, 'incorporation_date', 'incorporations')
        inactivation = _count_events_by_year(involved_leak, 'inactivation_date', 'inactivations')
        struck = _count_events_by_year(involved_leak, 'struck_off_date', 'strucks')

        country_res = pd.merge(incorporation.reset_index(),
                               inactivation.reset_index(),
                               on=_GROUP_KEYS,
                               how='outer').set_index(_GROUP_KEYS)
        country_res = pd.merge(country_res.reset_index(),
                               struck.reset_index(),
                               on=_GROUP_KEYS,
                               how='outer').set_index(_GROUP_KEYS)

        # Create the column up front so it exists even when there is no dated
        # row at all for this country (the loop below would never add it).
        country_res['active offshores'] = np.nan
        for key, _row in country_res.iterrows():
            year = int(key[2])
            not_yet_inactive = ((involved_leak['inactivation_date'] > year) |
                                pd.isnull(involved_leak['inactivation_date']))
            already_incorporated = involved_leak['incorporation_date'] <= year
            same_group = ((involved_leak['Country'] == key[0]) &
                          (involved_leak['jurisdiction'] == key[1]))
            active = involved_leak[not_yet_inactive & already_incorporated & same_group]
            country_res.loc[key, 'active offshores'] = active.count()['node_id']

        country_result = country_res.loc[:, ['incorporations', 'inactivations', 'active offshores', 'strucks']]
        country_result = country_result.reset_index()
        country_result["date"] = country_result["date"].astype(int)
        country_result = country_result.set_index(_GROUP_KEYS)
        per_country.append(country_result)
        print("Country done:" + involved_country)

    collection = [pd.DataFrame(frame['active offshores']).reset_index() for frame in per_country]
    countries_frame = reduce(
        lambda x, y: pd.merge(x, y, on=['Country', 'jurisdiction', 'date', 'active offshores'], how='outer'),
        collection)
    countries_frame = countries_frame.set_index(_GROUP_KEYS)
    countries_frame = countries_frame.unstack(level=[2]).reset_index()

    # After dropping the outer column level the two key columns are left with
    # empty labels; rebuild the labels explicitly instead of mutating
    # `columns.values` in place.
    year_labels = countries_frame.columns.droplevel(0)
    countries_frame.columns = pd.Index(['Country', 'jurisdiction'] + list(year_labels[2:]),
                                       name=year_labels.name)

    # Copy so the per-row writes below target this frame, not a slice.
    countries_frame = countries_frame[(countries_frame['jurisdiction'] != 'Undetermined')].copy()

    for id_, row in countries_frame.iterrows():
        country_code = _numeric_code(row['Country'], _COUNTRY_CODE_OVERRIDES)
        jurisdiction_code = _numeric_code(row['jurisdiction'], _JURISDICTION_CODE_OVERRIDES)
        if country_code is None or jurisdiction_code is None:
            # Report the unresolved pair; the missing code is stored as NaN
            # rather than reusing whatever the previous row resolved to.
            print(row['Country'])
            print(row['jurisdiction'])
        countries_frame.loc[id_, 'Country_name'] = row['Country']
        countries_frame.loc[id_, 'Country'] = np.nan if country_code is None else country_code
        countries_frame.loc[id_, 'jurisdiction'] = np.nan if jurisdiction_code is None else jurisdiction_code

    return countries_frame

In [3]:
# Distinct values of the `Country` column (the sorted group keys of the groupby).
all_countries = entities.groupby('Country').count().index

In [4]:
# Run the per-country aggregation defined above; `process_countries` is not
# defined in this notebook, the function that produces this output is
# `process_countries_with_code`.
countries_frame = process_countries_with_code(all_countries, analisys_on='Country', from_year=1900, to_year=2017)

Country done:Albania
Country done:American Samoa
Country done:Andorra
Country done:Andorra;Not identified
Country done:Angola
Country done:Anguilla
Country done:Antigua and Barbuda
Country done:Antigua and Barbuda;British Virgin Islands
Country done:Antigua and Barbuda;Not identified
Country done:Argentina
Country done:Aruba
Country done:Australia
Country done:Australia;Belize
Country done:Australia;British Virgin Islands
Country done:Australia;Not identified
Country done:Austria
Country done:Austria;British Virgin Islands
Country done:Austria;Not identified
Country done:Azerbaijan
Country done:Bahamas
Country done:Bahamas;Not identified
Country done:Bahamas;Panama
Country done:Bahrain
Country done:Bangladesh
Country done:Barbados
Country done:Belarus
Country done:Belarus;British Virgin Islands
Country done:Belarus;Not identified
Country done:Belgium
Country done:Belgium;British Virgin Islands
Country done:Belgium;Not identified
Country done:Belize
Country done:Belize;British Virgin Is

  stride //= shape[i]


Country done:Cayman Islands;Taiwan
Country done:Cayman Islands;Taiwan;British Virgin Islands
Country done:Central African Republic
Country done:Chad
Country done:Chile
Country done:China
Country done:China;British Virgin Islands
Country done:China;Not identified
Country done:Colombia
Country done:Cook Islands
Country done:Cook Islands;Cayman Islands
Country done:Cook Islands;Hong Kong
Country done:Cook Islands;Indonesia
Country done:Cook Islands;Not identified
Country done:Cook Islands;Singapore
Country done:Cook Islands;Taiwan
Country done:Cook Islands;United States
Country done:Costa Rica
Country done:Costa Rica;Not identified
Country done:Croatia
Country done:Croatia;British Virgin Islands
Country done:Croatia;Not identified
Country done:Cuba
Country done:Curaçao
Country done:Curaçao;British Virgin Islands
Country done:Curaçao;Netherlands;Not identified
Country done:Curaçao;Not identified
Country done:Cyprus
Country done:Cyprus;British Virgin Islands
Country done:Cyprus;Not identifi

Country done:Romania;Not identified
Country done:Russia
Country done:Russia;British Virgin Islands
Country done:Russia;Not identified
Country done:Russia;Saint Kitts and Nevis
Country done:Saint Kitts and Nevis
Country done:Saint Kitts and Nevis;British Virgin Islands
Country done:Saint Kitts and Nevis;Canada
Country done:Saint Kitts and Nevis;Latvia
Country done:Saint Kitts and Nevis;Not identified
Country done:Saint Kitts and Nevis;Russia
Country done:Saint Kitts and Nevis;Sweden
Country done:Saint Kitts and Nevis;United Arab Emirates
Country done:Saint Kitts and Nevis;United Kingdom
Country done:Saint Kitts and Nevis;United States
Country done:Saint Lucia
Country done:Saint Vincent and the Grenadines
Country done:Samoa
Country done:Samoa;Cayman Islands
Country done:Samoa;Cook Islands
Country done:Samoa;Hong Kong
Country done:Samoa;Malaysia
Country done:Samoa;Not identified
Country done:Samoa;Singapore
Country done:Samoa;Taiwan
Country done:Samoa;Thailand
Country done:Saudi Arabia
Co

In [7]:
countries_frame

date,Country,jurisdiction,2014,2010,1990,1991,1992,1994,1995,1996,...,1955,1963,1964,1965,1936,1958,1959,1930,1961,Country_name
0,8,44,2.0,,,,,,,,...,,,,,,,,,,Albania
1,16,826,,1.0,,,,,,,...,,,,,,,,,,American Samoa
2,20,44,,,2.0,6.0,9.0,14.0,15.0,16.0,...,,,,,,,,,,Andorra
3,20,92,,8.0,3.0,5.0,9.0,10.0,9.0,10.0,...,,,,,,,,,,Andorra
4,20,188,3.0,,,,,,,,...,,,,,,,,,,Andorra
5,20,344,8.0,,,,,,,,...,,,,,,,,,,Andorra
6,20,840,7.0,15.0,,,,,,,...,,,,,,,,,,Andorra
7,20,570,,,,,,,1.0,2.0,...,,,,,,,,,,Andorra
8,20,591,105.0,164.0,27.0,27.0,29.0,28.0,30.0,31.0,...,,,,,,,,,,Andorra
9,20,882,,8.0,,,,,,,...,,,,,,,,,,Andorra


In [5]:
# countries_frame.to_csv('../../data/csv/cash_flows_actives.csv', index=False)

In [6]:
# countries_frame_wname = countries_frame.groupby('Country').sum().reset_index()
# countries_frame_name = countries_frame.groupby(['Country','Country_name']).sum().reset_index()