In [17]:
%%writefile offozaynapp/config.py


# Share links for the data files used by the app. tools.get_url() turns each
# of these into a direct-download URL before it is read.
sheet_url = 'https://docs.google.com/spreadsheets/d/1-0B0CN3y-K0CYoPCHpsh2TKpDP3N1rIm/edit?usp=sharing&ouid=111709418728007906825&rtpof=true&sd=true'
lat_lon_url = 'https://drive.google.com/file/d/13B1B_6tt_-mgt2kD719ZF_ElvPn34OKp/view?usp=sharing'

# `nodes` and `edges` used to be assigned twice in this module; only the later
# assignment ever took effect. The shadowed links are kept below as comments
# for reference so the effective values are unambiguous.
#   nodes (superseded): https://drive.google.com/file/d/1-0cpXZHUDGToKKlP_niCbjwBfQK3urSz/view?usp=sharing
#   edges (superseded): https://drive.google.com/file/d/14BW0OXkbN7BgwCTZ2YpTATAL5PBRRGRs/view?usp=sharing
nodes = "https://drive.google.com/file/d/13OnlR8d5LnHwUiyc1ftPNrczledCFcxM/view?usp=sharing"
edges = "https://drive.google.com/file/d/13RXWfn8N1z-RcWwhNLE2mglN1yQcGibK/view?usp=sharing"

Overwriting offozaynapp/config.py


In [None]:
%%writefile __init__.py




In [53]:
%%writefile offozaynapp/tools.py

import config
import streamlit as st
import graphviz as graphviz
import pandas as pd
import networkx as nx
from PIL import Image
import platform
import plotly.figure_factory as ff
import numpy as np

# Adapted from
# https://newbedev.com/pandas-how-to-read-csv-file-from-google-drive-public
def get_url(u):
    """Build a Google Drive direct-download URL from a share link.

    The file id is taken to be the second-to-last '/'-separated segment of
    ``u`` (where it sits in links of the form ``.../d/<id>/view?...``).
    """
    file_id = u.split('/')[-2]
    return 'https://drive.google.com/uc?export=download&id=' + file_id
    

def check_platform():
    """Classify the host as 'local' or 'remote'.

    Returns 'local' when ``platform.platform()`` starts with 'macOS',
    and 'remote' for every other platform string.
    """
    on_mac = platform.platform().startswith('macOS')
    return 'local' if on_mac else 'remote'

def is_local():
    """Return True when check_platform() reports 'local', else False."""
    platform_kind = check_platform()
    return platform_kind == 'local'
    
    
@st.cache(suppress_st_warning=True, show_spinner=False)
def load_data(url, nrows):
    """Read the first ``nrows`` rows of the CSV behind a share link.

    ``url`` is converted with get_url() before being handed to
    ``pd.read_csv``; the result is cached by Streamlit.
    """
    download_url = get_url(url)
    return pd.read_csv(download_url, nrows=nrows)

@st.cache(suppress_st_warning=True, show_spinner=False)
def load_nodes():
    """Download the nodes CSV referenced by ``config.nodes``.

    The frame is read with ``low_memory=False`` and cached by Streamlit.
    An earlier, disabled branch read './data/nodes_single.csv' when running
    locally; the shared file is now always used.
    """
    nodes_url = get_url(config.nodes)
    return pd.read_csv(nodes_url, low_memory=False)
    
@st.cache(suppress_st_warning=True, show_spinner=False)
def load_edges():
    """Download the edges CSV referenced by ``config.edges``.

    The frame is read with ``low_memory=False`` and cached by Streamlit.
    An earlier, disabled branch read './data/edges_all_single.csv' when
    running locally; the shared file is now always used.
    """
    edges_url = get_url(config.edges)
    return pd.read_csv(edges_url, low_memory=False)

@st.cache(suppress_st_warning=True, show_spinner=False)
def load_lat_lon(show_spinner=False):
    """Load the node latitude/longitude table.

    On a host that is_local() classifies as local, the developer's local CSV
    is tried first. If that file does not exist (for example on another
    macOS machine), or on any non-local host, the shared file referenced by
    ``config.lat_lon_url`` is downloaded instead.

    Parameters
    ----------
    show_spinner : bool, optional
        Not used by the function body; kept so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV.
    """
    if is_local():
        try:
            return pd.read_csv("~/Google Drive/My Drive/GA/capstone/data/clean/nodes_edges/nodes_lat_lon.csv")
        except FileNotFoundError:
            # is_local() only checks the OS, so the personal file may well be
            # missing; fall through to the shared copy instead of crashing.
            pass
    return pd.read_csv(get_url(config.lat_lon_url))


@st.cache(suppress_st_warning=True, show_spinner=False)
def read_xlsx(sheet, nrows=10000):
    """Read one sheet of the weighted-edges workbook.

    On a host that is_local() classifies as local, the workbook at
    './data/specific_edges_single.xlsx' is tried first. If it does not
    exist, or on any non-local host, the shared workbook referenced by
    ``config.sheet_url`` is downloaded instead.

    Parameters
    ----------
    sheet : str
        Passed to ``pd.read_excel`` as ``sheet_name``.
    nrows : int, optional
        Maximum number of rows to read (default 10000).

    Returns
    -------
    pandas.DataFrame
        The rows of the requested sheet.
    """
    if is_local():
        try:
            return pd.read_excel('./data/specific_edges_single.xlsx', sheet_name=sheet, nrows=nrows)
        except FileNotFoundError:
            # is_local() only checks the OS, so the local workbook may be
            # absent; fall through to the shared copy instead of crashing.
            pass
    return pd.read_excel(get_url(config.sheet_url), sheet_name=sheet, nrows=nrows)


def create_digraph_new(df, count=10):
    """Draw the heaviest edges of ``df`` as a directed Graphviz chart.

    Approach based on
    https://discuss.streamlit.io/t/support-for-networkx-pyvis-and-folium/190/2

    Parameters
    ----------
    df : pandas.DataFrame
        Edge table with a 'weight' column. Each row is passed as a tuple to
        ``DiGraph.add_weighted_edges_from``, so rows are presumably
        (source, target, weight) -- TODO confirm against the workbook schema.
    count : int, optional
        Maximum number of edges to draw (default 10).

    The input frame is left untouched: sorting is done on a copy, because
    callers may pass a DataFrame that Streamlit holds in its cache and
    mutating it in place would corrupt the cached value.
    """
    final_count = min(df.shape[0], count)
    # sort_values without inplace returns a new frame, leaving `df` as-is.
    top_edges = df.sort_values(by='weight', ascending=False).head(final_count)
    G = nx.DiGraph()
    G.add_weighted_edges_from([tuple(row) for row in top_edges.values])
    dot = nx.nx_pydot.to_pydot(G)
    st.graphviz_chart(dot.to_string(), use_container_width=True)
    
def show_image(name, caption):
    """Display the file ``images/<name>`` in the app with the given caption."""
    picture = Image.open(f'images/{name}')
    st.image(picture, caption=caption)

    

Overwriting offozaynapp/tools.py


In [45]:
%%writefile offozaynapp/main.py

import tools
import plotly_tools
import streamlit as st
import pandas as pd
import numpy as np
import platform
import maptools

# Page chrome.
st.set_page_config(page_title="Offshore Leaks Exploration")
st.title('Offshore Leaks')

# Fields that can be explored. The empty entry gives the selector a blank
# option, which is treated as "nothing selected" further down.
select_list = [
    '',
    'jurisdiction',
    'countries',
    'country_codes',
    'continents',
    'company_type',
    'jurisdiction_description',
    'table',
]

# Map the human-readable label shown in the sidebar back to the field name.
select_dict = {}
for field_name in select_list:
    label = field_name.replace('_', ' ').title()
    select_dict[label] = field_name

select_key = st.sidebar.selectbox(
    "Which field would you like to explore?",
    tuple(select_dict),
)


# Link types offered in the sidebar; the leading empty string is the
# "nothing selected" option.
link_types = (
    '',
    'registered address',
    'related entity',
    'shareholder of',
    'intermediary of',
    'trust settlor of',
    'protector of',
    'joint settlor of',
    'beneficiary of',
    'tax advisor of',
    'beneficial owner of',
    'resident director of',
    'secretary of',
    'director of',
    'trustee of trust of',
    'successor protector of',
    'alternate director of',
    'investment advisor of',
    'authorised person / signatory of',
    'assistant secretary of',
    'officer of',
    'auditor of',
    'legal advisor of',
    'general accountant of',
    'Nominee Shareholder of',
    'co-trustee of trust of',
    'register of shareholder of',
    'reserve director of',
    'register of director of',
    'bank signatory of',
    'personal directorship of',
    'stockbroker of',
    'correspondent addr. of',
    'appointor of',
    'president of',
    'treasurer of',
    'safekeeping of',
    'Nominee Director of',
    'vice president of',
    'Nominee Protector of',
    'nominated person of',
    'auth. representative of',
    'custodian of',
    'chairman of',
    'records & registers of',
    'Nominee Investment Advisor of',
    'Nominee Trust Settlor of',
    'Nominee Beneficiary of',
    'Nominee Beneficial Owner of',
    'nominee name of',
)

select_link = st.sidebar.selectbox(
    "Which link type would you like to explore?",
    link_types,
)

# Field name behind the label chosen in the field selector ('' when blank).
select_value = select_dict[select_key]

if select_value:
    # On a local host the 'countries' field additionally shows a stored image.
    if tools.is_local() and select_value == 'countries':
        tools.show_image('top_25_countries__address__intermediary__officer__entity.png', 'Countries')

    # Weighted edges for the chosen field: one workbook sheet per field.
    weight_data = tools.read_xlsx(select_value)
    total_edges = weight_data.shape[0]
    st.write(f'Total {total_edges} edges')

    st.subheader('Edges - Weighted')
    st.write(weight_data.head())

    # (A filter keeping only rows with weight > 1 was tried here and is disabled.)

    if st.checkbox('Show raw data'):
        st.subheader(f'{select_key} Source')
        source_column = f'{select_value}_source'
        hist_values = weight_data[source_column].value_counts().sort_values(ascending=False)
        n_sources = hist_values.shape[0]
        n_columns = st.slider('How many columns?', 0, n_sources, min(10, n_sources))
        st.bar_chart(hist_values.head(n_columns))
        st.write(hist_values)

    # The graph is capped at 100 edges and starts at 10 (or fewer if the
    # sheet is smaller).
    n_edges = st.slider('How many edges?', 0, min(total_edges, 100), min(10, total_edges))
    tools.create_digraph_new(weight_data, n_edges)
    
# Optional view: raw latitude/longitude table plus a point map.
show_lat_lon = st.checkbox('Show Latitude & Longitude data')
if show_lat_lon:
    lat_lon_data = tools.load_lat_lon()
    st.subheader('Raw data')
    st.write(lat_lon_data.head())
    st.map(lat_lon_data, zoom=1)

# Disabled experiment, kept for reference:
# if st.checkbox('Show plotly'):
#     plotly_tools.test_plotly()

# Optional view: Plotly map of the edges for the link type picked in the sidebar.
show_map = st.checkbox('Show Map plotly')
if show_map:
    if select_link:
        maptools.map_plotly(select_link)
    else:
        st.write('Please select a link!')

Overwriting offozaynapp/main.py


In [46]:
%%writefile offozaynapp/plotly_tools.py

import tools
import networkx as nx
import streamlit as st
import plotly.express as px
import plotly.graph_objects as go

@st.cache(suppress_st_warning=True, show_spinner=False)
def get_graph():
    """Build a directed graph from a random sample of at most 500 edges.

    Nodes and edges are loaded through ``tools``, a subset of their columns is
    renamed for display, and the node columns are attached as node attributes.
    Only nodes that appear in the sampled edges are part of the graph.

    Returns
    -------
    networkx.DiGraph
        Edge attributes: 'Active Days' and 'Type'. Node attributes: the
        renamed node columns plus 'location'.

    Note: the function is cached, so the progress messages written with
    ``st.text`` appear only when the graph is actually (re)built.
    """
    data_load_state = st.text('Loading data ... ')
    nodes = tools.load_nodes()
    edges = tools.load_edges()
    # DataFrame.sample raises ValueError when asked for more rows than exist,
    # so cap the sample size at the number of available edges.
    edges = edges.sample(min(edges.shape[0], 500))
    data_load_state.text('Loading data ... done!')

    col_edges_conv = {'START_ID': 'source',
                      'END_ID': 'target',
                      'link': 'Type',
                      'active_days': 'Active Days'}
    col_nodes_conv = {'countries': 'Country',
                      'continents': 'Region',
                      'jurisdiction': 'Jurisdiction',
                      'service_provider': 'Service Provider',
                      'company_type': 'Company Type'}

    edges_cols = ['START_ID', 'END_ID', 'active_days', 'link']
    nodes_cols = ['node_id', 'countries', 'continents', 'jurisdiction', 'service_provider', 'company_type', 'location']
    linkData = edges[edges_cols].rename(columns=col_edges_conv)
    nodeData = nodes[nodes_cols].rename(columns=col_nodes_conv)

    data_load_state = st.text('Making graph ...')
    # edge_attr=True copies every remaining edge column onto the graph edges.
    G = nx.from_pandas_edgelist(linkData, 'source', 'target',
                                edge_attr=True, create_using=nx.DiGraph())
    nx.set_node_attributes(G, nodeData.set_index('node_id').to_dict('index'))
    data_load_state.text('Making Graph ... done!')
    return G


def test_plotly():
    """Render the sampled network from get_graph() as a Plotly scatter chart.

    Every node is placed at the (x, y) pair stored in its 'location'
    attribute, edges are drawn as grey line segments, and nodes are coloured
    by the size of their adjacency entry (for a DiGraph: outgoing neighbours).

    This function is intentionally NOT wrapped in ``st.cache``: it returns
    nothing and exists only to draw the chart. With legacy caching the body
    is skipped on a cache hit, so the chart would disappear on every rerun
    after the first. The expensive part, get_graph(), is cached already.
    """
    G = get_graph()

    field = 'location'

    # NOTE(review): eval() executes whatever text is stored in the 'location'
    # attribute, which originates from a downloaded CSV. If the values are
    # plain "(x, y)" literals, ast.literal_eval would be a safer parser --
    # confirm the data format before switching.
    edge_x = []
    edge_y = []
    for source, target in G.edges():
        x0, y0 = eval(G.nodes[source][field])
        x1, y1 = eval(G.nodes[target][field])
        # A None after each pair breaks the line so segments are not joined.
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])

    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        line=dict(width=0.5, color='#888'),
        hoverinfo='none',
        mode='lines')

    node_x = []
    node_y = []
    for node in G.nodes():
        x, y = eval(G.nodes[node][field])
        node_x.append(x)
        node_y.append(y)

    node_trace = go.Scatter(
        x=node_x, y=node_y,
        mode='markers',
        hoverinfo='text',
        marker=dict(
            showscale=True,
            # colorscale options
            #'Greys' | 'YlGnBu' | 'Greens' | 'YlOrRd' | 'Bluered' | 'RdBu' |
            #'Reds' | 'Blues' | 'Picnic' | 'Rainbow' | 'Portland' | 'Jet' |
            #'Hot' | 'Blackbody' | 'Earth' | 'Electric' | 'Viridis' |
            colorscale='YlGnBu',
            reversescale=True,
            color=[],
            size=10,
            colorbar=dict(
                thickness=15,
                title='Node Connections',
                xanchor='left',
                titleside='right'
            ),
            line_width=2))

    # One entry per node, in the order G.adjacency() yields them.
    node_adjacencies = []
    node_text = []
    for _node, neighbours in G.adjacency():
        n_connections = len(neighbours)
        node_adjacencies.append(n_connections)
        node_text.append('# of connections: ' + str(n_connections))

    node_trace.marker.color = node_adjacencies
    node_trace.text = node_text

    fig = go.Figure(data=[edge_trace, node_trace],
                    layout=go.Layout(
                        title='Title',
                        titlefont_size=16,
                        showlegend=False,
                        hovermode='closest',
                        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))
                    )

    st.plotly_chart(fig)
    

Overwriting offozaynapp/plotly_tools.py


In [54]:
%%writefile offozaynapp/maptools.py
# https://plotly.com/python/lines-on-maps/

import streamlit as st

import plotly.graph_objects as go
import pandas as pd
import tools

@st.cache(suppress_st_warning=True, show_spinner=True, allow_output_mutation=True)
def _build_map_figure(select_value=None):
    """Build the map figure and summary values for one link type (cached).

    Returns a tuple ``(fig, edge_preview, n_edges, n_nodes)``.
    ``allow_output_mutation=True`` stops Streamlit from hashing the returned
    figure on every access.
    """
    nodes = tools.load_nodes()
    edges = tools.load_edges()
    if select_value:
        edges = edges[edges['link'] == select_value]

    # Keep only nodes that are an endpoint of at least one remaining edge.
    endpoint_ids = pd.concat([edges['START_ID'], edges['END_ID']])
    nodes = nodes[nodes['node_id'].isin(endpoint_ids)]

    fig = go.Figure()

    # Node markers.
    fig.add_trace(go.Scattergeo(
        locationmode = 'country names',
        lon = nodes['lon'].tolist(),
        lat = nodes['lat'].tolist(),
        hoverinfo = 'text',
        text = nodes['country'].tolist(),
        mode = 'markers',
        marker = dict(
            size = 2,
            color = 'rgb(255, 0, 0)',
            line = dict(
                width = 3,
                color = 'rgba(68, 68, 68, 0)'
            )
        )))

    # All edges go into ONE trace, with None separating the segments. Every
    # line shares the same style, so a trace per edge added nothing except
    # per-edge overhead and repeated row lookups.
    edge_lon = []
    edge_lat = []
    for lon_s, lon_t, lat_s, lat_t in zip(edges['lon_s'], edges['lon_t'],
                                          edges['lat_s'], edges['lat_t']):
        edge_lon.extend([lon_s, lon_t, None])
        edge_lat.extend([lat_s, lat_t, None])

    if edge_lon:
        fig.add_trace(
            go.Scattergeo(
                locationmode = 'country names',
                lon = edge_lon,
                lat = edge_lat,
                mode = 'lines',
                line = dict(width = 1, color = 'blue'),
            )
        )

    fig.update_layout(
        title_text = '',
        showlegend = False,
        geo = dict(
            scope = 'world',
            projection_type = 'equirectangular',
            showland = True,
            landcolor = 'rgb(243, 243, 243)',
            countrycolor = 'rgb(204, 204, 204)',
        ),
    )

    return fig, edges.head(2), edges.shape[0], nodes.shape[0]


def map_plotly(select_value=None):
    """Show a world map of the edges (optionally of one link type).

    Parameters
    ----------
    select_value : str, optional
        When given, only edges whose 'link' column equals this value are
        drawn; otherwise all edges are used.

    Writes a two-row preview of the edges, the edge and node counts, and the
    Plotly map to the page.

    The rendering calls live outside the cached helper on purpose: with
    legacy ``st.cache`` a cache hit skips the function body, so when this
    whole function was cached nothing was drawn on reruns with an
    already-seen ``select_value``.
    """
    data_load_state = st.text('Loading data ... ')
    fig, edge_preview, n_edges, n_nodes = _build_map_figure(select_value)

    st.write(edge_preview)
    st.write(f'Number of edges: {n_edges:,}')
    st.write(f'Number of nodes: {n_nodes:,}')
    data_load_state.text('Loading data ... done!')

    st.plotly_chart(fig, use_container_width=True)


Overwriting offozaynapp/maptools.py


In [48]:
%%writefile offozaynapp/requirements.txt

graphviz
pydot
openpyxl
networkx
plotly
scipy

Overwriting offozaynapp/requirements.txt


In [58]:
#!conda install -c anaconda graphviz -y
#!conda install -c anaconda pydot -y
#!conda install -c anaconda openpyxl -y