In [125]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

In [126]:
# Silence pandas' chained-assignment warning (pandas' default for this option is 'warn').
pd.set_option('mode.chained_assignment', None)
# Let NumPy divide by zero / produce invalid results without emitting warnings;
# the call returns the previous error settings, which the cell displays.
np.seterr(invalid='ignore', divide='ignore')

{'divide': 'ignore', 'over': 'warn', 'under': 'ignore', 'invalid': 'ignore'}

In [127]:
# Load the detections workbook into the notebook-wide frame `db`;
# later cells read its Route, BorderLocation and NationalityLong columns.
db = pd.read_excel(io='Detections_of_IBC_2018_07_06.xlsx')

In [128]:
def define_db(route, source=None, top_n=10):
    """Rank nationalities on one route for the latest and the preceding period.

    Rows whose ``Route`` equals ``route`` are selected, the ``BorderLocation``
    and ``Route`` columns are discarded and the rows are indexed by
    ``NationalityLong``.  The last two remaining columns are summed per row
    to give the "now" total; the two columns before them give the "before"
    total.  (Presumably these are the most recent reporting periods --
    confirm against the column order of the spreadsheet.)

    Parameters
    ----------
    route : str
        Value of the ``Route`` column to select.
    source : pandas.DataFrame, optional
        Table to read from.  Defaults to the notebook-level ``db`` frame.
    top_n : int, optional
        Number of largest totals to keep for each period (default 10).

    Returns
    -------
    tuple of pandas.Series
        ``(top_now, top_before)``; each is indexed by nationality and sorted
        in descending order of its total.

    Notes
    -----
    Rows are not aggregated: a nationality that appears on several rows of
    the same route is ranked once per row.
    """
    if source is None:
        source = db

    # Chain into a fresh frame rather than dropping in place on a filtered
    # slice, so the result never depends on the chained-assignment warning
    # being silenced and the source table is left untouched.
    per_nationality = (
        source[source.Route == route]
        .drop(columns=['BorderLocation', 'Route'])
        .set_index('NationalityLong')
    )

    cluster_now = per_nationality.iloc[:, -2:].sum(axis=1).sort_values(ascending=False)
    cluster_before = per_nationality.iloc[:, -4:-2].sum(axis=1).sort_values(ascending=False)

    return cluster_now.head(top_n), cluster_before.head(top_n)

In [140]:
def calculate_results(top10_now, top10_before):
    """Compare the current top list with the previous one.

    Parameters
    ----------
    top10_now : pandas.Series
        Current totals, indexed by nationality.
    top10_before : pandas.Series
        Previous totals, indexed by nationality.

    Returns
    -------
    tuple of list
        ``(nationalities, nationalities_dropped)``.

        ``nationalities`` has one entry per label of ``top10_now``, in its
        order.  A label also present in ``top10_before`` gives
        ``[label, percent_change, before_total, now_total]`` with the
        percentage rounded to one decimal; a label that is absent gives
        ``[label, 'NEW (<now_total>)']``.

        ``nationalities_dropped`` lists the labels of ``top10_before`` that
        are missing from ``top10_now``, in ``top10_before`` order.

    Notes
    -----
    A zero previous total is not special-cased.  With NumPy numeric values
    the division yields ``inf`` or ``nan`` instead of raising; the notebook
    silences the corresponding NumPy warnings.
    """
    nationalities = []

    for nat in top10_now.index:
        current = top10_now[nat]
        if nat in top10_before.index:
            previous = top10_before[nat]
            change = ((current - previous) / previous) * 100.0
            nationalities.append([nat, round(change, 1), previous, current])
        else:
            nationalities.append([nat, 'NEW (' + str(current) + ')'])

    nationalities_dropped = [
        nat for nat in top10_before.index if nat not in top10_now.index
    ]

    return nationalities, nationalities_dropped

In [154]:
def show_result(result, result_dropped):
    """Print one line per ranked entry, then one line per dropped entry.

    An entry whose second element is a non-NaN float is printed as
    ``<name>, <change>% (<before> -> <now>)``.  Any other entry (a string
    marker or a NaN change) is printed as ``<name>, <second element>``.
    Each item of ``result_dropped`` is reported as dropped from the top 10.
    """
    for entry in result:
        label, change = entry[0], entry[1]
        has_percentage = isinstance(change, float) and not np.isnan(change)
        if not has_percentage:
            print(label + ', ' + str(change))
            continue
        totals = '(' + str(entry[2]) + ' -> ' + str(entry[3]) + ')'
        print(label + ', ' + str(change) + '% ' + totals)

    for dropped in result_dropped:
        print('{} dropped from top 10.'.format(dropped))

In [142]:
def main(route):
    """Print the comparison report for a single route.

    Writes a header naming the route, builds the current and previous top
    lists with ``define_db``, compares them with ``calculate_results`` and
    prints the outcome through ``show_result``.
    """
    print('\nResults for {}.\n'.format(route))
    # define_db returns (now, before), the argument order calculate_results expects.
    changes, dropped = calculate_results(*define_db(route))
    show_result(changes, dropped)

In [161]:
# Show every distinct route in the data. Computed directly from `db` so this
# cell does not depend on a variable that is only assigned in a later cell.
np.unique(db.Route.values)

array(['Black Sea', 'Central Mediterranean',
       'Circular Route from Albania to Greece', 'Eastern Land Borders',
       'Eastern Mediterranean', 'Other', 'Western African',
       'Western Balkans', 'Western Mediterranean'], dtype=object)

In [163]:
all_routes = np.unique(db.Route.values)
# Exclude the route by name rather than by position, so a change in the set of
# routes cannot silently remove a different one.
routes = all_routes[all_routes != 'Eastern Mediterranean']  # issue with Eastern Med (double counting due to two border types)
routes

array(['Black Sea', 'Central Mediterranean',
       'Circular Route from Albania to Greece', 'Eastern Land Borders',
       'Other', 'Western African', 'Western Balkans',
       'Western Mediterranean'], dtype=object)

In [164]:
# Print the report for each route kept in `routes`, in array order.
for route in routes:
    main(route=route)


Results for Black Sea.

Morocco, nan
Other and stateless, nan
Turkey, nan
Algeria, nan
Somalia, nan
Iran, nan
Belarus, nan
Afghanistan, nan
Syria, nan
Pakistan, nan

Results for Central Mediterranean.

Tunisia, 172.4% (587 -> 1599)
Sudan, 689.6% (106 -> 837)
Eritrea, 82.7% (370 -> 676)
Nigeria, 244.4% (189 -> 651)
Côte d'Ivoire, 896.7% (60 -> 598)
Mali, 422.0% (91 -> 475)
Guinea, 252.0% (98 -> 345)
Algeria, NEW (254)
Senegal, NEW (145)
Pakistan, NEW (145)
Somalia dropped from top 10.
Niger dropped from top 10.
Iraq dropped from top 10.

Results for Circular Route from Albania to Greece.

Albania, 54.1% (695 -> 1071)
Pakistan, NEW (7)
Kosovo*, 133.3% (3 -> 7)
China, NEW (6)
Iran, 400.0% (1 -> 5)
Serbia, NEW (4)
Afghanistan, -40.0% (5 -> 3)
Brazil, NEW (1)
Turkey, NEW (1)
Morocco, NEW (1)
Iraq dropped from top 10.
Libya dropped from top 10.
Algeria dropped from top 10.
Syria dropped from top 10.
Palestine dropped from top 10.
India dropped from top 10.

Results for Eastern Land Borders.