In [180]:
import pandas as pd
import numpy as np
import seaborn as sns
import plotly
import plotly.express as px
import plotly.graph_objects as go
import warnings
warnings.filterwarnings('ignore')
!pip install --quiet pycountry_convert
from pycountry_convert import country_alpha2_to_country_name, country_name_to_country_alpha3
import os

In [181]:
# Read dd.csv (path is relative to the working directory) into `data`,
# the DataFrame that the later cells of this notebook work on.
data = pd.read_csv('dd.csv')

In [182]:
# Preview the first five rows.
data.head(5)

Unnamed: 0,yearOfRegistration,Datasource,gender,ageBroad,majorityStatus,majorityStatusAtExploit,majorityEntry,citizenship,meansOfControlDebtBondage,meansOfControlTakesEarnings,...,typeOfSexPrivateSexualServices,typeOfSexConcatenated,isAbduction,RecruiterRelationship,CountryOfExploitation,recruiterRelationIntimatePartner,recruiterRelationFriend,recruiterRelationFamily,recruiterRelationOther,recruiterRelationUnknown
0,2002,Case Management,Female,18--20,Adult,-99,-99,CO,-99,-99,...,-99,-99,-99,-99,-99,0,0,0,0,1
1,2002,Case Management,Female,18--20,Adult,-99,-99,CO,-99,-99,...,-99,-99,-99,-99,-99,0,0,0,0,1
2,2002,Case Management,Female,18--20,Adult,-99,-99,CO,-99,-99,...,-99,-99,-99,-99,-99,0,0,0,0,1
3,2002,Case Management,Female,18--20,Adult,-99,-99,CO,-99,-99,...,-99,-99,-99,-99,-99,0,0,0,0,1
4,2002,Case Management,Female,18--20,Adult,-99,-99,CO,-99,-99,...,-99,-99,-99,-99,-99,0,0,0,0,1


In [108]:
# Turn the -99 placeholder into NaN, whether it was read as text or as a number.
data = data.replace(['-99', -99], np.nan)

In [183]:
def get_alpha3(col):
    """Return the alpha-3 code for a country name, or ``'Unknown'``.

    Parameters
    ----------
    col
        Country name handed to ``country_name_to_country_alpha3``.

    Returns
    -------
    str
        The converter's result, or the literal ``'Unknown'`` when the
        converter raises an ordinary exception.
    """
    try:
        return country_name_to_country_alpha3(col)
    except Exception:
        # Best-effort lookup: any conversion failure maps to 'Unknown'.
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate, so a long .apply() can still be interrupted.
        return 'Unknown'

def get_name(col):
    """Return the country name for an alpha-2 code, or ``'Unknown'``.

    Parameters
    ----------
    col
        Alpha-2 code handed to ``country_alpha2_to_country_name``.

    Returns
    -------
    str
        The converter's result, or the literal ``'Unknown'`` when the
        converter raises an ordinary exception.
    """
    try:
        return country_alpha2_to_country_name(col)
    except Exception:
        # Best-effort lookup: any conversion failure maps to 'Unknown'.
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate, so a long .apply() can still be interrupted.
        return 'Unknown'

In [184]:
# Citizenship code -> country name -> alpha-3 code. Both helpers return
# 'Unknown' for values they cannot convert.
data['country'] = data['citizenship'].map(get_name)
data['alpha_3'] = data['country'].map(get_alpha3)

# One row per (country, alpha_3) pair with the number of records in it.
data_map = (
    data.groupby(['country', 'alpha_3'])['alpha_3']
    .count()
    .rename('Victims')
    .reset_index()
)

In [185]:
# World map shaded by the per-country record count held in `data_map`.
fig = px.choropleth(
    data_map,
    locations='alpha_3',
    color='Victims',
    hover_name='country',
    color_continuous_scale='Viridis_r',
).update_layout(title_text='Human Trafficking Victims')
fig.show()

In [186]:
# Blue sequential colormap; the styled tables further down reuse it.
cm = sns.light_palette('blue', as_cmap=True)

# Rows: country. Columns: registration year. Cells: number of non-null
# 'Datasource' entries, with 0 where a pair has none.
table = data.pivot_table(
    values='Datasource',
    index='country',
    columns='yearOfRegistration',
    aggfunc='count',
    fill_value=0,
)
table.style.background_gradient(cmap=cm)

yearOfRegistration,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Afghanistan,0,0,0,0,0,0,0,0,0,0,0,12,71,14,0,0,0,0
Albania,0,0,0,22,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Bangladesh,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,0
Belarus,0,0,54,263,233,11,11,0,14,88,161,301,58,106,114,30,19,0
"Bolivia, Plurinational State of",0,0,0,0,0,0,0,0,0,0,18,0,0,0,0,0,0,0
Bulgaria,0,0,0,116,0,226,0,0,0,0,0,0,0,0,0,0,0,0
Burkina Faso,0,0,0,0,0,0,0,0,33,0,0,0,0,0,0,0,0,0
Cambodia,0,0,0,0,0,0,0,0,52,0,0,0,0,116,267,647,897,0
China,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,12,0
Colombia,74,37,0,0,0,0,0,0,13,0,0,0,0,0,0,0,0,0


In [189]:
# Constant weight so every record contributes 1 to its sunburst sector.
data['Victims'] = 1

# Inner ring: gender. Outer ring: age band. Rows without an age band are left out.
fig = px.sunburst(
    data.dropna(subset=['ageBroad']),
    path=['gender', 'ageBroad'],
    values='Victims',
    color='gender',
    title='Gender and Age of Human Trafficking Victims',
).update_layout(width=700, height=700)
fig.show()

In [192]:
# Number of non-null majorityStatus entries per (gender, majorityStatus) pair.
data_bar_mg = (
    data.groupby(['gender', 'majorityStatus'])['majorityStatus']
    .count()
    .rename('Victims')
    .reset_index()
)

In [193]:
# Bars per majority status, coloured by gender, with the count printed above each bar.
fig = (
    px.bar(
        data_bar_mg,
        x='majorityStatus',
        y='Victims',
        color='gender',
        title='Majority and Gender of Human Trafficking Victims',
        labels={'majorityStatus': 'Majority Status'},
    )
    .update_traces(texttemplate='%{value}', textposition='outside')
    .update_layout(hovermode='x')
)
fig.show()

In [194]:
# Lower-case every 'Abuse' to 'abuse'. The pattern is a plain substring, so no
# regex engine is needed.
data['meansOfControlConcatenated'] = data['meansOfControlConcatenated'].str.replace('Abuse', 'abuse', regex=False)

# Per gender: split each non-null entry on ';', flatten the tokens and count them.
# This replaces the per-row `pd.value_counts` expansion (a deprecated top-level
# function that also built one wide DataFrame row per record) and still yields a
# Series when a gender has no rows. Counts are integers, sorted descending.
data_bar_f, data_bar_m = (
    data.loc[data['gender'] == gender, 'meansOfControlConcatenated']
    .dropna()
    .astype(str)
    .str.split(';')
    .explode()
    .value_counts()
    for gender in ('Female', 'Male')
)

In [195]:
# One bar trace per gender, using whatever data_bar_f / data_bar_m currently hold.
fig = (
    go.Figure(data=[
        go.Bar(name=label, x=counts.index, y=counts)
        for label, counts in (('Female', data_bar_f), ('Male', data_bar_m))
    ])
    .update_traces(texttemplate='%{value}', textposition='outside')
    .update_layout(hovermode='x', title_text='Means of Control')
)
fig.show()

In [197]:
# Display order of the age-band columns (also reused by the table3 cell).
age_list = ['0--8', '9--17', '18--20', '21--23', '24--26', '27--29', '30--38', '39--47', '48+']

# Rows that carry both an age band and a means-of-control entry.
means_known = data.dropna(subset=['ageBroad', 'meansOfControlConcatenated'])

# One token-count Series per age band, assembled in a single DataFrame call.
# This avoids growing a frame with concat inside a loop and the deprecated
# top-level `pd.value_counts`, and an age band with no entries no longer
# raises: it simply becomes an all-NaN column after the reindex.
table2 = (
    pd.DataFrame({
        age: entries.astype(str).str.split(';').explode().value_counts()
        for age, entries in means_known.groupby('ageBroad')['meansOfControlConcatenated']
    })
    .rename_axis(index=None)
    .reindex(columns=age_list)
)

In [198]:
# Show missing token/age combinations as 0, shade by magnitude, print whole numbers.
(
    table2.fillna(0)
    .style.background_gradient(cmap=cm)
    .format('{:,.0f}')
)

Unnamed: 0,0--8,9--17,18--20,21--23,24--26,27--29,30--38,39--47,48+
-99,1314,2991,2185,2501,2043,1673,4620,2001,1157
Physical abuse,38,644,495,486,466,325,771,252,163
Psychological abuse,88,1020,535,515,524,373,1153,393,256
Sexual abuse,45,471,220,212,219,116,229,68,37
Debt bondage,0,88,112,144,231,138,357,119,73
Excessive working hours,11,106,43,149,234,189,847,385,215
False promises,32,199,133,213,336,243,937,370,218
Psychoactive substances,5,709,362,328,264,166,328,82,43
Restricts medical care,2,26,39,88,174,131,474,200,151
Restricts movement,19,688,575,532,554,383,1092,366,190


In [18]:
# Per gender: split each non-null typeOfExploitConcatenated entry on ';',
# flatten the tokens and count them. This replaces the per-row `pd.value_counts`
# expansion (a deprecated top-level function that also built one wide DataFrame
# row per record) and still yields a Series when a gender has no rows.
# Counts are integers, sorted descending.
data_bar_f, data_bar_m = (
    data.loc[data['gender'] == gender, 'typeOfExploitConcatenated']
    .dropna()
    .astype(str)
    .str.split(';')
    .explode()
    .value_counts()
    for gender in ('Female', 'Male')
)

In [199]:
# One bar trace per gender, using whatever data_bar_f / data_bar_m currently hold.
fig = (
    go.Figure(data=[
        go.Bar(name=label, x=counts.index, y=counts)
        for label, counts in (('Female', data_bar_f), ('Male', data_bar_m))
    ])
    .update_traces(texttemplate='%{value}', textposition='outside')
    .update_layout(title_text='Type of Exploit')
)
fig.show()

In [200]:
# Rows that carry both an age band and a type-of-exploit entry.
exploit_known = data.dropna(subset=['ageBroad', 'typeOfExploitConcatenated'])

# One token-count Series per age band, assembled in a single DataFrame call.
# This avoids growing a frame with concat inside a loop and the deprecated
# top-level `pd.value_counts`, and an age band with no entries no longer
# raises: it simply becomes an all-NaN column after the reindex.
# `age_list` is the column order defined in the table2 cell.
table3 = (
    pd.DataFrame({
        age: entries.astype(str).str.split(';').explode().value_counts()
        for age, entries in exploit_known.groupby('ageBroad')['typeOfExploitConcatenated']
    })
    .rename_axis(index=None)
    .reindex(columns=age_list)
)

In [201]:
# Show missing token/age combinations as 0, shade by magnitude, print whole numbers.
(
    table3.fillna(0)
    .style.background_gradient(cmap=cm)
    .format('{:,.0f}')
)

Unnamed: 0,0--8,9--17,18--20,21--23,24--26,27--29,30--38,39--47,48+
Sexual exploitation,158,5077,2936,2350,1568,891,1605,388,167
-99,1164,2458,1318,1481,1414,1157,3158,1363,1017
Forced labour,205,1079,193,339,467,582,2331,1040,376
Forced marriage,0,11,15,27,15,11,0,0,0
Combined sexual and labour exploitation,0,32,14,0,0,0,32,0,0
Other,30,52,0,16,0,0,29,30,0
Slavery and similar practices,0,0,0,0,0,0,0,0,0


In [202]:
# Frequency of each distinct typeOfSexConcatenated value (NaN entries are not counted).
data_sex_type = data['typeOfSexConcatenated'].value_counts()

In [203]:
# Pie of the value counts: slice size is the count, slice label is the category.
fig = px.pie(
    data_sex_type,
    values=data_sex_type,
    names=data_sex_type.index,
    title='Distribution of Sex Exploit',
).update_traces(textposition='inside', textinfo='percent+label')
fig.show()

In [204]:
# Per gender: split each non-null RecruiterRelationship entry on '; ' (this
# column's separator includes a space), flatten the tokens and count them.
# This replaces the per-row `pd.value_counts` expansion (a deprecated top-level
# function that also built one wide DataFrame row per record) and still yields
# a Series when a gender has no rows. Counts are integers, sorted descending.
data_bar_f, data_bar_m = (
    data.loc[data['gender'] == gender, 'RecruiterRelationship']
    .dropna()
    .astype(str)
    .str.split('; ')
    .explode()
    .value_counts()
    for gender in ('Female', 'Male')
)

In [205]:
# One bar trace per gender, using whatever data_bar_f / data_bar_m currently hold.
fig = (
    go.Figure(data=[
        go.Bar(name=label, x=counts.index, y=counts)
        for label, counts in (('Female', data_bar_f), ('Male', data_bar_m))
    ])
    .update_traces(texttemplate='%{value}', textposition='outside')
    .update_layout(title_text='Recruiter Relationship')
)
fig.show()

In [206]:
# Assign the constant 1 to every row's 'Victims' column (the sunburst cell
# earlier in the notebook makes the same assignment), then display the frame.
data['Victims'] = 1
data

Unnamed: 0,yearOfRegistration,Datasource,gender,ageBroad,majorityStatus,majorityStatusAtExploit,majorityEntry,citizenship,meansOfControlDebtBondage,meansOfControlTakesEarnings,...,RecruiterRelationship,CountryOfExploitation,recruiterRelationIntimatePartner,recruiterRelationFriend,recruiterRelationFamily,recruiterRelationOther,recruiterRelationUnknown,country,alpha_3,Victims
0,2002,Case Management,Female,18--20,Adult,-99,-99,CO,-99,-99,...,-99,-99,0,0,0,0,1,Colombia,COL,1
1,2002,Case Management,Female,18--20,Adult,-99,-99,CO,-99,-99,...,-99,-99,0,0,0,0,1,Colombia,COL,1
2,2002,Case Management,Female,18--20,Adult,-99,-99,CO,-99,-99,...,-99,-99,0,0,0,0,1,Colombia,COL,1
3,2002,Case Management,Female,18--20,Adult,-99,-99,CO,-99,-99,...,-99,-99,0,0,0,0,1,Colombia,COL,1
4,2002,Case Management,Female,18--20,Adult,-99,-99,CO,-99,-99,...,-99,-99,0,0,0,0,1,Colombia,COL,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48796,2019,Case Management,Female,-99,-99,Adult,-99,UG,0,0,...,-99,UG,-99,-99,-99,-99,-99,Uganda,UGA,1
48797,2019,Case Management,Female,-99,-99,Adult,-99,UG,0,0,...,-99,UG,-99,-99,-99,-99,-99,Uganda,UGA,1
48798,2019,Case Management,Female,-99,-99,Adult,-99,UG,0,0,...,-99,UG,-99,-99,-99,-99,-99,Uganda,UGA,1
48799,2019,Case Management,Female,-99,-99,Minor,-99,UG,0,0,...,-99,UG,-99,-99,-99,-99,-99,Uganda,UGA,1


In [207]:
# Target for the estimators fitted below: the Series currently bound to data_bar_f.
# NOTE(review): data_bar_f is reassigned by several cells; in a top-to-bottom run
# it holds the female RecruiterRelationship token counts here. Confirm that a
# handful of aggregate counts is really the intended target.
y=data_bar_f

In [208]:
# Feature for the estimators fitted below: the Series currently bound to data_bar_m.
# NOTE(review): in a top-to-bottom run this is the male RecruiterRelationship
# token counts; train_test_split needs it to have the same length as `y`,
# which nothing here guarantees. Verify.
x=data_bar_m 

In [209]:
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD, Adam, Adadelta, RMSprop
import keras.backend as K
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Classification Report
from sklearn.metrics import classification_report
import tensorflow as tf

In [210]:
from keras.utils.np_utils import to_categorical

In [211]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing. A fixed random_state makes the split,
# and every score computed from it, repeatable across kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [212]:
# Fully connected classifier: 33 inputs -> 60 -> 15 -> 4-way softmax.
# NOTE(review): this network is only compiled and summarised, never fitted;
# `model` is rebound to a RandomForestClassifier in the next cell.
model = Sequential()
model.add(Dense(60, input_shape=(33,), activation="relu"))
model.add(Dense(15, activation="relu"))
model.add(Dense(4, activation="softmax"))
# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
model.compile(
    optimizer=Adam(learning_rate=0.01),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
model.summary()


Model: "sequential_14"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_41 (Dense)            (None, 60)                2040      
                                                                 
 dense_42 (Dense)            (None, 15)                915       
                                                                 
 dense_43 (Dense)            (None, 4)                 64        
                                                                 
Total params: 3,019
Trainable params: 3,019
Non-trainable params: 0
_________________________________________________________________


In [213]:
from sklearn.ensemble import RandomForestClassifier
# Fixed seed so the fitted forest is the same on every run.
model = RandomForestClassifier(random_state=42)
# x_train is reshaped into a single-column 2-D array before fitting.
model.fit(x_train.values.reshape(-1, 1), y_train)

In [214]:
# Score on the very rows the model was fitted on (not a held-out estimate).
model.score(x_train.to_numpy().reshape(-1, 1), y_train)

1.0

In [215]:
# Predictions for the held-out rows, displayed as the cell output.
z = model.predict(x_test.to_numpy().reshape(-1, 1))
z

array([1619., 1619.])

In [216]:
from sklearn.linear_model import LogisticRegression 
# Logistic regression fitted on the same single-column training feature.
log = LogisticRegression()
log.fit(x_train.to_numpy().reshape(-1, 1), y_train)

In [217]:
# Score on the very rows the model was fitted on (not a held-out estimate).
log.score(x_train.to_numpy().reshape(-1, 1), y_train)

1.0