In [535]:
%matplotlib notebook
import pandas as pd
import numpy as np
import os
import csv
import scipy.stats as st
from scipy.stats import linregress
import matplotlib.pyplot as plt


# Path to the cleaned data set, relative to the notebook's working directory
file = os.path.join('cleanData', 'CleanFullDataSet.csv')

# Load the full CSV. low_memory=False asks pandas not to parse the file in
# internal chunks, which avoids mixed-dtype inference within a column.
main_data = pd.read_csv(filepath_or_buffer=file, low_memory=False)

In [536]:
# Keep only the columns used by the exploitation breakdowns in this notebook
labor_df = pd.DataFrame(
    main_data[
        [
            'yearOfRegistration',
            'gender',
            'ageBroad',
            'Citizenship Country',
            'typeOfLabourConcatenated',
            'typeOfSexConcatenated',
            'Exploit Sub-Region',
            'Exploit Country',
        ]
    ]
)
# Preview the first five rows
labor_df.head()

Unnamed: 0,yearOfRegistration,gender,ageBroad,Citizenship Country,typeOfLabourConcatenated,typeOfSexConcatenated,Exploit Sub-Region,Exploit Country
0,2002,Female,18--20,Colombia,unknown,unknown,unknown,unknown
1,2002,Female,18--20,Colombia,unknown,unknown,unknown,unknown
2,2002,Female,18--20,Colombia,unknown,unknown,unknown,unknown
3,2002,Female,18--20,Colombia,unknown,unknown,unknown,unknown
4,2002,Female,18--20,Colombia,unknown,unknown,unknown,unknown


In [537]:
# Count records per value of 'typeOfLabourConcatenated', then remove the
# 'unknown' label so the chart only shows records with a reported sector
labor_pie_df = (
    labor_df['typeOfLabourConcatenated']
    .value_counts()
    .drop('unknown')
)

# Pie chart of the labour demographics by sector
labor_pie_df.plot.pie(
    startangle=120,
    title="Labor Exploitation by Sector",
    shadow=True,
    colors=[
        'crimson',
        'lawngreen',
        'deepskyblue',
        'gold',
        'forestgreen',
        'royalblue',
        'orangered',
        'aqua',
    ],
    figsize=(9, 9),
)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x28714a4bd60>

In [538]:
# Number of records per 'Exploit Country' value, most frequent first
country_count = (
    labor_df["Exploit Country"]
    .value_counts()
)

In [539]:
# Number of records per 'Exploit Sub-Region' value (the 'unknown' label is
# still included here), most frequent first
exp_count = (
    labor_df['Exploit Sub-Region']
    .value_counts()
)
exp_count


Eastern Europe                     14020
Northern America                   12512
unknown                            10175
South-eastern Asia                  6139
Western Asia                        1776
Sub-Saharan Africa                  1268
Southern Europe                      615
Central Asia                         495
Latin America and the Caribbean      455
Eastern Asia                         221
Southern Asia                        155
Northern Africa                       24
Western Europe                        24
Northern Europe                       17
Name: Exploit Sub-Region, dtype: int64

In [540]:
# Count records per value of 'typeOfSexConcatenated', leaving out 'unknown'
sex_df = (
    labor_df['typeOfSexConcatenated']
    .value_counts()
    .drop('unknown')
)
#sex_df.plot(kind="pie",startangle=120, title = "Sex Statistics", shadow=True, colors = ['crimson', 'lawngreen', 'deepskyblue', 'gold', 'forestgreen', 'royalblue', 'orangered', 'aqua'], figsize=(9, 9))

In [541]:
# Labour sector paired with the place of exploitation.
# NOTE: the frame names are the reverse of their contents: country_df holds
# 'Exploit Sub-Region' and region_df holds 'Exploit Country'. The names are
# kept unchanged because other cells reference them.
country_df = main_data[['typeOfLabourConcatenated', 'Exploit Sub-Region']]
# Rows whose labour sector is reported (anything other than 'unknown')
known_labor_df = country_df[country_df['typeOfLabourConcatenated'] != 'unknown']

# Same selection and filter, keyed by exploit country instead of sub-region
region_df = main_data[['typeOfLabourConcatenated', 'Exploit Country']]
known_labor_df2 = region_df[region_df['typeOfLabourConcatenated'] != 'unknown']

In [542]:
# Last five rows of the known-sector table (labour sector and exploit sub-region)
known_labor_df.tail()

Unnamed: 0,typeOfLabourConcatenated,Exploit Sub-Region
48753,Peddling,Northern America
48754,Peddling,Northern America
48755,Peddling,Northern America
48756,Peddling,Northern America
48757,Peddling,Northern America


In [543]:
# Count labour sectors within each exploit sub-region. The result is a Series
# indexed by (sub-region, sector) and replaces the earlier region_df frame.
region_df = (
    known_labor_df
    .groupby(["Exploit Sub-Region"])['typeOfLabourConcatenated']
    .value_counts()
)


In [544]:
# Labour-sector breakdown for records exploited in Eastern Europe.
# The filter previously selected 'Northern America', which contradicted the
# variable names and the chart title below.
eastern_europe_labor_df = country_df[country_df["Exploit Sub-Region"] == 'Eastern Europe']
eastern_pie_df = eastern_europe_labor_df['typeOfLabourConcatenated'].value_counts()
# errors='ignore' keeps the cell runnable if the region has no 'unknown' rows
eastern_pie_df = eastern_pie_df.drop('unknown', errors='ignore')
#eastern_pie_df.plot(kind="pie",startangle=120, title = "Labor Exploitation in Eastern Europe", shadow=True, colors = ['crimson', 'lawngreen', 'deepskyblue', 'gold', 'forestgreen', 'royalblue', 'orangered', 'aqua'], figsize=(9, 9))

In [545]:
# Sub-region labels; they become the row index of the table below, so their
# order must match the order of the values in every sector list
subregions = [
    "Eastern Europe",
    "Northern America",
    "unknown",
    "South-eastern Asia",
    "Western Asia",
    "Sub-Saharan Africa",
    "Southern Europe",
    "Central Asia",
    "Latin America and the Caribbean",
    "Eastern Asia",
    "Southern Asia",
    "Northern Africa",
    "Western Europe",
    "Northern Europe",
]

# Hard-coded table of labour-sector exploitation counts per sub-region:
# one column per sector, one row per entry of `subregions`.
# NOTE(review): the values are typed in by hand, not derived from main_data;
# confirm their provenance before relying on them.
plotdata = pd.DataFrame(
    data={
        "Domestic work": [11, 0, 1363, 896, 272, 0, 0, 0, 103, 42, 46, 11, 0, 0],
        "Construction": [1107, 0, 0, 0, 0, 0, 0, 126, 0, 0, 21, 0, 0, 25],
        "Begging": [0, 22, 0, 0, 0, 103, 0, 35, 0, 0, 0, 0, 0, 0],
        "Aquafarming": [0, 0, 13, 94, 0, 0, 0, 95, 0, 0, 0, 0, 35, 0],
        "Hospitality": [15, 0, 26, 15, 13, 0, 85, 0, 35, 0, 13, 0, 75, 0],
        "Manufacturing": [325, 0, 39, 89, 0, 0, 0, 0, 0, 0, 0, 0, 65, 0],
        "Peddling": [0, 58, 0, 0, 14, 0, 0, 85, 0, 0, 14, 0, 0, 45],
        "Other": [5, 52, 1, 12, 10, 47, 0, 75, 5, 7, 0, 0, 0, 0],
        "Agriculture": [133, 19, 1, 12, 0, 0, 65, 0, 15, 7, 0, 0, 0, 95],
    },
    index=subregions,
)

In [546]:
# Display the hard-coded sector-by-sub-region table (rows: sub-regions, columns: sectors)
plotdata
#Table of sector of Labor Exploitation by Sub-region

Unnamed: 0,Domestic work,Construction,Begging,Aquafarming,Hospitality,Manufacturing,Peddling,Other,Agriculture
Eastern Europe,11,1107,0,0,15,325,0,5,133
Northern America,0,0,22,0,0,0,58,52,19
unknown,1363,0,0,13,26,39,0,1,1
South-eastern Asia,896,0,0,94,15,89,0,12,12
Western Asia,272,0,0,0,13,0,14,10,0
Sub-Saharan Africa,0,0,103,0,0,0,0,47,0
Southern Europe,0,0,0,0,85,0,0,0,65
Central Asia,0,126,35,95,0,0,85,75,0
Latin America and the Caribbean,103,0,0,0,35,0,0,5,15
Eastern Asia,42,0,0,0,0,0,0,7,7


In [547]:
# Stacked percentage bar chart of labour exploitation per sector per sub-region.
# Each row of plotdata is rescaled to percentages of its own total, so every
# bar sums to 100.
stacked_data = plotdata.mul(100).div(plotdata.sum(axis=1), axis=0)

# Keep the Axes returned by pandas and label it explicitly instead of relying
# on pyplot's "current axes" state.
ax = stacked_data.plot(kind="barh", stacked=True)
ax.set_title("Labor Exploitation Sector by Subregion")
ax.set_xlabel("Labor Sector Percentage (%)")
ax.set_ylabel("Subregion")
# With a point anchor, loc names the legend corner pinned to that point.
# "upper center" places the legend centred just below the axes, whereas
# loc="best" would leave the choice to an overlap heuristic.
ax.legend(loc="upper center", bbox_to_anchor=(0.5, -0.15), ncol=2)
plt.tight_layout()


<IPython.core.display.Javascript object>

In [548]:
#TO DO: Pick certain high volume countries for in depth analysis?

#List of high volume countries to pick: United States, Ukraine, Moldova, Russia, Philippines, Ghana, Indonesia, Malaysia,
#Cambodia, UAE, Thailand
# Count labour sectors within each exploit country, using only rows whose
# sector is known; the result is a Series indexed by (country, sector)
example_countries_df = (
    known_labor_df2
    .groupby(["Exploit Country"])['typeOfLabourConcatenated']
    .value_counts()
)
example_countries_df

Exploit Country           typeOfLabourConcatenated
Afghanistan               Begging                       24
                          Construction                  21
                          Hospitality                   13
Cambodia                  Other                         12
China                     Other                          7
Ghana                     Other                         47
Haiti                     Domestic work                103
                          Other                          5
Hong Kong                 Domestic work                 31
India                     Domestic work                 46
Indonesia                 Aquafarming                   94
                          Domestic work                 21
Japan                     Domestic work                 11
Jordan                    Domestic work                 85
Kazakhstan                Construction                 126
Kuwait                    Domestic work                 90
Libya

In [549]:
#TO DO: Sex Exploitation Sector Analysis

In [550]:
#TO DO: Compare exploitation statistics with economic data per country/region (maybe?)