This notebook explores the regional data by case counts.

In [1]:
# import libraries
import os
import csv
import pandas as pd
import gmaps
import requests
import json

# build the relative path to the cleaned data set (one directory up, in cleanData)
data_csv = os.path.join(os.pardir, 'cleanData', 'CleanFullDataSet.csv')

# load the CSV into a pandas data frame
data_df = pd.read_csv(
    data_csv,
    low_memory=False,
)


# the Google developer API key is kept in a local config module, not in the notebook
from config import gkey

# register the key with gmaps so the map widgets can be rendered
gmaps.configure(api_key=gkey)

In [2]:
# list the column names of the loaded data frame
data_df.columns

Index(['yearOfRegistration', 'Datasource', 'gender', 'ageBroad',
       'majorityStatus', 'majorityStatusAtExploit', 'majorityEntry',
       'citizenship', 'meansOfControlDebtBondage',
       'meansOfControlTakesEarnings', 'meansOfControlRestrictsFinancialAccess',
       'meansOfControlThreats', 'meansOfControlPsychologicalAbuse',
       'meansOfControlPhysicalAbuse', 'meansOfControlSexualAbuse',
       'meansOfControlFalsePromises', 'meansOfControlPsychoactiveSubstances',
       'meansOfControlRestrictsMovement', 'meansOfControlRestrictsMedicalCare',
       'meansOfControlExcessiveWorkingHours', 'meansOfControlUsesChildren',
       'meansOfControlThreatOfLawEnforcement',
       'meansOfControlWithholdsNecessities',
       'meansOfControlWithholdsDocuments', 'meansOfControlOther',
       'meansOfControlNotSpecified', 'meansOfControlConcatenated',
       'isForcedLabour', 'isSexualExploit', 'isOtherExploit', 'isSexAndLabour',
       'isForcedMarriage', 'isForcedMilitary', 'isOrganRemova

In [3]:
# keep only the registration, demographic, citizenship and exploitation-location columns
data_df = data_df[[
    'yearOfRegistration',
    'Datasource',
    'gender',
    'ageBroad',
    'majorityStatus',
    'majorityStatusAtExploit',
    'majorityEntry',
    'citizenship',
    'Citizenship Region',
    'Citizenship Sub-Region',
    'Citizenship Intermediate Region',
    'Citizenship Country',
    'CountryOfExploitation',
    'Exploit Region',
    'Exploit Sub-Region',
    'Exploit Intermediate Region',
    'Exploit Country',
]]

# preview the first rows of the reduced frame
data_df.head()

Unnamed: 0,yearOfRegistration,Datasource,gender,ageBroad,majorityStatus,majorityStatusAtExploit,majorityEntry,citizenship,Citizenship Region,Citizenship Sub-Region,Citizenship Intermediate Region,Citizenship Country,CountryOfExploitation,Exploit Region,Exploit Sub-Region,Exploit Intermediate Region,Exploit Country
0,2002,Case Management,Female,18--20,Adult,unknown,unknown,CO,Americas,Latin America and the Caribbean,South America,Colombia,unknown,unknown,unknown,unknown,unknown
1,2002,Case Management,Female,18--20,Adult,unknown,unknown,CO,Americas,Latin America and the Caribbean,South America,Colombia,unknown,unknown,unknown,unknown,unknown
2,2002,Case Management,Female,18--20,Adult,unknown,unknown,CO,Americas,Latin America and the Caribbean,South America,Colombia,unknown,unknown,unknown,unknown,unknown
3,2002,Case Management,Female,18--20,Adult,unknown,unknown,CO,Americas,Latin America and the Caribbean,South America,Colombia,unknown,unknown,unknown,unknown,unknown
4,2002,Case Management,Female,18--20,Adult,unknown,unknown,CO,Americas,Latin America and the Caribbean,South America,Colombia,unknown,unknown,unknown,unknown,unknown


In [4]:
# non-null counts per column for every (citizenship region, gender) pair
(
    data_df
    .groupby(by=['Citizenship Region', 'gender'])
    .count()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,yearOfRegistration,Datasource,ageBroad,majorityStatus,majorityStatusAtExploit,majorityEntry,citizenship,Citizenship Sub-Region,Citizenship Intermediate Region,Citizenship Country,CountryOfExploitation,Exploit Region,Exploit Sub-Region,Exploit Intermediate Region,Exploit Country
Citizenship Region,gender,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Africa,Female,585,585,585,585,585,585,585,585,585,585,585,585,585,445,585
Africa,Male,780,780,780,780,780,780,780,780,780,780,780,780,780,767,780
Americas,Female,3942,3942,3942,3942,3942,3942,3942,3942,332,3942,3942,3942,3942,332,3942
Americas,Male,194,194,194,194,194,194,194,194,168,194,194,194,194,149,194
Asia,Female,11725,11725,11725,11725,11725,11725,11725,11725,0,11725,11725,11639,11639,6675,11639
Asia,Male,6267,6267,6267,6267,6267,6267,6267,6267,0,6267,6267,6169,6169,2648,6169
Europe,Female,10518,10518,10518,10518,10518,10518,10518,10518,0,10518,10518,10019,10019,732,10019
Europe,Male,5626,5626,5626,5626,5626,5626,5626,5626,0,5626,5626,5432,5432,0,5432
unknown,Female,8736,8736,8736,8736,8736,8736,8736,8736,8736,8736,8736,8736,8736,87,8736
unknown,Male,400,400,400,400,400,400,400,400,400,400,400,400,400,63,400


In [5]:
# non-null counts per column for each gender
(
    data_df
    .groupby(by='gender')
    .count()
)

Unnamed: 0_level_0,yearOfRegistration,Datasource,ageBroad,majorityStatus,majorityStatusAtExploit,majorityEntry,citizenship,Citizenship Region,Citizenship Sub-Region,Citizenship Intermediate Region,Citizenship Country,CountryOfExploitation,Exploit Region,Exploit Sub-Region,Exploit Intermediate Region,Exploit Country
gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Female,35506,35506,35506,35506,35506,35506,35506,35506,35506,9653,35506,35506,34921,34921,8271,34921
Male,13267,13267,13267,13267,13267,13267,13267,13267,13267,1348,13267,13267,12975,12975,3627,12975


In [6]:
# non-null counts per column for each region of exploitation
(
    data_df
    .groupby(by='Exploit Region')
    .count()
)

Unnamed: 0_level_0,yearOfRegistration,Datasource,gender,ageBroad,majorityStatus,majorityStatusAtExploit,majorityEntry,citizenship,Citizenship Region,Citizenship Sub-Region,Citizenship Intermediate Region,Citizenship Country,CountryOfExploitation,Exploit Sub-Region,Exploit Intermediate Region,Exploit Country
Exploit Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Africa,1292,1292,1292,1292,1292,1292,1292,1292,1292,1292,1236,1292,1292,1292,1268,1292
Americas,12967,12967,12967,12967,12967,12967,12967,12967,12967,12967,9211,12967,12967,12967,455,12967
Asia,8786,8786,8786,8786,8786,8786,8786,8786,8786,8786,149,8786,8786,8786,0,8786
Europe,14676,14676,14676,14676,14676,14676,14676,14676,14676,14676,144,14676,14676,14676,0,14676
unknown,10175,10175,10175,10175,10175,10175,10175,10175,10175,10175,261,10175,10175,10175,10175,10175


In [7]:
# create data frame grouping by exploit region and number of cases;
# yearOfRegistration is used as the counted column
exploit_df = (
    data_df
    .groupby('Exploit Region')[['yearOfRegistration']]
    .count()
    .reset_index()
)

# exclude where exploit region is unknown
# .copy() makes the filtered result an independent frame: the Lat/Lon columns
# are assigned onto it in a later cell, and assigning onto a filtered slice
# can trigger pandas' SettingWithCopyWarning
exploit_df = exploit_df.loc[exploit_df['Exploit Region'] != 'unknown', :].copy()
exploit_df

Unnamed: 0,Exploit Region,yearOfRegistration
0,Africa,1292
1,Americas,12967
2,Asia,8786
3,Europe,14676


In [8]:
# manually set latitude and longitude, keyed by region name so a coordinate
# can never be attached to the wrong row if the row order or the set of
# regions in the data changes
region_coords = {
    'Africa': (-8.783195, 34.508522),
    'Americas': (37.090240, -95.712891),
    'Asia': (34.161818, 110.616993),
    'Europe': (47.159840, 8.419894),
}

# fail loudly, naming the region, instead of silently mislabelling the map
missing_regions = sorted(set(exploit_df['Exploit Region']) - set(region_coords))
if missing_regions:
    raise ValueError(
        "no coordinates defined for region(s): {}".format(missing_regions)
    )

exploit_df['Lat'] = [region_coords[region][0] for region in exploit_df['Exploit Region']]
exploit_df['Lon'] = [region_coords[region][1] for region in exploit_df['Exploit Region']]

In [9]:
# the counted yearOfRegistration column holds the number of cases, so label it 'Count'
exploit_df = exploit_df.rename(
    columns={'yearOfRegistration': 'Count'},
)

In [10]:
# display the per-region table after the rename
exploit_df

Unnamed: 0,Exploit Region,Count,Lat,Lon
0,Africa,1292,-8.783195,34.508522
1,Americas,12967,37.09024,-95.712891
2,Asia,8786,34.161818,110.616993
3,Europe,14676,47.15984,8.419894


In [11]:
# coordinates of each region, and the case count used to weight it on the map
locations = exploit_df.loc[:, ['Lat', 'Lon']]
weights = exploit_df.loc[:, 'Count']


In [13]:
# create google map
fig = gmaps.figure()

# heatmap layer first: one point per region, weighted by `weights`
fig.add_layer(
    gmaps.heatmap_layer(
        locations,
        weights=weights,
        point_radius=50,
    )
)

# then a marker per region, labelled with the region name
markers = gmaps.marker_layer(locations, label=exploit_df['Exploit Region'])
fig.add_layer(markers)

fig

Figure(layout=FigureLayout(height='420px'))

In [14]:
# use this function to return latitude and longitude of a location
def getlatlng(params):
    """Look up a location with the Google Geocoding API.

    Parameters
    ----------
    params : dict
        Query parameters sent with the request, e.g.
        ``{'address': 'Sub-Saharan Africa', 'key': gkey}``.

    Returns
    -------
    tuple
        ``(lat, lng)`` taken from the first entry of the response's
        ``results`` list.

    Raises
    ------
    requests.HTTPError
        If the server answers with an HTTP error status.
    IndexError
        If the response contains no results; the message carries the
        ``status`` and ``error_message`` fields of the response.
    """
    baseURL = "https://maps.googleapis.com/maps/api/geocode/json"

    # a timeout keeps a stalled connection from hanging the kernel indefinitely
    response = requests.get(baseURL, params=params, timeout=10)
    response.raise_for_status()
    data = response.json()

    results = data['results']
    if not results:
        # same exception type the bare results[0] lookup would raise, but
        # with the API's own explanation instead of "list index out of range"
        raise IndexError(
            "geocoding returned no results (status: {}, message: {})".format(
                data.get('status'), data.get('error_message')
            )
        )

    location = results[0]['geometry']['location']
    return location['lat'], location['lng']

In [15]:
# geocode "Sub-Saharan Africa" with the helper; the cell displays the (lat, lng) pair
lat_lng_param = dict(address="Sub-Saharan Africa", key=gkey)
getlatlng(lat_lng_param)

(23.4162027, 25.66283)

In [16]:
# one marker per region, labelled with the region name
markers = gmaps.marker_layer(locations, label=exploit_df['Exploit Region'])

# second map showing only the markers (no heatmap)
fig2 = gmaps.figure()
fig2.add_layer(markers)
fig2

Figure(layout=FigureLayout(height='420px'))

In [None]:
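# # NOTE: unused template kept for reference -- it refers to `hotel_df`, which is not defined in any cell shown above
# # Using the template add the hotel marks to the heatmap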
# info_box_template = """
# <dl>
# <dt>Name</dt><dd>{Hotel Name}</dd>
# <dt>City</dt><dd>{City}</dd>
# <dt>Country</dt><dd>{Country}</dd>
# </dl>
# """
# # Store the DataFrame Row
# # NOTE: be sure to update with your DataFrame name
# hotel_info = [info_box_template.format(**row) for index, row in hotel_df.iterrows()]
# locations = hotel_df[["Lat", "Lng"]]