#### Data Cleaning and Preparation
The goal is to create a dashboard in Tableau to analyze and visualize data from the Missing Migrant Project

In [1]:
import numpy as np 
import pandas as pd 

In [2]:
# Import dataset
mmp_file = "../resources_data/missing_migrants_dataset.csv"
mmp_df = pd.read_csv(mmp_file)
mmp_df.head()

Unnamed: 0,Web ID,Region of Incident,Reported Date,Reported Year,Reported Month,Number Dead,Minimum Estimated Number of Missing,Total Dead and Missing,Number of Survivors,Number of Females,Number of Males,Number of Children,Cause of Death,Location Description,Information Source,Location Coordinates,Migration Route,URL,UNSD Geographical Grouping,Source Quality
0,51591,Mediterranean,28-Mar-19,2019,Mar,,2.0,2,36.0,,2.0,,Presumed drowning,"Off the coast of Chios, Greece",Hellenic Coast Guard via IOM Greece,"38.362368696592, 26.172509473654",Eastern Mediterranean,,Uncategorized,5
1,51588,Mediterranean,26-Mar-19,2019,Mar,4.0,,4,11.0,3.0,,1.0,Presumed drowning,"Off the coast of Ayvacık district, Çanakkale p...",Turkish Coast Guard via IOM Turkey,"39.441975591614, 26.378816195919",Eastern Mediterranean,http://bit.ly/2YmiPAN,Uncategorized,5
2,51589,Mediterranean,26-Mar-19,2019,Mar,1.0,,1,,,,,Presumed drowning,"Body recovered on Playa del Tarajal, Ceuta, Sp...","Ceuta al día, El Pueblo de Ceuta","35.871901875921, -5.343037665842",Western Mediterranean,"http://bit.ly/2uyj7qO, http://bit.ly/2uwj5zC",Uncategorized,3
3,51590,Mediterranean,26-Mar-19,2019,Mar,1.0,,1,,,,,Presumed drowning,"Body recovered on beach near Tetouan, Morocco ...",El Pueblo de Ceuta,"35.635115912988, -5.275650103548",Western Mediterranean,http://bit.ly/2uwj5zC,Uncategorized,1
4,51587,Central America,25-Mar-19,2019,Mar,1.0,,1,,,1.0,,Fall from train,"Train tracks in Teacalco, Tlaxcala, Mexico","Megalópolis, Línea de contraste","19.334475177429, -98.069823987538",,"http://bit.ly/2uvDIvH, http://bit.ly/2TXAFLS",Central America,3


In [3]:
# Drop columns that are not needed for this analysis

mmp_trim = mmp_df.drop(columns=['URL'])
mmp_trim.head()

Unnamed: 0,Web ID,Region of Incident,Reported Date,Reported Year,Reported Month,Number Dead,Minimum Estimated Number of Missing,Total Dead and Missing,Number of Survivors,Number of Females,Number of Males,Number of Children,Cause of Death,Location Description,Information Source,Location Coordinates,Migration Route,UNSD Geographical Grouping,Source Quality
0,51591,Mediterranean,28-Mar-19,2019,Mar,,2.0,2,36.0,,2.0,,Presumed drowning,"Off the coast of Chios, Greece",Hellenic Coast Guard via IOM Greece,"38.362368696592, 26.172509473654",Eastern Mediterranean,Uncategorized,5
1,51588,Mediterranean,26-Mar-19,2019,Mar,4.0,,4,11.0,3.0,,1.0,Presumed drowning,"Off the coast of Ayvacık district, Çanakkale p...",Turkish Coast Guard via IOM Turkey,"39.441975591614, 26.378816195919",Eastern Mediterranean,Uncategorized,5
2,51589,Mediterranean,26-Mar-19,2019,Mar,1.0,,1,,,,,Presumed drowning,"Body recovered on Playa del Tarajal, Ceuta, Sp...","Ceuta al día, El Pueblo de Ceuta","35.871901875921, -5.343037665842",Western Mediterranean,Uncategorized,3
3,51590,Mediterranean,26-Mar-19,2019,Mar,1.0,,1,,,,,Presumed drowning,"Body recovered on beach near Tetouan, Morocco ...",El Pueblo de Ceuta,"35.635115912988, -5.275650103548",Western Mediterranean,Uncategorized,1
4,51587,Central America,25-Mar-19,2019,Mar,1.0,,1,,,1.0,,Fall from train,"Train tracks in Teacalco, Tlaxcala, Mexico","Megalópolis, Línea de contraste","19.334475177429, -98.069823987538",,Central America,3


In [4]:
# Check datatypes
mmp_trim.dtypes

Web ID                                   int64
Region of Incident                      object
Reported Date                           object
Reported Year                            int64
Reported Month                          object
Number Dead                            float64
Minimum Estimated Number of Missing    float64
Total Dead and Missing                   int64
Number of Survivors                    float64
Number of Females                      float64
Number of Males                        float64
Number of Children                     float64
Cause of Death                          object
Location Description                    object
Information Source                      object
Location Coordinates                    object
Migration Route                         object
UNSD Geographical Grouping              object
Source Quality                           int64
dtype: object

In [5]:
# Separate lat and long coordinates 

# Create two lists for the loop results to be placed
lat = []
lon = []

# For each row in a varible,
for row in mmp_trim['Location Coordinates']:
    try:
        # Split the row by comma and append
        # everything before the comma to lat
        
        lat.append(row.split(',')[0])
        
        # Split the row by comma and append
        # everything after the comma to lon
        
        lon.append(row.split(',')[1])
    except:
        # append a missing value to lat
        
        lat.append(np.NaN)
        
        # append a missing value to lon
        lon.append(np.NaN)
        
# Create two new columns from lat and lon
mmp_trim['Latitude'] = lat
mmp_trim['Longitude'] = lon

In [6]:
mmp_trim.head()

Unnamed: 0,Web ID,Region of Incident,Reported Date,Reported Year,Reported Month,Number Dead,Minimum Estimated Number of Missing,Total Dead and Missing,Number of Survivors,Number of Females,...,Number of Children,Cause of Death,Location Description,Information Source,Location Coordinates,Migration Route,UNSD Geographical Grouping,Source Quality,Latitude,Longitude
0,51591,Mediterranean,28-Mar-19,2019,Mar,,2.0,2,36.0,,...,,Presumed drowning,"Off the coast of Chios, Greece",Hellenic Coast Guard via IOM Greece,"38.362368696592, 26.172509473654",Eastern Mediterranean,Uncategorized,5,38.362368696592,26.172509473654
1,51588,Mediterranean,26-Mar-19,2019,Mar,4.0,,4,11.0,3.0,...,1.0,Presumed drowning,"Off the coast of Ayvacık district, Çanakkale p...",Turkish Coast Guard via IOM Turkey,"39.441975591614, 26.378816195919",Eastern Mediterranean,Uncategorized,5,39.441975591614,26.378816195919
2,51589,Mediterranean,26-Mar-19,2019,Mar,1.0,,1,,,...,,Presumed drowning,"Body recovered on Playa del Tarajal, Ceuta, Sp...","Ceuta al día, El Pueblo de Ceuta","35.871901875921, -5.343037665842",Western Mediterranean,Uncategorized,3,35.871901875921,-5.343037665842
3,51590,Mediterranean,26-Mar-19,2019,Mar,1.0,,1,,,...,,Presumed drowning,"Body recovered on beach near Tetouan, Morocco ...",El Pueblo de Ceuta,"35.635115912988, -5.275650103548",Western Mediterranean,Uncategorized,1,35.635115912988,-5.275650103548
4,51587,Central America,25-Mar-19,2019,Mar,1.0,,1,,,...,,Fall from train,"Train tracks in Teacalco, Tlaxcala, Mexico","Megalópolis, Línea de contraste","19.334475177429, -98.069823987538",,Central America,3,19.334475177429,-98.069823987538


In [9]:
# Drop Location Coordinates 

mmp_update = mmp_trim.drop(columns=['Location Coordinates'])
mmp_update

Unnamed: 0,Web ID,Region of Incident,Reported Date,Reported Year,Reported Month,Number Dead,Minimum Estimated Number of Missing,Total Dead and Missing,Number of Survivors,Number of Females,Number of Males,Number of Children,Cause of Death,Location Description,Information Source,Migration Route,UNSD Geographical Grouping,Source Quality,Latitude,Longitude
0,51591,Mediterranean,28-Mar-19,2019,Mar,,2.0,2,36.0,,2.0,,Presumed drowning,"Off the coast of Chios, Greece",Hellenic Coast Guard via IOM Greece,Eastern Mediterranean,Uncategorized,5,38.362368696592,26.172509473654
1,51588,Mediterranean,26-Mar-19,2019,Mar,4.0,,4,11.0,3.0,,1.0,Presumed drowning,"Off the coast of Ayvacık district, Çanakkale p...",Turkish Coast Guard via IOM Turkey,Eastern Mediterranean,Uncategorized,5,39.441975591614,26.378816195919
2,51589,Mediterranean,26-Mar-19,2019,Mar,1.0,,1,,,,,Presumed drowning,"Body recovered on Playa del Tarajal, Ceuta, Sp...","Ceuta al día, El Pueblo de Ceuta",Western Mediterranean,Uncategorized,3,35.871901875921,-5.343037665842
3,51590,Mediterranean,26-Mar-19,2019,Mar,1.0,,1,,,,,Presumed drowning,"Body recovered on beach near Tetouan, Morocco ...",El Pueblo de Ceuta,Western Mediterranean,Uncategorized,1,35.635115912988,-5.275650103548
4,51587,Central America,25-Mar-19,2019,Mar,1.0,,1,,,1.0,,Fall from train,"Train tracks in Teacalco, Tlaxcala, Mexico","Megalópolis, Línea de contraste",,Central America,3,19.334475177429,-98.069823987538
5,51580,US-Mexico Border,23-Mar-19,2019,Mar,1.0,,1,,,,,Unknown (skeletal remains),"Pima County jurisdiction, Arizona, USA",Pima County Office of the Medical Examiner,,Northern America,5,32.057499000000,-111.666072500000
6,51581,US-Mexico Border,23-Mar-19,2019,Mar,1.0,,1,,,,,Unknown (skeletal remains),"Pima County jurisdiction, Arizona, USA",Pima County Office of the Medical Examiner,,Northern America,5,32.057499000000,-111.666072500000
7,51585,Southeast Asia,23-Mar-19,2019,Mar,8.0,2.0,10,,1.0,,,Vehicle Accident,"Sa Setthi intersection in Tambon Ban Mai, Tha ...","The Nation, Vietnam+",,South-eastern Asia,3,13.865704814542,99.587248723929
8,51586,Mediterranean,23-Mar-19,2019,Mar,4.0,4.0,8,,3.0,5.0,,Presumed drowning,"Off the coast of Sfax, Tunisia","Shems FM, Tunisie Numérique, Kapitalis",Central Mediterranean,Uncategorized,3,35.317034468315,11.078928258810
9,51578,US-Mexico Border,21-Mar-19,2019,Mar,1.0,,1,,,,,Unknown (skeletal remains),"Pima County jurisdiction, Arizona, USA",Pima County Office of the Medical Examiner,,Northern America,5,32.057499000000,-111.666072500000


In [10]:
# Export CSV

mmp_update.to_csv('../resources_data/cleaned_mmp.csv')