#### Data Cleaning and Preparation
The goal is to create a dashboard in Tableau to analyze and visualize data from the Missing Migrant Project

In [1]:
import numpy as np 
import pandas as pd 

In [2]:
# Import dataset
mmp_file = "../resources_data/missing_migrants_dataset.csv"
mmp_df = pd.read_csv(mmp_file)
mmp_df.head()

Unnamed: 0,Web ID,Region of Incident,Reported Date,Reported Year,Reported Month,Number Dead,Minimum Estimated Number of Missing,Total Dead and Missing,Number of Survivors,Number of Females,Number of Males,Number of Children,Cause of Death,Location Description,Information Source,Location Coordinates,Migration Route,URL,UNSD Geographical Grouping,Source Quality
0,51591,Mediterranean,28-Mar-19,2019,Mar,,2.0,2,36.0,,2.0,,Presumed drowning,"Off the coast of Chios, Greece",Hellenic Coast Guard via IOM Greece,"38.362368696592, 26.172509473654",Eastern Mediterranean,,Uncategorized,5
1,51588,Mediterranean,26-Mar-19,2019,Mar,4.0,,4,11.0,3.0,,1.0,Presumed drowning,"Off the coast of Ayvacık district, Çanakkale p...",Turkish Coast Guard via IOM Turkey,"39.441975591614, 26.378816195919",Eastern Mediterranean,http://bit.ly/2YmiPAN,Uncategorized,5
2,51589,Mediterranean,26-Mar-19,2019,Mar,1.0,,1,,,,,Presumed drowning,"Body recovered on Playa del Tarajal, Ceuta, Sp...","Ceuta al día, El Pueblo de Ceuta","35.871901875921, -5.343037665842",Western Mediterranean,"http://bit.ly/2uyj7qO, http://bit.ly/2uwj5zC",Uncategorized,3
3,51590,Mediterranean,26-Mar-19,2019,Mar,1.0,,1,,,,,Presumed drowning,"Body recovered on beach near Tetouan, Morocco ...",El Pueblo de Ceuta,"35.635115912988, -5.275650103548",Western Mediterranean,http://bit.ly/2uwj5zC,Uncategorized,1
4,51587,Central America,25-Mar-19,2019,Mar,1.0,,1,,,1.0,,Fall from train,"Train tracks in Teacalco, Tlaxcala, Mexico","Megalópolis, Línea de contraste","19.334475177429, -98.069823987538",,"http://bit.ly/2uvDIvH, http://bit.ly/2TXAFLS",Central America,3


In [3]:
# Drop columns that are not needed for this analysis

mmp_trim = mmp_df.drop(columns=['URL', 'Minimum Estimated Number of Missing'])
mmp_trim.head()

Unnamed: 0,Web ID,Region of Incident,Reported Date,Reported Year,Reported Month,Number Dead,Total Dead and Missing,Number of Survivors,Number of Females,Number of Males,Number of Children,Cause of Death,Location Description,Information Source,Location Coordinates,Migration Route,UNSD Geographical Grouping,Source Quality
0,51591,Mediterranean,28-Mar-19,2019,Mar,,2,36.0,,2.0,,Presumed drowning,"Off the coast of Chios, Greece",Hellenic Coast Guard via IOM Greece,"38.362368696592, 26.172509473654",Eastern Mediterranean,Uncategorized,5
1,51588,Mediterranean,26-Mar-19,2019,Mar,4.0,4,11.0,3.0,,1.0,Presumed drowning,"Off the coast of Ayvacık district, Çanakkale p...",Turkish Coast Guard via IOM Turkey,"39.441975591614, 26.378816195919",Eastern Mediterranean,Uncategorized,5
2,51589,Mediterranean,26-Mar-19,2019,Mar,1.0,1,,,,,Presumed drowning,"Body recovered on Playa del Tarajal, Ceuta, Sp...","Ceuta al día, El Pueblo de Ceuta","35.871901875921, -5.343037665842",Western Mediterranean,Uncategorized,3
3,51590,Mediterranean,26-Mar-19,2019,Mar,1.0,1,,,,,Presumed drowning,"Body recovered on beach near Tetouan, Morocco ...",El Pueblo de Ceuta,"35.635115912988, -5.275650103548",Western Mediterranean,Uncategorized,1
4,51587,Central America,25-Mar-19,2019,Mar,1.0,1,,,1.0,,Fall from train,"Train tracks in Teacalco, Tlaxcala, Mexico","Megalópolis, Línea de contraste","19.334475177429, -98.069823987538",,Central America,3


In [4]:
# Check datatypes
mmp_trim.dtypes

Web ID                          int64
Region of Incident             object
Reported Date                  object
Reported Year                   int64
Reported Month                 object
Number Dead                   float64
Total Dead and Missing          int64
Number of Survivors           float64
Number of Females             float64
Number of Males               float64
Number of Children            float64
Cause of Death                 object
Location Description           object
Information Source             object
Location Coordinates           object
Migration Route                object
UNSD Geographical Grouping     object
Source Quality                  int64
dtype: object

In [5]:
# Separate lat and long coordinates 

# Create two lists for the loop results to be placed
lat = []
lon = []

# For each row in a varible,
for row in mmp_trim['Location Coordinates']:
    try:
        # Split the row by comma and append
        # everything before the comma to lat
        
        lat.append(row.split(',')[0])
        
        # Split the row by comma and append
        # everything after the comma to lon
        
        lon.append(row.split(',')[1])
    except:
        # append a missing value to lat
        
        lat.append(np.NaN)
        
        # append a missing value to lon
        lon.append(np.NaN)
        
# Create two new columns from lat and lon
mmp_trim['Latitude'] = lat
mmp_trim['Longitude'] = lon

In [6]:
mmp_trim.head()

Unnamed: 0,Web ID,Region of Incident,Reported Date,Reported Year,Reported Month,Number Dead,Total Dead and Missing,Number of Survivors,Number of Females,Number of Males,Number of Children,Cause of Death,Location Description,Information Source,Location Coordinates,Migration Route,UNSD Geographical Grouping,Source Quality,Latitude,Longitude
0,51591,Mediterranean,28-Mar-19,2019,Mar,,2,36.0,,2.0,,Presumed drowning,"Off the coast of Chios, Greece",Hellenic Coast Guard via IOM Greece,"38.362368696592, 26.172509473654",Eastern Mediterranean,Uncategorized,5,38.362368696592,26.172509473654
1,51588,Mediterranean,26-Mar-19,2019,Mar,4.0,4,11.0,3.0,,1.0,Presumed drowning,"Off the coast of Ayvacık district, Çanakkale p...",Turkish Coast Guard via IOM Turkey,"39.441975591614, 26.378816195919",Eastern Mediterranean,Uncategorized,5,39.441975591614,26.378816195919
2,51589,Mediterranean,26-Mar-19,2019,Mar,1.0,1,,,,,Presumed drowning,"Body recovered on Playa del Tarajal, Ceuta, Sp...","Ceuta al día, El Pueblo de Ceuta","35.871901875921, -5.343037665842",Western Mediterranean,Uncategorized,3,35.871901875921,-5.343037665842
3,51590,Mediterranean,26-Mar-19,2019,Mar,1.0,1,,,,,Presumed drowning,"Body recovered on beach near Tetouan, Morocco ...",El Pueblo de Ceuta,"35.635115912988, -5.275650103548",Western Mediterranean,Uncategorized,1,35.635115912988,-5.275650103548
4,51587,Central America,25-Mar-19,2019,Mar,1.0,1,,,1.0,,Fall from train,"Train tracks in Teacalco, Tlaxcala, Mexico","Megalópolis, Línea de contraste","19.334475177429, -98.069823987538",,Central America,3,19.334475177429,-98.069823987538


In [7]:
# Drop Location Coordinates 

mmp_update = mmp_trim.drop(columns=['Location Coordinates'])
mmp_update.head()

Unnamed: 0,Web ID,Region of Incident,Reported Date,Reported Year,Reported Month,Number Dead,Total Dead and Missing,Number of Survivors,Number of Females,Number of Males,Number of Children,Cause of Death,Location Description,Information Source,Migration Route,UNSD Geographical Grouping,Source Quality,Latitude,Longitude
0,51591,Mediterranean,28-Mar-19,2019,Mar,,2,36.0,,2.0,,Presumed drowning,"Off the coast of Chios, Greece",Hellenic Coast Guard via IOM Greece,Eastern Mediterranean,Uncategorized,5,38.362368696592,26.172509473654
1,51588,Mediterranean,26-Mar-19,2019,Mar,4.0,4,11.0,3.0,,1.0,Presumed drowning,"Off the coast of Ayvacık district, Çanakkale p...",Turkish Coast Guard via IOM Turkey,Eastern Mediterranean,Uncategorized,5,39.441975591614,26.378816195919
2,51589,Mediterranean,26-Mar-19,2019,Mar,1.0,1,,,,,Presumed drowning,"Body recovered on Playa del Tarajal, Ceuta, Sp...","Ceuta al día, El Pueblo de Ceuta",Western Mediterranean,Uncategorized,3,35.871901875921,-5.343037665842
3,51590,Mediterranean,26-Mar-19,2019,Mar,1.0,1,,,,,Presumed drowning,"Body recovered on beach near Tetouan, Morocco ...",El Pueblo de Ceuta,Western Mediterranean,Uncategorized,1,35.635115912988,-5.275650103548
4,51587,Central America,25-Mar-19,2019,Mar,1.0,1,,,1.0,,Fall from train,"Train tracks in Teacalco, Tlaxcala, Mexico","Megalópolis, Línea de contraste",,Central America,3,19.334475177429,-98.069823987538


In [8]:
mmp_update.count() 

Web ID                        5333
Region of Incident            5333
Reported Date                 5333
Reported Year                 5333
Reported Month                5333
Number Dead                   5142
Total Dead and Missing        5333
Number of Survivors            749
Number of Females              897
Number of Males               2813
Number of Children             644
Cause of Death                5333
Location Description          5323
Information Source            5331
Migration Route               2660
UNSD Geographical Grouping    5322
Source Quality                5333
Latitude                      5332
Longitude                     5332
dtype: int64

In [9]:
mmp_update["Cause of Death"].value_counts() 

Drowning                                                                                                   968
Sickness and lack of access to medicines                                                                   757
Unknown                                                                                                    549
Unknown (skeletal remains)                                                                                 469
Vehicle Accident                                                                                           444
Presumed drowning                                                                                          347
Shot                                                                                                       171
Dehydration                                                                                                121
Starvation                                                                                                  89
H

In [10]:
# Combining similar causes together

mmp_update["Cause of Death"] = mmp_update["Cause of Death"].replace(
    {"Unknown (skeletal remains)": "Cause Unknown", "Unknown": "Cause Unknown", "Presumed drowning": "Drowning",
     "Drowning, Asphyxiation": "Drowning", "Crushed, Drowning": "Drowning", "Starvation, Presumed drowning": "Drowning"})
mmp_update.head()

Unnamed: 0,Web ID,Region of Incident,Reported Date,Reported Year,Reported Month,Number Dead,Total Dead and Missing,Number of Survivors,Number of Females,Number of Males,Number of Children,Cause of Death,Location Description,Information Source,Migration Route,UNSD Geographical Grouping,Source Quality,Latitude,Longitude
0,51591,Mediterranean,28-Mar-19,2019,Mar,,2,36.0,,2.0,,Drowning,"Off the coast of Chios, Greece",Hellenic Coast Guard via IOM Greece,Eastern Mediterranean,Uncategorized,5,38.362368696592,26.172509473654
1,51588,Mediterranean,26-Mar-19,2019,Mar,4.0,4,11.0,3.0,,1.0,Drowning,"Off the coast of Ayvacık district, Çanakkale p...",Turkish Coast Guard via IOM Turkey,Eastern Mediterranean,Uncategorized,5,39.441975591614,26.378816195919
2,51589,Mediterranean,26-Mar-19,2019,Mar,1.0,1,,,,,Drowning,"Body recovered on Playa del Tarajal, Ceuta, Sp...","Ceuta al día, El Pueblo de Ceuta",Western Mediterranean,Uncategorized,3,35.871901875921,-5.343037665842
3,51590,Mediterranean,26-Mar-19,2019,Mar,1.0,1,,,,,Drowning,"Body recovered on beach near Tetouan, Morocco ...",El Pueblo de Ceuta,Western Mediterranean,Uncategorized,1,35.635115912988,-5.275650103548
4,51587,Central America,25-Mar-19,2019,Mar,1.0,1,,,1.0,,Fall from train,"Train tracks in Teacalco, Tlaxcala, Mexico","Megalópolis, Línea de contraste",,Central America,3,19.334475177429,-98.069823987538


In [42]:
#mmp_update["Cause of Death"].value_counts() 

In [12]:
mmp_update["Cause of Death"] = mmp_update["Cause of Death"].replace(
    {"Sickness and lack of access to medicines": "Sickness", "Sickness and lack of access to medicines, Starvation, Dehydration": "Sickness",
    "Mixed": "Unknown Cause", "Pulmonary edema and renal insufficiency": "Sickness",
     "Sickness and lack of access to medicines, Dehydration, Harsh weather/lack of adequate shelter, Excessive Physical Abuse": "Sickness",
     "Pulmonary edema and renal insufficiency, Sickness and lack of access to medicines": "Sickness"})

In [43]:
#mmp_update["Cause of Death"].value_counts() 

In [15]:
mmp_update["Cause of Death"] = mmp_update["Cause of Death"].replace(
    {"Unknown Cause": "Cause Unknown", "Unknown (mummified and skeletal remains)": "Cause Unknown",
    "Dehydration": "Starvation, Dehydration", "Starvation": "Starvation, Dehydration",
     "Starvation, Dehydration, Harsh weather/lack of adequate shelter": "Starvation, Dehydration", 
     "Dehydration, Harsh weather/lack of adequate shelter": "Starvation, Dehydration", 
    "Starvation, Violence, Dehydration": "Starvation, Dehydration", "Exposure, Dehydration": "Starvation, Dehydration", 
    "Dehydration, Excessive Physical Abuse": "Starvation, Dehydration"})

In [44]:
#mmp_update["Cause of Death"].value_counts() 

In [19]:
mmp_update["Cause of Death"] = mmp_update["Cause of Death"].replace(
    {"Hyperthermia": "Hypothermia", "Presumed hyperthermia": "Hypothermia", "Probable hyperthermia": "Hypothermia", 
    "Vehicle Accident": "Train/Vehicle Acc.", "Fall from train": "Train/Vehicle Acc.", 
    "Hit by train": "Train/Vehicle Acc.", "Train Accident": "Train/Vehicle Acc.", "Hit by vehicle": "Train/Vehicle Acc.", 
    "Vehicle Accident, Shot or stabbed": "Train/Vehicle Acc.", "Vehicle Accident, Hit by car": "Train/Vehicle Acc.", 
    "Hit by car": "Train/Vehicle Acc.", "Vehicle Accident, Crushed in back of truck": "Train/Vehicle Acc.", 
    "Sickness and lack of access to medicines, Dehydration, Harsh weather/lack of adequate shelter": "Sickness"})

In [45]:
#mmp_update["Cause of Death"].value_counts()

In [24]:
mmp_update["Cause of Death"] = mmp_update["Cause of Death"].replace(
    {"Shot": "Violence/Murder", "Shot or stabbed": "Violence/Murder", "Violence": "Violence/Murder", 
    "Murdered": "Violence/Murder", "Suffocation": "Violence/Murder", "Excessive Physical Abuse": "Violence/Murder", 
    "Shot, Stabbed": "Violence/Murder", "Excessive Physical Abuse, Sexual abuse": "Violence/Murder", 
    "Starvation, Excessive Physical Abuse": "Starvation, Dehydration", "Asphyxiation": "Violence/Murder", 
    "Unknown (found on dinghy)": "Cause Unknown", "Hit by truck": "Train/Vehicle Acc.", 
    "Stabbed": "Violence/Murder", "Dehydration, Hyperthermia": "Hypothermia", "Sexual abuse, Shot or stabbed": "Violence/Murder", 
    "Harsh conditions, Suffocation": "Violence/Murder", "Hyperthermia, Presumed drowning": "Drowning", 
    "Killed by mortar shell": "Violence/Murder", "Violence while attempting to board boat": "Violence/Murder", 
    "Drowning, Dehydration": "Drowning", "Violence, Rape": "Violence/Murder", "Murdered, Asphyxiation": "Violence/Murder", 
    "Poison, Murdered": "Violence/Murder", "Excessive Physical Abuse, Starvation, Dehydration": "Violence/Murder", 
    "Gassed": "Violence/Murder", "Rape": "Violence/Murder", "Shot, Fall from train": "Violence/Murder", "Killed by crocodile": "Animal Attack", 
    "Killed by hippopotamus": "Animal Attack"})

In [46]:
#mmp_update["Cause of Death"].value_counts()

In [28]:
mmp_update["Cause of Death"] = mmp_update["Cause of Death"].replace(
    {"Sickness and lack of access to medicines, Vehicle Accident": "Sickness", 
    "Sickness and lack of access to medicines, Harsh weather/lack of adequate shelter": "Sickness", 
    "Sickness and lack of access to medicines, Starvation": "Sickness", "Sickness and lack of access to medicines, Excessive Physical Abuse, Sexual abuse": "Sickness", 
    "Sickness and lack of access to medicines, Starvation, Dehydration, Harsh weather/lack of adequate shelter": "Sickness", 
    "Sickness and lack of access to medicines, Dehydration": "Sickness", "Starvation, Sexual abuse": "Starvation, Dehydration", 
    "Fall from vehicle": "Train/Vehicle Acc.", "Unknown (decomposed remains)": "Cause Unknown", 
    "Unknown (mummified remains)": "Cause Unknown", "Crushed in back of truck": "Train/Vehicle Acc.", 
    "Electrocution on railway": "Electrocution", "Starvation, Dehydration, Suffocation": "Starvation, Dehydration", 
    "Sickness and lack of access to medicines, Harsh weather/lack of adequate shelter, Suffocation": "Sickness", 
    "Exposure, Hyperthermia": "Hypothermia", "Harsh weather/lack of adequate shelter, Shot or stabbed": "Violence/Murder", 
    "Exhaustion, Dehydration": "Starvation, Dehydration", "Vehicle Accident, Crushed by bus" : "Train/Vehicle Acc.", "Bus fire": "Train/Vehicle Acc.", 
    "Fall from truck, Crushed": "Train/Vehicle Acc.", "Crushed in back of truck, Vehicle Accident": "Train/Vehicle Acc.", "Violence, Asphyxiation, Drowning": "Violence/Murder"})

In [33]:
#mmp_update["Cause of Death"].value_counts()

In [31]:
mmp_update["Cause of Death"] = mmp_update["Cause of Death"].replace(
    {"Harsh conditions": "Harsh Weather/Shelter", "Exposure": "Harsh Weather/Shelter", 
    "Harsh weather/lack of adequate shelter": "Harsh Weather/Shelter", "Harsh weather/lack of adequate shelter, Excessive Physical Abuse, Sexual abuse": "Harsh Weather/Shelter", 
    "Starvation, Dehydration, Harsh weather/lack of adequate shelter, Suffocation": "Starvation, Dehydration", 
    "Suffocation, Excessive Physical Abuse": "Violence/Murder", "Presumed hyperthermia/dehydration": "Hypothermia", 
    "Starvation, Harsh conditions, Suffocation": "Starvation, Dehydration", "Unknown (multiple blunt force injuries)": "Cause Unknown", 
    "Sickness and lack of access to medicines, Starvation, Suffocation": "Sickness", "Hypoglycemia": "Sickness", "Harsh conditions, Vehicle Accident": "Harsh Weather/Shelter", 
    "Burned, Drowning": "Drowning", "Cervical cancer": "Sickness", "Unknown (found dead next to train tracks), Train Accident": "Cause Unknown", "Suffocation, Drowning": "Drowning", 
    "Starvation, Exhaustion": "Starvation, Dehydration", "Pulmonary edema": "Sickness"})

In [47]:
#mmp_update["Cause of Death"].value_counts()

In [40]:
mmp_update["Cause of Death"] = mmp_update["Cause of Death"].replace(
    {"Electrocution, Train Accident": "Electrocution", "Sickness and lack of access to medicines, Starvation, Dehydration, Suffocation": "Sickness", 
    "Harsh weather/lack of adequate shelter, Suffocation": "Harsh Weather/Shelter", "Dehydration, Harsh conditions": "Starvation, Dehydration", 
    "Fall from truck": "Injury/Fall", "Fall": "Injury/Fall", "Unknown (found dead next to train tracks)": "Cause Unknown", 
    "Vehicle Accident, Excessive Physical Abuse": "Train/Vehicle Acc.", "Hit by train, Train Accident": "Train/Vehicle Acc.", 
    "Sickness and lack of access to medicines, Starvation, Excessive Physical Abuse": "Sickness", "Dehydration, Harsh weather/lack of adequate shelter, Suffocation": "Starvation, Dehydration", 
    "Dehydration, Suffocation": "Starvation, Dehydration", "Burned": "Injury/Fall", "Dehydration, Starvation": "Starvation, Dehydration", "Starvation, Dehydration, Excessive Physical Abuse": "Starvation, Dehydration",
    "Accident (non-vehicle)": "Injury/Fall", "Starvation, Dehydration, Vehicle Accident": "Starvation, Dehydration", 
    "Suffocation, Starvation, Harsh weather/lack of adequate shelter": "Harsh Weather/Shelter", "Starvation, Excessive Physical Abuse, Sexual abuse": "Starvation, Dehydration", 
    "Sickness and lack of access to medicines, Harsh weather/lack of adequate shelter, Dehydration": "Sickness", 
    "Starvation, Dehydration, Harsh conditions, Excessive Physical Abuse": "Starvation, Dehydration", 
    "Fall from steep slope": "Injury/Fall", "Starvation, Dehydration, Excessive Physical Abuse, Shot, Stabbed": "Violence/Murder", 
    "Pneumonia, Malnutrition": "Sickness", "Respiratory illness": "Sickness", "Sickness and lack of access to medicines, Harsh weather/lack of adequate shelter, Sexual abuse": "Sickness", 
    "Attacked by Apache helicopter": "Violence/Murder", "Heat stroke": "Sickness", "Fall into a canyon": "Injury/Fall", 
    "Crushed by stones (train)": "Train/Vehicle Acc.", "Starvation, Vehicle Accident": "Train/Vehicle Acc.", 
    "Digestive bleeding, Sickness and lack of access to medicines": "Sickness", "Starvation, Dehydration, Harsh weather/lack of adequate shelter, Sexual abuse": "Starvation, Dehydration"})

In [51]:
#mmp_update["Cause of Death"].value_counts()

In [49]:
mmp_update["Cause of Death"] = mmp_update["Cause of Death"].replace(
    {"Sickness and lack of access to medicines, Starvation, Dehydration, Excessive Physical Abuse": "Sickness", 
    "Presumed asphyxiation": "Violence/Murder", "Harsh weather/lack of adequate shelter, Vehicle Accident": "Train/Vehicle Acc.", 
    "Pneumonia": "Sickness", "Sickness and lack of access to medicines, Sexual abuse": "Sickness", "Fuel burns": "Injury/Fall", 
    "Starvation, Suffocation, Excessive Physical Abuse": "Starvation, Dehydration", "Sickness and lack of access to medicines, Excessive Physical Abuse": "Sickness", 
    "Dehydration, Exposure, Hyperthermia": "Hypothermia", "Unknown (violence)": "Violence/Murder", "Rape, Murdered": "Violence/Murder", 
    "Dehydration, Asphyxiation": "Starvation, Dehydration", "Suffocation, Vehicle Accident": "Train/Vehicle Acc.", 
    "Presumed hypothermia": "Hypothermia", "Sickness and lack of access to medicines, Harsh weather/lack of adequate shelter, Excessive Physical Abuse, Sexual abuse": "Sickness", 
    "Drowning, Suffocation": "Drowning", "Vehicle Accident, Shot, Stabbed": "Train/Vehicle Acc.", "Fall onto train tracks": "Injury/Fall", 
    "Dehydration, Suffocation, Vehicle Accident": "Starvation, Dehydration", "Envenomation": "Animal Attack", 
    "Harsh weather/lack of adequate shelter, Suffocation, Excessive Physical Abuse, Sexual abuse": "Harsh Weather/Shelter", 
    "Organ failure": "Sickness", "Hyperthermia, Dehydration": "Hypothermia", "Hanging": "Suicide", "Hit by car, Murdered": "Violence/Murder"})

In [56]:
#mmp_update["Cause of Death"].value_counts()

In [54]:
mmp_update["Cause of Death"] = mmp_update["Cause of Death"].replace(
    {"Murdered, Violence": "Violence/Murder", "Respiratory illness, Sickness and lack of access to medicines": "Sickness", 
    "Dehydration, Shot or stabbed": "Starvation, Dehydration", "Starvation, Shot or stabbed": "Starvation, Dehydration", 
    "Dehydration, Vehicle Accident": "Train/Vehicle Acc.", "Excessive Physical Abuse, Shot or stabbed": "Violence/Murder", 
    "Fall from border fence": "Injury/Fall", "Starvation, Harsh weather/lack of adequate shelter": "Starvation, Dehydration", 
    "Sickness and lack of access to medicines, Starvation, Sexual abuse": "Sickness", "Asphyxiation, Drowning": "Drowning", "Harsh conditions, Suffocation, Vehicle Accident": "Train/Vehicle Acc.", 
    "Excessive Physical Abuse, Shot, Stabbed": "Violence/Murder", "Starvation, Dehydration, Harsh weather/lack of adequate shelter, Excessive Physical Abuse, Sexual abuse": "Violence/Murder", 
    "Drowning, Hypothermia, Malnutrition": "Drowning", "Exposure, Hypothermia": "Hypothermia", "Sickness and lack of access to medicines, Dehydration, Malnutrition": "Sickness", 
    "Vehicle Accident, Harsh weather/lack of adequate shelter": "Train/Vehicle Acc.", "Sickness and lack of access to medicines, Dehydration, Harsh weather/lack of adequate shelter, Suffocation, Sexual abuse": "Sickness", 
    "Train Accident, Unknown (found dead next to train tracks)": "Train/Vehicle Acc.", "Excessive Physical Abuse, Sexual abuse, Shot or stabbed": "Violence/Murder", "Dehydration, Vehicle Accident, Excessive Physical Abuse": "Train/Vehicle Acc."})

In [59]:
#mmp_update["Cause of Death"].value_counts()

In [57]:
mmp_update["Cause of Death"] = mmp_update["Cause of Death"].replace(
    {"Killed by landmine blast": "Landmine Blast", "Starvation, Suffocation": "Starvation, Dehydration", 
    "Exposure, Harsh conditions": "Harsh Weather/Shelter", "Sickness and lack of access to medicines, Suffocation, Excessive Physical Abuse": "Sickness", 
    "Crushed, Vehicle Accident": "Train/Vehicle Acc.", "Shot, Stabbed, Excessive Physical Abuse": "Violence/Murder", "Fall from boat": "Injury/Fall", "Sexual abuse": "Violence/Murder", 
    "Exposure, Malnutrition": "Starvation, Dehydration", "Killed by hippoptamus": "Animal Attack", "Crushed, Asphyxiation": "Injury/Fall", "Struck by lightning bolt": "Harsh Weather/Shelter", 
    "Harsh weather/lack of adequate shelter, Dehydration": "Harsh Weather/Shelter"})

In [62]:
#mmp_update["Cause of Death"].value_counts()

In [60]:
mmp_update["Cause of Death"] = mmp_update["Cause of Death"].replace(
    {"Crushed": "Injury/Fall", "Starvation, Harsh weather/lack of adequate shelter, Suffocation": "Starvation, Dehydration", 
    "Sickness and lack of access to medicines, Dehydration, Excessive Physical Abuse": "Sickness", 
    "Sickness and lack of access to medicines, Starvation, Harsh weather/lack of adequate shelter, Suffocation": "Sickness", 
    "Sickness and lack of access to medicines, Suffocation": "Sickness", "Starvation, Suffocation, Shot, Stabbed, Excessive Physical Abuse": "Starvation, Dehydration", 
    "Pulmonary complications, Sickness and lack of access to medicines": "Sickness", 
    "Injuries from boat motor, Vehicle Accident": "Train/Vehicle Acc.", "Fuel burns, Suffocation": "Injury/Fall"})

In [64]:
#mmp_update["Cause of Death"].value_counts()

In [63]:
mmp_update["Cause of Death"] = mmp_update["Cause of Death"].replace(
    {"Starvation, Harsh weather/lack of adequate shelter, Suffocation, Excessive Physical Abuse, Sexual abuse": "Starvation, Dehydration",
    "Starvation, Suffocation, Harsh weather/lack of adequate shelter, Excessive Physical Abuse, Sexual abuse": "Starvation, Dehydration", 
    "Sickness and lack of access to medicines, Starvation, Excessive Physical Abuse, Sexual abuse": "Sickness", 
    "Dehydration, Harsh weather/lack of adequate shelter, Suffocation, Excessive Physical Abuse": "Starvation, Dehydration", 
    "Hyperthermia, Exposure, Dehydration": "Hypothermia", "Sickness and lack of access to medicines, Vehicle Accident, Excessive Physical Abuse": "Sickness", 
    "Sickness and lack of access to medicines, Starvation, Harsh weather/lack of adequate shelter": "Sickness", 
    "Starvation, Dehydration, Harsh conditions, Suffocation": "Starvation, Dehydration", "Coronary artery atherosclerosis": "Cardiac Arrest"})

In [67]:
#mmp_update["Cause of Death"].value_counts()

In [66]:
mmp_update["Cause of Death"] = mmp_update["Cause of Death"].replace(
    {"Starvation, Dehydration, Harsh weather/lack of adequate shelter, Vehicle Accident": "Starvation, Dehydration", 
    "Rockslide": "Injury/Fall", "Suffocation, Excessive Physical Abuse, Sexual abuse": "Violence/Murder", 
    "Starvation, Dehydration, Suffocation, Excessive Physical Abuse, Sexual abuse, Shot, Stabbed": "Starvation, Dehydration", "Cardiac arrest": "Cardiac Arrest", 
    "Starvation, Dehydration, Excessive Physical Abuse, Sexual abuse, Shot, Stabbed": "Starvation, Dehydration", 
    "Exhaustion": "Sickness", "Harsh weather/lack of adequate shelter, Suffocation, Excessive Physical Abuse": "Harsh Weather/Shelter"})

In [71]:
#mmp_update["Cause of Death"].value_counts()

In [70]:
mmp_update["Cause of Death"] = mmp_update["Cause of Death"].replace(
    {"Starvation, Harsh conditions": "Starvation, Dehydration", "Starvation, Harsh conditions, Excessive Physical Abuse": "Starvation, Dehydration", 
    "Sickness and lack of access to medicines, Dehydration, Shot or stabbed": "Sickness", 
    "Starvation, Dehydration, Harsh conditions": "Starvation, Dehydration", "Starvation, Suffocation, Shot, Stabbed": "Violence/Murder"})

In [72]:
mmp_update["Cause of Death"].value_counts()

Drowning                     1326
Cause Unknown                1078
Sickness                      943
Train/Vehicle Acc.            689
Violence/Murder               503
Starvation, Dehydration       414
Hypothermia                   205
Harsh Weather/Shelter          84
Injury/Fall                    29
Cardiac Arrest                 20
Electrocution                  16
Plane stowaway                  8
Landmine Blast                  7
Suicide                         4
Animal Attack                   4
Post-partum complications       2
Fire                            1
Name: Cause of Death, dtype: int64

In [76]:
mmp_update

Unnamed: 0,Web ID,Region of Incident,Reported Date,Reported Year,Reported Month,Number Dead,Total Dead and Missing,Number of Survivors,Number of Females,Number of Males,Number of Children,Cause of Death,Location Description,Information Source,Migration Route,UNSD Geographical Grouping,Source Quality,Latitude,Longitude
0,51591,Mediterranean,28-Mar-19,2019,Mar,,2,36.0,,2.0,,Drowning,"Off the coast of Chios, Greece",Hellenic Coast Guard via IOM Greece,Eastern Mediterranean,Uncategorized,5,38.362368696592,26.172509473654
1,51588,Mediterranean,26-Mar-19,2019,Mar,4.0,4,11.0,3.0,,1.0,Drowning,"Off the coast of Ayvacık district, Çanakkale p...",Turkish Coast Guard via IOM Turkey,Eastern Mediterranean,Uncategorized,5,39.441975591614,26.378816195919
2,51589,Mediterranean,26-Mar-19,2019,Mar,1.0,1,,,,,Drowning,"Body recovered on Playa del Tarajal, Ceuta, Sp...","Ceuta al día, El Pueblo de Ceuta",Western Mediterranean,Uncategorized,3,35.871901875921,-5.343037665842
3,51590,Mediterranean,26-Mar-19,2019,Mar,1.0,1,,,,,Drowning,"Body recovered on beach near Tetouan, Morocco ...",El Pueblo de Ceuta,Western Mediterranean,Uncategorized,1,35.635115912988,-5.275650103548
4,51587,Central America,25-Mar-19,2019,Mar,1.0,1,,,1.0,,Train/Vehicle Acc.,"Train tracks in Teacalco, Tlaxcala, Mexico","Megalópolis, Línea de contraste",,Central America,3,19.334475177429,-98.069823987538
5,51580,US-Mexico Border,23-Mar-19,2019,Mar,1.0,1,,,,,Cause Unknown,"Pima County jurisdiction, Arizona, USA",Pima County Office of the Medical Examiner,,Northern America,5,32.057499000000,-111.666072500000
6,51581,US-Mexico Border,23-Mar-19,2019,Mar,1.0,1,,,,,Cause Unknown,"Pima County jurisdiction, Arizona, USA",Pima County Office of the Medical Examiner,,Northern America,5,32.057499000000,-111.666072500000
7,51585,Southeast Asia,23-Mar-19,2019,Mar,8.0,10,,1.0,,,Train/Vehicle Acc.,"Sa Setthi intersection in Tambon Ban Mai, Tha ...","The Nation, Vietnam+",,South-eastern Asia,3,13.865704814542,99.587248723929
8,51586,Mediterranean,23-Mar-19,2019,Mar,4.0,8,,3.0,5.0,,Drowning,"Off the coast of Sfax, Tunisia","Shems FM, Tunisie Numérique, Kapitalis",Central Mediterranean,Uncategorized,3,35.317034468315,11.078928258810
9,51578,US-Mexico Border,21-Mar-19,2019,Mar,1.0,1,,,,,Cause Unknown,"Pima County jurisdiction, Arizona, USA",Pima County Office of the Medical Examiner,,Northern America,5,32.057499000000,-111.666072500000


In [75]:
mmp_update.count()

Web ID                        5333
Region of Incident            5333
Reported Date                 5333
Reported Year                 5333
Reported Month                5333
Number Dead                   5142
Total Dead and Missing        5333
Number of Survivors            749
Number of Females              897
Number of Males               2813
Number of Children             644
Cause of Death                5333
Location Description          5323
Information Source            5331
Migration Route               2660
UNSD Geographical Grouping    5322
Source Quality                5333
Latitude                      5332
Longitude                     5332
dtype: int64

In [77]:
# Export CSV

mmp_update.to_csv('../resources_data/cleaned_mmp2.csv')