In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np

In [2]:
# Input locations: the 2017 UN male migrant-stock figures (origin vs.
# destination) and the region/country catalog used to filter them.
male_data_csv = "Resources/UN_MigrantStockMale_2017.csv"
country_cat_csv = "Resources/Region_country_catalog.csv"

# Load both CSV files into pandas DataFrames
country_cat_df = pd.read_csv(filepath_or_buffer=country_cat_csv)
male_data_df = pd.read_csv(filepath_or_buffer=male_data_csv)

In [3]:
# Missing figures are encoded as ".." in the raw table; treat them as 0
male_data_df = male_data_df.replace(to_replace='..', value=0)

# Inner-join the catalog with the figures on "Country": rows of the raw table
# whose "Country" value is not in the catalog are dropped by the join
male_total_df = country_cat_df.merge(male_data_df, on="Country")

In [4]:
# Reshape the wide table (one column per origin) into long format: one row per
# (Country, Region, Year, Origin) with the figure in "Male Immigrants".

# Identifier columns are kept as-is; every other column is melted. The list is
# derived from the identifiers instead of slicing columns[3:], so the melt does
# not silently depend on the identifiers being exactly the first three columns.
id_cols = ["Country", "Region", "Year"]
lst_ctry = male_total_df.columns.drop(id_cols)

# melt columns into rows
male_mig_df = pd.melt(
    male_total_df,
    id_vars=id_cols,
    value_vars=lst_ctry,
    var_name='Origin',
    value_name='Male Immigrants',
)

# Figures read as text may carry thousands separators (e.g. "1,234");
# strip the commas so the column can be cast to integers.
male_mig_df['Male Immigrants'] = male_mig_df['Male Immigrants'].replace(',', '', regex=True)

# Vectorised cast; replaces the element-wise applymap(np.int64), which is
# deprecated in recent pandas releases.
male_mig_df = male_mig_df.astype({'Year': np.int64, 'Male Immigrants': np.int64})

# Top 20 origins by male migrant figures summed over every destination and year
(
    male_mig_df
    .groupby("Origin")
    .agg({'Male Immigrants': 'sum'})
    .sort_values('Male Immigrants', ascending=False)
    .head(20)
)

Unnamed: 0_level_0,Male Immigrants
Origin,Unnamed: 1_level_1
India,35499706
Mexico,26139271
Russian Federation,18042092
Bangladesh,17518777
China,16594852
Pakistan,13319295
Ukraine,10347410
Afghanistan,9947071
United Kingdom,9128150
Philippines,8886398


In [5]:
# Top 10 values of "Country" (the destination side of the table) by male
# immigrant figures summed over every origin and year
(
    male_mig_df
    .groupby("Country")[['Male Immigrants']]
    .sum()
    .sort_values(by='Male Immigrants', ascending=False)
    .head(10)
)

Unnamed: 0_level_0,Male Immigrants
Country,Unnamed: 1_level_1
United States of America,83986305
Saudi Arabia,25323240
Russian Federation,22751868
Germany,20354048
United Arab Emirates,19532138
United Kingdom,14626495
France,14445171
Canada,13524190
Australia,12000138
Spain,11135540


In [8]:
# Male migrant figures summed per origin and year, with one column per year.
# No sort_values() before unstack(): unstack() rebuilds the row index in label
# order, so sorting by value first has no effect on the result.
(
    male_mig_df
    .groupby(["Origin", "Year"])
    .agg({'Male Immigrants': 'sum'})
    .unstack()
    .reset_index()
    .head()
)

Unnamed: 0_level_0,Origin,Male Immigrants,Male Immigrants,Male Immigrants,Male Immigrants
Year,Unnamed: 1_level_1,2005,2010,2015,2017
0,Afghanistan,2107894,2751790,2542717,2544670
1,Albania,516856,586807,575771,580344
2,Algeria,829505,901288,973054,976203
3,American Samoa,1387,1046,1078,1081
4,Andorra,2764,3425,3606,3621


In [26]:
# Origin x Year table of summed male migrant figures, stored for reuse.
# Columns form a two-level index: ("Male Immigrants", <year>).
# sort_values() is not applied before unstack() because unstack() rebuilds the
# row index in label order, discarding any earlier sort by value.
magu_male_df = (
    male_mig_df
    .groupby(["Origin", "Year"])
    .agg({'Male Immigrants': 'sum'})
    .unstack()
)
magu_male_df.head()
# Example: selecting a range of years from the two-level columns
#magu_male_df.loc[:, "Male Immigrants"].loc[:, 2005:2010]

Unnamed: 0_level_0,Male Immigrants,Male Immigrants,Male Immigrants,Male Immigrants
Year,2005,2010,2015,2017
Origin,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Afghanistan,2107894,2751790,2542717,2544670
Albania,516856,586807,575771,580344
Algeria,829505,901288,973054,976203
American Samoa,1387,1046,1078,1081
Andorra,2764,3425,3606,3621


In [19]:
# Now with female data: same load -> clean -> merge -> melt steps as for the male table.

# File with Female Migration numbers with origin and destination
female_data_csv = "Resources/UN_MigrantStockFemale_2017.csv"

# Read data into a pandas data frame
female_data_df = pd.read_csv(female_data_csv)

# Cells with NO data come with "..", need to replace those with 0
female_data_df = female_data_df.replace('..', 0)

# Join the numbers data to the catalog to get rid of all rows that DO NOT
# contain country information (inner join on "Country")
female_total_df = pd.merge(country_cat_df, female_data_df, on="Country")

# Identifier columns are kept as-is; every other column is melted. The list is
# derived from the identifiers instead of slicing columns[3:], so the melt does
# not silently depend on the identifiers being exactly the first three columns.
id_cols = ["Country", "Region", "Year"]
lst_ctry = female_total_df.columns.drop(id_cols)

# melt columns into rows: one row per (Country, Region, Year, Origin)
female_mig_df = pd.melt(
    female_total_df,
    id_vars=id_cols,
    value_vars=lst_ctry,
    var_name='Origin',
    value_name='Female Immigrants',
)

# Figures read as text may carry thousands separators (e.g. "1,234");
# strip the commas so the column can be cast to integers.
female_mig_df['Female Immigrants'] = female_mig_df['Female Immigrants'].replace(',', '', regex=True)

# Vectorised cast; replaces the element-wise applymap(np.int64), which is
# deprecated in recent pandas releases.
female_mig_df = female_mig_df.astype({'Year': np.int64, 'Female Immigrants': np.int64})

# Optional export, currently disabled:
#female_mig_df.to_csv("Output/Clean_Female_Migrat.csv", index=False, header=True)

# Top 20 origins by female migrant figures summed over every destination and year
(
    female_mig_df
    .groupby("Origin")
    .agg({'Female Immigrants': 'sum'})
    .sort_values('Female Immigrants', ascending=False)
    .head(20)
)

Unnamed: 0_level_0,Female Immigrants
Origin,Unnamed: 1_level_1
Russian Federation,43509229
Mexico,32230020
India,29185487
China,26311369
Ukraine,21310863
Bangladesh,16533372
Afghanistan,15146674
Philippines,15031635
United Kingdom,14840887
Germany,13700214


In [24]:
# Origin x Year table of summed female migrant figures.
# Columns form a two-level index: ("Female Immigrants", <year>).
# sort_values() is not applied before unstack() because unstack() rebuilds the
# row index in label order, discarding any earlier sort by value.
magu_female_df = (
    female_mig_df
    .groupby(["Origin", "Year"])
    .agg({'Female Immigrants': 'sum'})
    .unstack()
)
magu_female_df.head()

Unnamed: 0_level_0,Female Immigrants,Female Immigrants,Female Immigrants,Female Immigrants,Female Immigrants,Female Immigrants,Female Immigrants
Year,1990,1995,2000,2005,2010,2015,2017
Origin,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Afghanistan,3063134,1584835,1948472,1718361,2237419,2312659,2281794
Albania,70434,218734,369292,448210,542264,562146,567800
Algeria,393787,423150,451883,759218,728893,812501,816509
American Samoa,992,1273,1537,1291,929,957,956
Andorra,2023,2224,2932,2998,3695,3983,4032


In [30]:
# Male and female Origin x Year tables side by side, aligned on the Origin index.
# NOTE(review): the displayed male table only has 2005-2017 while the female
# table has 1990-2017 - confirm both inputs cover the same years.
pd.concat(objs=[magu_male_df, magu_female_df], axis="columns")


Unnamed: 0_level_0,Male Immigrants,Male Immigrants,Male Immigrants,Male Immigrants,Female Immigrants,Female Immigrants,Female Immigrants,Female Immigrants,Female Immigrants,Female Immigrants,Female Immigrants
Year,2005,2010,2015,2017,1990,1995,2000,2005,2010,2015,2017
Origin,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
Afghanistan,2107894,2751790,2542717,2544670,3063134,1584835,1948472,1718361,2237419,2312659,2281794
Albania,516856,586807,575771,580344,70434,218734,369292,448210,542264,562146,567800
Algeria,829505,901288,973054,976203,393787,423150,451883,759218,728893,812501,816509
American Samoa,1387,1046,1078,1081,992,1273,1537,1291,929,957,956
Andorra,2764,3425,3606,3621,2023,2224,2932,2998,3695,3983,4032
Angola,335869,310137,302929,314410,412546,334521,435707,344536,314147,308391,318289
Anguilla,1555,1178,1175,1198,978,1238,1508,1503,1078,1073,1096
Antigua and Barbuda,17068,18572,22440,22970,11935,15173,18513,20438,22623,26536,27209
Argentina,401492,464852,463751,474789,226634,251410,286783,412118,478813,489193,502399
Armenia,487037,496416,508223,510514,462799,446248,407323,412783,425337,438023,440509


In [8]:
# Optional export, currently disabled: uncomment to write the long-format male table to a CSV file
#male_mig_df.to_csv("Output/Clean_Orig_Dest.csv", index=False, header=True)