In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os

In [3]:
# Stacy's code starts here

In [4]:
# Read the FY2018 naturalization workbook from the local Resources folder.
# header=5 selects the (0-indexed) sheet row that supplies the column names.
nat_file = 'fy2018_naturalization.xlsx'
filepath = os.path.join('.', 'Resources', nat_file)

naturalization = pd.read_excel(
    io=filepath,
    header=5,
)

In [5]:
# The rows labelled 112 through 117 hold the notes at the end of the sheet,
# not yearly figures, so remove them before any numeric processing.
naturalization = naturalization.drop(index=range(112, 118))
naturalization.tail()

Unnamed: 0,Year,filed,Total,Civilian,Military 2,Not reported,denied
107,2014,773824.0,653416.0,642431.0,7468.0,3517,66767.0
108,2015,783062.0,730259.0,720645.0,7234.0,2380,75810.0
109,2016,972151.0,753060.0,742090.0,8885.0,2085,86033.0
110,2017,986851.0,707265.0,695718.0,6883.0,4664,83176.0
111,2018,810548.0,761901.0,750771.0,4495.0,6635,92586.0


In [6]:
# Give the short headers descriptive names, then discard the naturalization
# breakdown columns (civilian / military / not reported) that are not needed.
naturalization = (
    naturalization
    .rename(columns={
        'filed': 'Petitions filed',
        'Total': 'Naturalized, total',
        'denied': 'Petitions denied',
    })
    .drop(columns=['Civilian', 'Military 2', 'Not reported'])
)

In [7]:
# Find funny/footnoted years and fix them
def _strip_year_footnote(value):
    """Return the four-digit year as an int when `value` carries extra text.

    Values whose text form is at most four characters long are returned
    unchanged. Longer values are assumed to be a year followed by a footnote
    marker; only the leading four characters are kept. The previous slice
    ([0:5]) kept five characters, so it only worked when the fifth character
    happened to be whitespace, and it indexed the raw value directly, which
    fails for anything that is not already a string.
    """
    text = str(value)
    if len(text) > 4:
        return int(text.strip()[:4])
    return value


# Apply the fix to the whole column at once instead of mutating the frame
# row by row while iterating over it.
naturalization['Year'] = naturalization['Year'].map(_strip_year_footnote)

# Set year as index
naturalization = naturalization.set_index(keys=['Year'])

In [8]:
# Change datatype to int for all columns
for c in naturalization.columns:
    naturalization[c] = naturalization[c].astype('int')

In [9]:
# Preview the first rows of the cleaned, year-indexed naturalization table
naturalization.head()

Unnamed: 0_level_0,Petitions filed,"Naturalized, total",Petitions denied
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1907,21113,7941,250
1908,44032,25975,3330
1909,43141,38374,6341
1910,55750,39448,7781
1911,74740,56683,9017


In [86]:
# Read the FY2018 asylum demographics workbook (age, sex, marital status).
# header=4 selects the (0-indexed) sheet row that supplies the column names.
asy_2018_file = 'fy2018_table18d_asylum_age_etc.xlsx'
filepath = os.path.join('.', 'Resources', asy_2018_file)

asylum_2018 = pd.read_excel(
    io=filepath,
    header=4,
)

In [87]:
# Inspect the last ten rows to see where the data ends and the sheet notes begin
asylum_2018.tail(10)

Unnamed: 0,Characteristic,Total,Unnamed: 2,Spouses,Children
28,Total,25439.0,16927.0,3636,4876
29,Married,10142.0,6506.0,3636,-
30,Single,14154.0,9278.0,-,4876
31,Widowed,232.0,232.0,-,-
32,Divorced/separated,896.0,896.0,-,-
33,Unknown,15.0,15.0,-,-
34,D Data withheld to limit disclosure.,,,,
35,- Represents zero.,,,,
36,Note: Data not available for individuals grant...,,,,
37,Source: U.S. Department of Homeland Security.,,,,


In [88]:
# The rows labelled 34 through 37 are the legend/notes/source lines at the
# bottom of the sheet; remove them so only data rows remain.
asylum_2018 = asylum_2018.drop(index=range(34, 38))
asylum_2018.tail()

Unnamed: 0,Characteristic,Total,Unnamed: 2,Spouses,Children
29,Married,10142.0,6506,3636,-
30,Single,14154.0,9278,-,4876
31,Widowed,232.0,232,-,-
32,Divorced/separated,896.0,896,-,-
33,Unknown,15.0,15,-,-


In [89]:
# Take the sex breakdown: row positions 1-3 and the first two columns
# (category label and total), labelling the category column "Sex".
asylum_2018_sex = (
    asylum_2018.iloc[1:4, 0:2]
    .rename(columns={"Characteristic": "Sex"})
)

In [90]:
# Use the "Sex" labels as the row index
asylum_2018_sex = asylum_2018_sex.set_index('Sex')
asylum_2018_sex

Unnamed: 0_level_0,Total
Sex,Unnamed: 1_level_1
Total,25439.0
Female,12483.0
Male,12956.0


In [95]:
# Label the value column with the dataset it came from
asylum_2018_sex = asylum_2018_sex.rename(columns={"Total": "Asylum 2018"})
asylum_2018_sex

Unnamed: 0_level_0,Asylum 2018
Sex,Unnamed: 1_level_1
Total,25439.0
Female,12483.0
Male,12956.0


In [91]:
# Take the broad age-group section: row positions 22-26, first two columns.
asylum_2018_broad_age = asylum_2018.iloc[22:27, 0:2]

# Remove the row labelled 22, name the category column "Age" and make it
# the index.
asylum_2018_broad_age_total = (
    asylum_2018_broad_age
    .drop(index=[22])
    .rename(columns={"Characteristic": "Age"})
    .set_index('Age')
)
asylum_2018_broad_age_total

Unnamed: 0_level_0,Total
Age,Unnamed: 1_level_1
Total,25439.0
Under 16 years,4837.0
16 to 20 years,3278.0
21 years and over,17324.0


In [100]:
# Rename "total" to reflect dataset scope
asylum_2018_broad_age_total.rename(columns={
    "Total": "Asylum 2018"
}, inplace=True)
asylum_2018_broad_age_total

Unnamed: 0_level_0,Asylum 2018
Age,Unnamed: 1_level_1
Total,25439.0
Under 16 years,4837.0
16 to 20 years,3278.0
21 years and over,17324.0


In [92]:
# Split out marital status [32:38]

In [93]:
# Take the marital-status section (row positions 28-33, first two columns),
# name the category column "Marital Status" and make it the index.
asylum_2018_marital = (
    asylum_2018.iloc[28:34, 0:2]
    .rename(columns={"Characteristic": "Marital Status"})
    .set_index('Marital Status')
)
asylum_2018_marital

Unnamed: 0_level_0,Total
Marital Status,Unnamed: 1_level_1
Total,25439.0
Married,10142.0
Single,14154.0
Widowed,232.0
Divorced/separated,896.0
Unknown,15.0


In [101]:
# Label the value column with the dataset it came from
asylum_2018_marital = asylum_2018_marital.rename(columns={"Total": "Asylum 2018"})
asylum_2018_marital

Unnamed: 0_level_0,Asylum 2018
Marital Status,Unnamed: 1_level_1
Total,25439.0
Married,10142.0
Single,14154.0
Widowed,232.0
Divorced/separated,896.0
Unknown,15.0


In [106]:
# Read the FY2009 asylum demographics workbook so it can be cleaned next.
# header=5 selects the (0-indexed) sheet row that supplies the column names.
asy_2009_file = 'fy_2009_table15d_asylum_age_etc.xls'
filepath = os.path.join('.', 'Resources', asy_2009_file)

asylum_2009 = pd.read_excel(
    io=filepath,
    header=5,
)

In [108]:
# Inspect the first ten rows to see how the sheet's sections are laid out
asylum_2009.head(10)

Unnamed: 0,Characteristic,Total,Unnamed: 2,Spouses,Children
0,GENDER,,,,
1,Total,74602.0,32511.0,13440,28651.0
2,Male,38491.0,21839.0,1844,14808.0
3,Female,36111.0,10672.0,11596,13843.0
4,,,,,
5,AGE,,,,
6,Total,74602.0,32511.0,13440,28651.0
7,Under 1 year,361.0,4.0,-,357.0
8,1 to 4 years,6409.0,78.0,-,6331.0
9,5 to 9 years,7226.0,143.0,-,7083.0


In [107]:
# The rows labelled 37 through 40 are the notes at the bottom of the sheet;
# remove them so only data rows remain.
asylum_2009 = asylum_2009.drop(index=range(37, 41))
asylum_2009.tail()

Unnamed: 0,Characteristic,Total,Unnamed: 2,Spouses,Children
32,Single,40798.0,12182,25,28591
33,Married,29770.0,16340,13404,26
34,Widowed,2452.0,D,-,D
35,Divorced/separated,1483.0,1464,5,
36,Unknown,99.0,D,6,D


In [112]:
# Take the sex breakdown (row positions 1-3, first two columns), rename the
# category column to "Sex" and the value column to the dataset name, then
# use the "Sex" labels as the row index.
asylum_2009_sex = (
    asylum_2009.iloc[1:4, 0:2]
    .rename(columns={"Characteristic": "Sex", "Total": "Asylum 2009"})
    .set_index('Sex')
)
asylum_2009_sex

Unnamed: 0_level_0,Asylum 2009
Sex,Unnamed: 1_level_1
Total,74602.0
Male,38491.0
Female,36111.0


In [121]:
# Take the broad age-group section (row positions 25-28, first two columns),
# rename the category column to "Age" and the value column to the dataset
# name, then use the "Age" labels as the row index.
asylum_2009_broad_age = (
    asylum_2009.iloc[25:29, 0:2]
    .rename(columns={"Characteristic": "Age", "Total": "Asylum 2009"})
    .set_index('Age')
)

asylum_2009_broad_age

Unnamed: 0_level_0,Asylum 2009
Age,Unnamed: 1_level_1
Total,74602.0
Under 16,22288.0
Age 16 to 20,7889.0
Age 21 and over,44425.0


In [127]:
# Take the marital-status section (row positions 31-36, first two columns),
# rename the category column to "Marital Status" and the value column to the
# dataset name, then use the status labels as the row index.
asylum_2009_marital = (
    asylum_2009.iloc[31:37, 0:2]
    .rename(columns={"Characteristic": "Marital Status", "Total": "Asylum 2009"})
    .set_index('Marital Status')
)

asylum_2009_marital

Unnamed: 0_level_0,Asylum 2009
Marital Status,Unnamed: 1_level_1
Total,74602.0
Single,40798.0
Married,29770.0
Widowed,2452.0
Divorced/separated,1483.0
Unknown,99.0


In [None]:
# Stacy's code ends here

In [None]:
# Kana's code starts here

In [None]:
# Kana's code ends here

In [None]:
# Satish's code starts here

In [None]:
# Satish's code ends here

In [None]:
# Umar's code starts here

In [None]:
# Umar's code ends here