In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os

In [3]:
# Stacy's code starts here

In [4]:
# Load naturalization table
nat_file = 'fy2018_naturalization.xlsx'
filepath = os.path.join('.', 'Resources', nat_file)

naturalization = pd.read_excel(filepath, header=5)

In [5]:
# Drop notes data at end of document
naturalization.drop(labels=range(112,118), inplace=True)
naturalization.tail()

Unnamed: 0,Year,filed,Total,Civilian,Military 2,Not reported,denied
107,2014,773824.0,653416.0,642431.0,7468.0,3517,66767.0
108,2015,783062.0,730259.0,720645.0,7234.0,2380,75810.0
109,2016,972151.0,753060.0,742090.0,8885.0,2085,86033.0
110,2017,986851.0,707265.0,695718.0,6883.0,4664,83176.0
111,2018,810548.0,761901.0,750771.0,4495.0,6635,92586.0


In [6]:
# Rename columns
naturalization.rename(columns={
    'filed': 'Petitions filed',
    'Total': 'Naturalized, total',
    'denied': 'Petitions denied'
}, inplace=True)

# Drop unneeded columns
naturalization = naturalization.drop(labels=['Civilian', 'Military 2', 'Not reported'], axis='columns')

In [7]:
# Find funny/footnoted years and fix them
for index, row in naturalization.iterrows():
    
    year = naturalization.loc[index, 'Year']
    
    if len(str(year)) > 4:
        year = int(year[0:5])
        naturalization.loc[index, 'Year'] = year
    else:
        pass

# Set year as index
naturalization.set_index(keys=['Year'], inplace=True)

In [8]:
# Change datatype to int for all columns
for c in naturalization.columns:
    naturalization[c] = naturalization[c].astype('int')

In [9]:
naturalization.head()

Unnamed: 0_level_0,Petitions filed,"Naturalized, total",Petitions denied
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1907,21113,7941,250
1908,44032,25975,3330
1909,43141,38374,6341
1910,55750,39448,7781
1911,74740,56683,9017


In [86]:
# Load asylum seeker age/gender/etc table (2018)
asy_2018_file = 'fy2018_table18d_asylum_age_etc.xlsx'
filepath = os.path.join('.', 'Resources', asy_2018_file)

asylum_2018 = pd.read_excel(filepath, header=4)

In [87]:
asylum_2018.tail(10)

Unnamed: 0,Characteristic,Total,Unnamed: 2,Spouses,Children
28,Total,25439.0,16927.0,3636,4876
29,Married,10142.0,6506.0,3636,-
30,Single,14154.0,9278.0,-,4876
31,Widowed,232.0,232.0,-,-
32,Divorced/separated,896.0,896.0,-,-
33,Unknown,15.0,15.0,-,-
34,D Data withheld to limit disclosure.,,,,
35,- Represents zero.,,,,
36,Note: Data not available for individuals grant...,,,,
37,Source: U.S. Department of Homeland Security.,,,,


In [88]:
# Drop notes data at end of document
asylum_2018.drop(labels=range(34,38), inplace=True)
asylum_2018.tail()

Unnamed: 0,Characteristic,Total,Unnamed: 2,Spouses,Children
29,Married,10142.0,6506,3636,-
30,Single,14154.0,9278,-,4876
31,Widowed,232.0,232,-,-
32,Divorced/separated,896.0,896,-,-
33,Unknown,15.0,15,-,-


In [89]:
# Split into sex [5:8], rename column
asylum_2018_sex = asylum_2018.iloc[1:4,0:2]
asylum_2018_sex.rename(columns={
    "Characteristic": "Sex"
}, inplace=True)

In [90]:
# Drop unneeded rows and reset index
asylum_2018_sex.set_index(keys='Sex', inplace=True)
asylum_2018_sex

Unnamed: 0_level_0,Total
Sex,Unnamed: 1_level_1
Total,25439.0
Female,12483.0
Male,12956.0


In [95]:
# Rename "total" to reflect dataset scope
asylum_2018_sex.rename(columns={
    "Total": "Asylum 2018"
}, inplace=True)
asylum_2018_sex

Unnamed: 0_level_0,Asylum 2018
Sex,Unnamed: 1_level_1
Total,25439.0
Female,12483.0
Male,12956.0


In [91]:
# Split out broad age group [27:31]
asylum_2018_broad_age = asylum_2018.iloc[22:27,0:2]
asylum_2018_broad_age_total = asylum_2018_broad_age.drop([22])
asylum_2018_broad_age_total.rename(columns={
    "Characteristic": "Age"
}, inplace=True)
asylum_2018_broad_age_total.set_index(keys='Age', inplace=True)
asylum_2018_broad_age_total

Unnamed: 0_level_0,Total
Age,Unnamed: 1_level_1
Total,25439.0
Under 16 years,4837.0
16 to 20 years,3278.0
21 years and over,17324.0


In [100]:
# Rename "total" to reflect dataset scope
asylum_2018_broad_age_total.rename(columns={
    "Total": "Asylum 2018"
}, inplace=True)
asylum_2018_broad_age_total

Unnamed: 0_level_0,Asylum 2018
Age,Unnamed: 1_level_1
Total,25439.0
Under 16 years,4837.0
16 to 20 years,3278.0
21 years and over,17324.0


In [92]:
# Split out marital status [32:38]

In [93]:
asylum_2018_marital = asylum_2018.iloc[28:34,0:2]
asylum_2018_marital.rename(columns={
    "Characteristic": "Marital Status"
}, inplace=True)
asylum_2018_marital.set_index(keys='Marital Status', inplace=True)
asylum_2018_marital

Unnamed: 0_level_0,Total
Marital Status,Unnamed: 1_level_1
Total,25439.0
Married,10142.0
Single,14154.0
Widowed,232.0
Divorced/separated,896.0
Unknown,15.0


In [101]:
# Rename "total" to reflect dataset scope
asylum_2018_marital.rename(columns={
    "Total": "Asylum 2018"
}, inplace=True)
asylum_2018_marital

Unnamed: 0_level_0,Asylum 2018
Marital Status,Unnamed: 1_level_1
Total,25439.0
Married,10142.0
Single,14154.0
Widowed,232.0
Divorced/separated,896.0
Unknown,15.0


In [106]:
# Cleaning asylum demographic data from 2009
asy_2009_file = 'fy_2009_table15d_asylum_age_etc.xls'
filepath = os.path.join('.', 'Resources', asy_2009_file)

asylum_2009 = pd.read_excel(filepath, header=5)

In [108]:
asylum_2009.head(10)

Unnamed: 0,Characteristic,Total,Unnamed: 2,Spouses,Children
0,GENDER,,,,
1,Total,74602.0,32511.0,13440,28651.0
2,Male,38491.0,21839.0,1844,14808.0
3,Female,36111.0,10672.0,11596,13843.0
4,,,,,
5,AGE,,,,
6,Total,74602.0,32511.0,13440,28651.0
7,Under 1 year,361.0,4.0,-,357.0
8,1 to 4 years,6409.0,78.0,-,6331.0
9,5 to 9 years,7226.0,143.0,-,7083.0


In [107]:
# Drop notes data at end of document
asylum_2009.drop(labels=range(37,41), inplace=True)
asylum_2009.tail()

Unnamed: 0,Characteristic,Total,Unnamed: 2,Spouses,Children
32,Single,40798.0,12182,25,28591
33,Married,29770.0,16340,13404,26
34,Widowed,2452.0,D,-,D
35,Divorced/separated,1483.0,1464,5,
36,Unknown,99.0,D,6,D


In [112]:
# Split into sex [5:8], rename column
asylum_2009_sex = asylum_2009.iloc[1:4,0:2]
asylum_2009_sex.rename(columns={
    "Characteristic": "Sex"
}, inplace=True)
asylum_2009_sex

# Rename "total" to reflect dataset scope
asylum_2009_sex.rename(columns={
    "Total": "Asylum 2009"
}, inplace=True)

# reset index
asylum_2009_sex.set_index(keys='Sex', inplace=True)
asylum_2009_sex

Unnamed: 0_level_0,Asylum 2009
Sex,Unnamed: 1_level_1
Total,74602.0
Male,38491.0
Female,36111.0


In [121]:
# Split out broad age group [32:35]
asylum_2009_broad_age = asylum_2009.iloc[25:29,0:2]
asylum_2009_broad_age.rename(columns={
    "Characteristic": "Age"
}, inplace=True)
asylum_2009_broad_age.set_index(keys='Age', inplace=True)
asylum_2009_broad_age

# Rename "total" to reflect dataset scope
asylum_2009_broad_age.rename(columns={
    "Total": "Asylum 2009"
}, inplace=True)

asylum_2009_broad_age

Unnamed: 0_level_0,Asylum 2009
Age,Unnamed: 1_level_1
Total,74602.0
Under 16,22288.0
Age 16 to 20,7889.0
Age 21 and over,44425.0


In [127]:
# Split out marital status
asylum_2009_marital = asylum_2009.iloc[31:37,0:2]
asylum_2009_marital.rename(columns={
    "Characteristic": "Marital Status"
}, inplace=True)
asylum_2009_marital.set_index(keys='Marital Status', inplace=True)

# Rename "total" to reflect dataset scope
asylum_2009_marital.rename(columns={
    "Total": "Asylum 2009"
}, inplace=True)

asylum_2009_marital

Unnamed: 0_level_0,Asylum 2009
Marital Status,Unnamed: 1_level_1
Total,74602.0
Single,40798.0
Married,29770.0
Widowed,2452.0
Divorced/separated,1483.0
Unknown,99.0


In [None]:
# Stacy's code ends here

In [None]:
# Kana's code starts here

In [None]:
# Kana's code ends here

In [None]:
# Satish's code starts here

In [None]:
# Satish's code ends here

In [3]:
# Umar's code starts here
#Read in the Excel file and view the headers
Lawful_df = pd.read_excel("./Resources/fy2018_Lawful.xlsx", header=4)
Lawful_df.head()

Unnamed: 0,Characteristic,Total,Female,Male,Unknown
0,AGE,,,,
1,Total,1096611.0,584426.0,512176.0,9
2,Under 1 year,2959.0,1472.0,1487.0,-
3,1 to 4 years,34215.0,16976.0,17239.0,-
4,5 to 9 years,61533.0,30098.0,31433.0,2


In [4]:
#Use the iloc function to locate the point of interest in a data set
Broad_age = Lawful_df.iloc[19:23,:]
Broad_age

Unnamed: 0,Characteristic,Total,Female,Male,Unknown
19,Total,1096611.0,584426.0,512176.0,9
20,Under 16 years,177300.0,86839.0,90458.0,3
21,16 to 20 years,85830.0,42113.0,43715.0,2
22,21 years and over,833481.0,455474.0,378003.0,4


In [5]:
#View the characteristic and Total columns 
Broad_age_df = pd.DataFrame(Broad_age)
Sex = Broad_age_df.iloc[:, 0:2]
Sex

Unnamed: 0,Characteristic,Total
19,Total,1096611.0
20,Under 16 years,177300.0
21,16 to 20 years,85830.0
22,21 years and over,833481.0


In [6]:
#Rename your columns 
Cleaned = Sex.rename(columns={"Characteristic": "Age", "Total": "Lawful 2018"})
Index_age = Cleaned.set_index("Age")
Index_age

Unnamed: 0_level_0,Lawful 2018
Age,Unnamed: 1_level_1
Total,1096611.0
Under 16 years,177300.0
16 to 20 years,85830.0
21 years and over,833481.0


In [7]:
#View a single row 
Sex = Lawful_df.iloc[19,1:]
Sex.to_frame(name="LPR 2018")

Unnamed: 0,LPR 2018
Total,1096610.0
Female,584426.0
Male,512176.0
Unknown,9.0


In [8]:
#Use the iloc function to locate the point of interest in a data set
Marital_status = Lawful_df.iloc[24:30,:]
Marital_status

Unnamed: 0,Characteristic,Total,Female,Male,Unknown
24,Total,1096611.0,584426.0,512176.0,9
25,Married,627443.0,342625.0,284816.0,2
26,Single,400196.0,191733.0,208457.0,6
27,Widowed,26579.0,23446.0,3133.0,-
28,Divorced/separated,32416.0,22045.0,10371.0,-
29,Unknown,9977.0,4577.0,5399.0,1


In [9]:
#View the characteristic and Total columns 
New_marital_df = pd.DataFrame(Marital_status)
Specific = New_marital_df.iloc[:, 0:2]
Specific

Unnamed: 0,Characteristic,Total
24,Total,1096611.0
25,Married,627443.0
26,Single,400196.0
27,Widowed,26579.0
28,Divorced/separated,32416.0
29,Unknown,9977.0


In [10]:
#Rename your columns 
Renamed = Specific.rename(columns={"Characteristic": "Marital Status", "Total": "Lawful Permanent Resident 2018"})
Renamed.head()

Unnamed: 0,Marital Status,Lawful Permanent Resident 2018
24,Total,1096611.0
25,Married,627443.0
26,Single,400196.0
27,Widowed,26579.0
28,Divorced/separated,32416.0


In [11]:
#View a single column 
Status = Lawful_df.iloc[24:30,0:2]
Name18 = Status.rename(columns={"Characteristic": "Marital Status", "Total": "Lawful 2018"})
#Set marital status as index for clarity
Name18.set_index("Marital Status", inplace=True)
Name18

Unnamed: 0_level_0,Lawful 2018
Marital Status,Unnamed: 1_level_1
Total,1096611.0
Married,627443.0
Single,400196.0
Widowed,26579.0
Divorced/separated,32416.0
Unknown,9977.0


In [12]:
#Read in the Excel file and 
Lawful09_df = pd.read_excel("./Resources/fy2009_Lawful.xls", header=4)
Lawful09_df.head()

Unnamed: 0,Characteristic,Total,Male,Female,Unknown
0,AGE,,,,
1,Total,1130818.0,513015.0,617799.0,4
2,Under 1 year,5782.0,2706.0,3076.0,-
3,1 to 4 years,32395.0,15975.0,16420.0,-
4,5 to 9 years,55373.0,28263.0,27110.0,-


In [13]:
#Use the iloc function to locate the point of interest in a data set
Broad09_age = Lawful09_df.iloc[20:24,:]
Broad09_age

Unnamed: 0,Characteristic,Total,Male,Female,Unknown
20,Total,1130818.0,513015.0,617799,4
21,Under 16 years,185960.0,94529.0,91431,-
22,16 to 20 years,101864.0,50720.0,51144,-
23,21 years and over,842992.0,367764.0,475224,4


In [14]:
#View the characteristic and Total columns 
Broad09_age_df = pd.DataFrame(Broad09_age)
Sex09 = Broad09_age_df.iloc[:, 0:2]
Sex09

Unnamed: 0,Characteristic,Total
20,Total,1130818.0
21,Under 16 years,185960.0
22,16 to 20 years,101864.0
23,21 years and over,842992.0


In [15]:
#Rename your columns 
Age09 = Sex09.rename(columns={"Characteristic": "Age", "Total": "Lawful Permanent Resident 2009"})
New_index09 = Age09.set_index("Age")
New_index09

Unnamed: 0_level_0,Lawful Permanent Resident 2009
Age,Unnamed: 1_level_1
Total,1130818.0
Under 16 years,185960.0
16 to 20 years,101864.0
21 years and over,842992.0


In [16]:
#View a single row 
Sex09 = Lawful09_df.iloc[20,1:]
Sex09.to_frame(name="LPR 2009")

Unnamed: 0,LPR 2009
Total,1130820.0
Male,513015.0
Female,617799.0
Unknown,4.0


In [17]:
#Use the iloc function to locate the point of interest in a data set
Marital09_status = Lawful09_df.iloc[26:32,:]
Marital09_status

Unnamed: 0,Characteristic,Total,Male,Female,Unknown
26,Total,1130818.0,513015.0,617799,4
27,Single,417232.0,219812.0,197419,1
28,Married,654674.0,279354.0,375320,-
29,Widowed,28439.0,3288.0,25151,-
30,Divorced/separated,26015.0,8261.0,17754,-
31,Unknown,4458.0,2300.0,2155,3


In [18]:
#View the characteristic and Total columns 
New09_marital_df = pd.DataFrame(Marital09_status)
Specific09 = New09_marital_df.iloc[:, 0:2]
Specific09

Unnamed: 0,Characteristic,Total
26,Total,1130818.0
27,Single,417232.0
28,Married,654674.0
29,Widowed,28439.0
30,Divorced/separated,26015.0
31,Unknown,4458.0


In [19]:
#Rename your columns 
Renamed09 = Specific09.rename(columns={"Characteristic": "Marital Status", "Total": "Lawful Permanent Resident 2009"})
Renamed09.head()

Unnamed: 0,Marital Status,Lawful Permanent Resident 2009
26,Total,1130818.0
27,Single,417232.0
28,Married,654674.0
29,Widowed,28439.0
30,Divorced/separated,26015.0


In [20]:
#View a single column
Status09 = Lawful09_df.iloc[26:32,0:2]
Name09 = Status09.rename(columns={"Characteristic": "Marital Status", "Total": "Lawful 2009"})
#Set marital status as index for clarity
Name09.set_index("Marital Status", inplace=True)
Name09

#Umar Code ends here

Unnamed: 0_level_0,Lawful 2009
Marital Status,Unnamed: 1_level_1
Total,1130818.0
Single,417232.0
Married,654674.0
Widowed,28439.0
Divorced/separated,26015.0
Unknown,4458.0
