In [None]:
# Import dependencies
import os

import gmaps
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from scipy.stats import chisquare

from expected import expected

In [None]:
# Narrative/intro goes here

In [None]:
# Stacy start

In [None]:
# Import naturalization data and key it by year
# (read and re-index in one chained expression so re-running the cell is idempotent)
filename = 'naturalization_filed_denied.csv'
path = os.path.join('.', 'Output_files', filename)
nat_df = pd.read_csv(path).set_index('Year')

In [None]:
# Plot the naturalization data - overall
# Draws filed / denied / naturalized counts for every year in nat_df and marks
# a handful of historical events with a text label and a dashed vertical line.

series_cols = ['Petitions filed', 'Petitions denied', 'Naturalized, total']
y_top = 1450000  # shared upper bound for the y-axis and the event marker lines

plt.figure(figsize=(10,6))
plt.plot(nat_df.index, nat_df[series_cols])
plt.legend(loc='best', labels=series_cols)
plt.xlim(1907, 2020)
plt.ylim(-1, y_top)
plt.xticks(np.arange(1910, 2021, step=10))
# The on/off flag is passed positionally: the `b=` keyword was renamed to
# `visible=` in newer Matplotlib, while the positional form works in both.
plt.grid(True, axis='y', color='#cccccc')

# Add points of interest to grid: (label text, label position, x of dashed line)
# NOTE(review): the 'Border wall EO' label sits at x=2003 while its line is at 2017 —
# presumably so the text ends left of the line and stays inside the axes; confirm.
events = [
    ('World War I begins', (1915, 300000), 1914.8),
    ('World War II begins', (1940, 460000), 1939.75),
    ('IIRIRA', (1997, 250000), 1996.75),
    ('9/11', (2002, 400000), 2001.75),
    ('Border wall EO', (2003, 325000), 2017),
]
for label, label_xy, line_x in events:
    # The label is passed positionally: the `s=` keyword was renamed to `text=`
    # in newer Matplotlib, while the positional form works in both.
    plt.annotate(label, xy=label_xy)
    plt.vlines(x=line_x, ymin = 0, ymax = y_top, linestyle='dashed', alpha=0.2)

plt.xlabel("Year")
plt.ylabel("Naturalizations")
plt.title("Trends in US naturalizations, 1907-2018")
plt.tight_layout()

# Spike in 1996 - and general increase - can be explained by this bill: https://www.vox.com/2016/4/28/11515132/iirira-clinton-immigration

In [None]:
# Pre and post 9/11
# Zoomed view of filed / denied / naturalized counts for 1999-2004.

# create df
nat_911 = nat_df.loc[1999:2004,:]

# plot data
plt.figure(figsize=(4,4))
plt.plot(nat_911.index, nat_911[['Petitions filed', 'Petitions denied', 'Naturalized, total']])
plt.legend(loc='best', labels=['Filed', 'Denied', 'Naturalized'])
# Upper x-limit matches the 1999-2004 slice above and the tick range below
plt.xlim(1999,2004)
plt.ylim(0,950000)
plt.xticks(np.arange(1999,2005))
plt.yticks(np.arange(0,1000000, step=100000))
# The label is passed positionally: the `s=` keyword was renamed to `text=`
# in newer Matplotlib, while the positional form works in both.
plt.annotate('9/11', xy=(2001.9,400000))
# The marker line is taller than the y-limit, so it spans the full visible height
plt.vlines(x=2001.75, ymin = 0, ymax = 1450000, linestyle='dashed', alpha=0.2)
plt.xlabel("Year")
plt.ylabel("Naturalizations")
plt.title("Trends in US naturalizations, before and after 9/11")
# Consistent with the other plot cells; also keeps the cell from echoing the title's Text repr
plt.tight_layout()

In [None]:
# Pre and post Trump
# Zoomed view of filed / denied / naturalized counts for 2015-2018.

# create df
nat_trump = nat_df.loc[2015:2018,:]

# plot data
plt.figure(figsize=(5.75,4))
plt.plot(nat_trump.index, nat_trump[['Petitions filed', 'Petitions denied', 'Naturalized, total']])
# Legend is anchored outside the axes, to the right of the plot area
plt.legend(loc='lower left', labels=['Filed', 'Denied', 'Naturalized'], bbox_to_anchor=(1, 0.7))
# The label is passed positionally: the `s=` keyword was renamed to `text=`
# in newer Matplotlib, while the positional form works in both.
plt.annotate('Border wall EO', xy=(2016,400000))
# The marker line is taller than the y-limit, so it spans the full visible height
plt.vlines(x=2017.1, ymin = 0, ymax = 1450000, linestyle='dashed', alpha=0.2)
plt.xlabel("Year")
plt.ylabel("Naturalizations")
plt.title("Trends in US naturalizations, before and after border wall EO")
plt.xlim(2015,2018)
plt.ylim(0,1100000)
plt.xticks(np.arange(2015,2019))
plt.yticks(np.arange(0,1100000, step=200000))
plt.tight_layout()

In [None]:
# Pre and post IIRIRA
# Zoomed view of filed / denied / naturalized counts for 1995-1999.
# nat_iiria is reused by the chi-squared cells further down.

# create df
nat_iiria = nat_df.loc[1995:1999,:]

# plot data
plt.figure(figsize=(5.75,4))
plt.plot(nat_iiria.index, nat_iiria[['Petitions filed', 'Petitions denied', 'Naturalized, total']])
# Legend is anchored outside the axes, to the right of the plot area
plt.legend(loc='lower left', labels=['Filed', 'Denied', 'Naturalized'], bbox_to_anchor=(1, 0.7))
plt.xlabel("Year")
plt.ylabel("Naturalizations")
plt.title("Trends in US naturalizations, before and after IIRIRA")
# The label is passed positionally: the `s=` keyword was renamed to `text=`
# in newer Matplotlib, while the positional form works in both.
plt.annotate('IIRIRA', xy=(1996.9,250000))
plt.vlines(x=1996.75, ymin = 0, ymax = 1500000, linestyle='dashed', alpha=0.2)
plt.xlim(1995,1999)
plt.ylim(0,1500000)
plt.xticks(np.arange(1995,2000))
plt.yticks(np.arange(0,1500000, step=200000))
plt.tight_layout()

In [None]:
# Are these trends statistically significant? Apply chi-squared test.
#
# This cell builds the marginal totals of the 1995-1999 filed/denied table
# (one row per year, one column per petition outcome).
#
# Planned by-country follow-up (NOT implemented below):
#   - import by-country naturalization data
#   - create pre 9/11 sample for muslim countries 99-00
#   - create post 9/11 sample for muslim countries 04-05
#   - create pre 9/11 sample for random non-muslim countries (maybe overall immigration that year?)
#   - create post 9/11 sample for random non-muslim countries (maybe overall immigration that year?)
#   - find expected values for pre 9/11 and post 9/11 muslim countries
#   - find degrees of freedom
#   - perform chi-square test

petition_cols = ['Petitions filed', 'Petitions denied']

# Drop naturalization data (due to lag in how quickly policy changes might affect nat numbers)
nat_iiria_df = nat_iiria.drop(columns=['Naturalized, total'])
# TODO (return to this if needed): add a 'Petitions in pipeline' column = filed - denied

# Get the sample size for petitions filed, denied (column totals)
nat_total_pet_filed = nat_iiria_df['Petitions filed'].sum()
nat_total_pet_denied = nat_iiria_df['Petitions denied'].sum()

# Get the sample size for the years (row totals: filed + denied per year).
# Summing only the two petition columns keeps the row totals consistent with the
# filed + denied grand total even if the CSV carries additional columns.
year_totals = nat_iiria_df[petition_cols].sum(axis=1)
nat_95, nat_96, nat_97, nat_98, nat_99 = (year_totals.loc[year] for year in range(1995, 2000))

In [None]:
# Get population size: grand total of the filed/denied table
# (all petitions filed plus all petitions denied across the selected years)
nat_totals = sum((nat_total_pet_filed, nat_total_pet_denied))

In [None]:
# Get expected values using 'expected' function we imported
# Each call receives (year total, column total, grand total).
# NOTE(review): presumably expected() returns year_total * column_total / grand_total —
# confirm against expected.py.

# Year totals in chronological order, 1995 through 1999
yearly_totals = (nat_95, nat_96, nat_97, nat_98, nat_99)

# Expected petitions filed per year; kept as a list so we can loop through it in a moment
expected_filed = [
    expected(year_total, nat_total_pet_filed, nat_totals) for year_total in yearly_totals
]
(expected_filed_95, expected_filed_96, expected_filed_97,
 expected_filed_98, expected_filed_99) = expected_filed

# Expected petitions denied per year (individual names only; no list is built for these yet)
(expected_denied_95, expected_denied_96, expected_denied_97,
 expected_denied_98, expected_denied_99) = [
    expected(year_total, nat_total_pet_denied, nat_totals) for year_total in yearly_totals
]

In [None]:
# Reference values for judging the chi-squared statistic:
# 4 degrees of freedom, and 9.49 as the chi-squared critical value for
# 4 degrees of freedom at the 0.05 significance level.
degrees_freedom, crit_value = 4, 9.49

In [None]:
# Display the expected 'Petitions filed' values (ordered 1995 through 1999) for inspection
expected_filed

In [None]:
# Apply chi-squared test for petitions filed and changes over time
# (chisquare is imported with the other dependencies at the top of the notebook)

# Observed petitions filed per year, 1995 through 1999
observed_filed = nat_iiria_df['Petitions filed'].values

# scipy's `ddof` is an ADJUSTMENT, not the degrees of freedom themselves: the p-value
# uses k - 1 - ddof degrees of freedom, where k is the number of observations.
# With k = 5 the default already gives 4 (== degrees_freedom); passing ddof=4 would
# leave 0 degrees of freedom and an unusable p-value.
chisquare(observed_filed, f_exp=expected_filed, ddof=0)

In [None]:
# Same test with the observed counts written out literally
# (bracketed values are same as nat_iiria_df['Petitions filed'].values).
# ddof=0 keeps the k - 1 = 4 degrees of freedom for these 5 observations; scipy
# subtracts ddof from k - 1, so passing the degrees of freedom there would zero them out.
chisquare([959963, 1277403, 1412712,  932957,  765346], f_exp=expected_filed, ddof=0)

In [None]:
# Stacy end

In [None]:
# Kana start

In [None]:
# Kana end

In [None]:
# Satish start

In [None]:
# Satish end

In [None]:
# Umar start

In [None]:
# Umar End

In [None]:
# Narrative/summary goes here