In [None]:
# Import dependencies
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import folium
import gmaps
import os
from expected import expected
from countryinfo import CountryInfo
from config import gkey

In [None]:
# Narrative/intro goes here

In [None]:
# Stacy start

In [None]:
# Load the naturalization petition counts and index the table by year
filename = 'naturalization_filed_denied.csv'
path = os.path.join('.', 'Output_files', filename)
nat_df = pd.read_csv(path).set_index(keys='Year')

In [None]:
# Plot the naturalization data - overall
series_cols = ['Petitions filed', 'Petitions denied', 'Naturalized, total']
y_top = 1450000  # shared upper bound for the y-axis and the dashed event lines

plt.figure(figsize=(10,6))
plt.plot(nat_df.index, nat_df[series_cols])
plt.legend(loc='best', labels=series_cols)
plt.xlim(1907, 2020)
plt.ylim(-1, y_top)
plt.xticks(np.arange(1910, 2021, step=10))
# The on/off flag is passed positionally: the `b=` keyword was renamed to
# `visible=` in newer matplotlib, while the positional form works in both.
plt.grid(True, axis='y', color='#cccccc')

# Points of interest: (x of dashed line, label text, label anchor)
events = [
    (1914.8, 'World War I begins', (1915, 300000)),
    (1939.75, 'World War II begins', (1940, 460000)),
    (1996.75, 'IIRIRA', (1997, 250000)),
    (2001.75, '9/11', (2002, 400000)),
    # NOTE(review): this label is anchored at x=2003 while its line is at 2017,
    # presumably so the text ends left of the line inside the axes - confirm.
    (2017, 'Border wall EO', (2003, 325000)),
]
for line_x, label, label_xy in events:
    # The label is passed positionally: the `s=` keyword was renamed to `text=`
    # in newer matplotlib, while the positional form works in both.
    plt.annotate(label, xy=label_xy)
    plt.vlines(x=line_x, ymin=0, ymax=y_top, linestyle='dashed', alpha=0.2)

plt.xlabel("Year")
plt.ylabel("Naturalizations")
plt.title("Trends in US naturalizations, 1907-2018")
plt.tight_layout()

# Spike in 1996 - and general increase - can be explained by this bill: https://www.vox.com/2016/4/28/11515132/iirira-clinton-immigration

In [None]:
# Pre and post 9/11
# Label-based slice of the years around 9/11
nat_911 = nat_df.loc[1999:2004,:]

# Plot filed / denied / naturalized counts for the window
plt.figure(figsize=(4,4))
plt.plot(nat_911.index, nat_911[['Petitions filed', 'Petitions denied', 'Naturalized, total']])
plt.legend(loc='best', labels=['Filed', 'Denied', 'Naturalized'])
plt.xlim(1999,2003)
plt.ylim(0,950000)
plt.xticks(np.arange(1999,2005))
plt.yticks(np.arange(0,1000000, step=100000))
# The label is passed positionally: the `s=` keyword was renamed to `text=`
# in newer matplotlib, while the positional form works in both.
plt.annotate('9/11', xy=(2001.9,400000))
plt.vlines(x=2001.75, ymin = 0, ymax = 1450000, linestyle='dashed', alpha=0.2)
plt.xlabel("Year")
plt.ylabel("Naturalizations")
plt.title("Trends in US naturalizations, before and after 9/11")

In [None]:
# Pre and post Trump
# Label-based slice of the years around the border wall executive order
nat_trump = nat_df.loc[2015:2018,:]

# Plot filed / denied / naturalized counts for the window
plt.figure(figsize=(5.75,4))
plt.plot(nat_trump.index, nat_trump[['Petitions filed', 'Petitions denied', 'Naturalized, total']])
# Legend is anchored outside the axes, to the right of the plot area
plt.legend(loc='lower left', labels=['Filed', 'Denied', 'Naturalized'], bbox_to_anchor=(1, 0.7))
# The label is passed positionally: the `s=` keyword was renamed to `text=`
# in newer matplotlib, while the positional form works in both.
plt.annotate('Border wall EO', xy=(2016,400000))
plt.vlines(x=2017.1, ymin = 0, ymax = 1450000, linestyle='dashed', alpha=0.2)
plt.xlabel("Year")
plt.ylabel("Naturalizations")
plt.title("Trends in US naturalizations, before and after border wall EO")
plt.xlim(2015,2018)
plt.ylim(0,1100000)
plt.xticks(np.arange(2015,2019))
plt.yticks(np.arange(0,1100000, step=200000))
plt.tight_layout()

In [None]:
# Pre and post IIRIRA
# Label-based slice of the years around IIRIRA; reused by the chi-squared cells
nat_iiria = nat_df.loc[1995:1999,:]

# Plot filed / denied / naturalized counts for the window
plt.figure(figsize=(5.75,4))
plt.plot(nat_iiria.index, nat_iiria[['Petitions filed', 'Petitions denied', 'Naturalized, total']])
# Legend is anchored outside the axes, to the right of the plot area
plt.legend(loc='lower left', labels=['Filed', 'Denied', 'Naturalized'], bbox_to_anchor=(1, 0.7))
plt.xlabel("Year")
plt.ylabel("Naturalizations")
plt.title("Trends in US naturalizations, before and after IIRIRA")
# The label is passed positionally: the `s=` keyword was renamed to `text=`
# in newer matplotlib, while the positional form works in both.
plt.annotate('IIRIRA', xy=(1996.9,250000))
plt.vlines(x=1996.75, ymin = 0, ymax = 1500000, linestyle='dashed', alpha=0.2)
plt.xlim(1995,1999)
plt.ylim(0,1500000)
plt.xticks(np.arange(1995,2000))
plt.yticks(np.arange(0,1500000, step=200000))
plt.tight_layout()

In [None]:
# Are these trends statistically significant? Apply chi-squared test.
'''
STEPS
import by-country naturalization data
create pre 9/11 sample for muslim countries 99-00
create post 9/11 sample for muslim countries 04-05
create pre 9/11 sample for random non-muslim countries (maybe overall immigration that year?)
create post 9/11 sample for random non-muslim countries (maybe overall immigration that year?)
find expected values for pre 9/11 and post 9/11 muslim countries
find degrees of freedom
perform chi-square test
'''

# Work only with petitions filed/denied: the naturalization column is dropped
# because policy changes may take a while to show up in naturalization numbers.
nat_iiria_df = nat_iiria.drop(columns=['Naturalized, total'])
# (Possible follow-up: add a 'Petitions in pipeline' column = filed - denied.)

# Column totals over the whole 1995-1999 window
nat_total_pet_filed = nat_iiria_df['Petitions filed'].sum()
nat_total_pet_denied = nat_iiria_df['Petitions denied'].sum()

# Row totals (filed + denied) for each year in the window
nat_95, nat_96, nat_97, nat_98, nat_99 = (
    nat_iiria_df.loc[year, :].sum() for year in range(1995, 2000)
)

In [None]:
# Get population size: grand total of petitions filed plus petitions denied,
# i.e. the sum of the two column totals computed from nat_iiria_df
nat_totals = nat_total_pet_filed + nat_total_pet_denied

In [None]:
# Expected values from the imported 'expected' helper, which is called here as
# expected(year total, column total, grand total).

# Yearly totals in chronological order: 95-97 are pre-IIRIRA, 98-99 post-IIRIRA
yearly_totals = [nat_95, nat_96, nat_97, nat_98, nat_99]

# Expected petitions filed per year; kept as a list so it can be handed to the
# chi-squared test, and also unpacked into one name per year
expected_filed = [
    expected(year_total, nat_total_pet_filed, nat_totals)
    for year_total in yearly_totals
]
(expected_filed_95, expected_filed_96, expected_filed_97,
 expected_filed_98, expected_filed_99) = expected_filed

# Expected petitions denied per year (one name per year, no list is kept)
(expected_denied_95, expected_denied_96, expected_denied_97,
 expected_denied_98, expected_denied_99) = [
    expected(year_total, nat_total_pet_denied, nat_totals)
    for year_total in yearly_totals
]

In [None]:
# Reference values for the chi-squared test on the five yearly categories:
# degrees of freedom, and the critical value to compare the statistic against
# (9.49 matches the tabulated chi-squared critical value for 4 degrees of
# freedom at alpha = 0.05).
degrees_freedom, crit_value = 4, 9.49

In [None]:
# Display the list of expected petitions-filed values (one entry per year)
expected_filed

In [None]:
# Apply chi-squared test for petitions filed and changes over time
from scipy.stats import chisquare

# `ddof` in scipy.stats.chisquare is an adjustment subtracted from the default
# degrees of freedom (the p-value uses k - 1 - ddof), not the degrees of freedom
# themselves. With k = 5 yearly categories, the default ddof=0 already gives the
# intended 4 degrees of freedom; passing ddof=4 would leave 0 degrees of freedom
# and an undefined p-value.
chisquare(nat_iiria_df['Petitions filed'].values, f_exp=expected_filed)

In [None]:
# bracketed values are same as nat_iiria_df['Petitions filed'].values
# ddof is left at its default of 0: scipy subtracts ddof from k - 1, so the five
# categories here already give 4 degrees of freedom (ddof=4 would leave 0 and an
# undefined p-value).
chisquare([959963, 1277403, 1412712,  932957,  765346], f_exp=expected_filed)

In [None]:
# Stacy end

In [None]:
# Kana start

In [None]:
# Read immigrants by state data
# The file lives inside the Output_files directory; the original path used a
# comma instead of a path separator and so could not resolve to that location.
complete_state_df = pd.read_csv("Output_files/Immigrants By State.csv")
complete_state_df.head()

In [None]:
# Initialize the map
map_2000 = folium.Map(location=[37, -102], zoom_start=4)
state_geo = os.path.join("Resources", "us-states.json")

# Add the choropleth colouring. The folium.Choropleth class is used because the
# Map.choropleth() method is deprecated and absent from newer folium releases;
# it takes the same arguments.
folium.Choropleth(
    geo_data=state_geo,
    name='choropleth',
    data=complete_state_df,
    columns=['State', '2000'],   # key column, value column
    key_on='feature.id',         # GeoJSON field matched against the key column
    fill_color='OrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Number of Immigrants in 2000',
).add_to(map_2000)

folium.LayerControl().add_to(map_2000)

# Show the map
map_2000

In [None]:
# Initialize the map
state_geo = os.path.join("Resources", "us-states.json")
map_2018 = folium.Map(location=[37, -102], zoom_start=4)

# Add the choropleth colouring. The folium.Choropleth class is used because the
# Map.choropleth() method is deprecated and absent from newer folium releases;
# it takes the same arguments.
folium.Choropleth(
    geo_data=state_geo,
    name='choropleth',
    data=complete_state_df,
    columns=['State', '2018'],   # key column, value column
    key_on='feature.id',         # GeoJSON field matched against the key column
    fill_color='OrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Number of Immigrants in 2018',
).add_to(map_2018)

folium.LayerControl().add_to(map_2018)

# Show the map
map_2018

In [None]:
# Read csv file for US population vs immigrants analysis
# (the plotting cell reads its "Year", "US Population" and
# "Number of Immigrants" columns)
population_comparison = pd.read_csv("Output_files/US Population vs Immigrants.csv")
# Display the loaded table
population_comparison

In [None]:
# Change the figure size
plt.figure(figsize = (20,10))

# Each line carries its own label so the legend cannot drift out of sync with
# the plotting order (the previous hand-written legend list had the two names
# swapped relative to the lines).

# Plot US population vs year
plt.plot(population_comparison["Year"], population_comparison["US Population"],
         label='US Population')

# Plot number of immigrants vs year
plt.plot(population_comparison["Year"], population_comparison["Number of Immigrants"],
         label='Number of Immigrants')

# Set legend from the per-line labels
plt.legend(loc='upper left')

# Set title
plt.title("US Population vs Immigrants Population in U.S. By Year")

# Set xlabel
plt.xlabel("Year")

# Set ylabel
plt.ylabel("Population")

# Change ticker label to plain from scientific
plt.ticklabel_format(style='plain', axis='y', scilimits=(0,0))

# Show the plot
plt.show()

In [None]:
# Kana end

In [None]:
#Satish Start

In [None]:
# Load the yearly Lawful Permanent Resident counts and make both columns numeric
immigration_df = pd.read_csv("Output_files/LPR_Years.csv")
# Bracket assignment is used rather than attribute assignment, which is the
# unambiguous way to overwrite a DataFrame column.
immigration_df["Year"] = pd.to_numeric(immigration_df["Year"])
immigration_df["Number"] = pd.to_numeric(immigration_df["Number"])

# Line chart of LPR count per year
immigration_plt = immigration_df.plot(kind="line", x="Year", y="Number", grid=True, figsize=(15,10), legend=False, title="Number of Lawful Permanent Resident Status Vs. Years")

# Label the highest point. The two values are formatted individually: formatting
# them as a tuple uses each element's repr, which for NumPy scalars under
# NumPy 2 renders as "np.int64(...)" instead of the plain number.
max_arrow_y = immigration_df['Number'].max()
max_arrow_x = immigration_df.loc[immigration_df['Number']==max_arrow_y,"Year"].reset_index(drop=True)
plt.annotate(
    f"maximum ({max_arrow_x[0]}, {max_arrow_y})",
    xy=(max_arrow_x[0], max_arrow_y))

# Label the lowest point the same way
min_arrow_y = immigration_df['Number'].min()
min_arrow_x = immigration_df.loc[immigration_df['Number']==min_arrow_y,"Year"].reset_index(drop=True)
plt.annotate(
    f"Minimum ({min_arrow_x[0]}, {min_arrow_y})",
    xy=(min_arrow_x[0], min_arrow_y))

plt.ylabel("Number of Lawful Permanent Resident Status")
plt.xlabel("Timepoint in Years")
plt.tight_layout()
plt.show()

In [None]:
# Bar chart of the yearly immigrant counts from Central America
Latin_Data_summ_df = pd.read_csv("Output_files/Latin_Data_summ_df.csv")
latin_ax = Latin_Data_summ_df.plot.bar(x='Year', y='Count', rot=0, legend=False)
latin_ax.axis('tight')
latin_ax.set_title("Number of Immigrants Vs Year")
latin_ax.set_ylabel("Number of Immigrants from Central America")
latin_ax.set_xlabel("Year")
plt.tight_layout()
plt.show()

In [None]:
# Bar chart of the yearly immigrant counts stored in Islam_Country_summ.csv
Islam_Country_summ = pd.read_csv("Output_files/Islam_Country_summ.csv")
islam_ax = Islam_Country_summ.plot.bar(x='Year', y='Count', rot=0, legend=False)
islam_ax.axis('tight')
islam_ax.set_title("Number of Immigrants Vs Year")
islam_ax.set_ylabel("Total Number of Immigrants from Arab Countries")
islam_ax.set_xlabel("Year")
plt.tight_layout()
plt.show()

In [None]:
# Load the location table, renumber its rows and drop its first column
location_df = (
    pd.read_csv("Output_files/location_df.csv")
    .reset_index(drop=True)
    .iloc[:, 1:]
)
# Load the merged per-country table used for the heat-map weights and region filters
Country_Data_Merged = pd.read_csv("Output_files/Country_Data_Merged.csv")

In [None]:
# Configure gmaps with the API key imported from config
gmaps.configure(api_key=gkey)

# Build the heat-map layer: one point per row of location_df, weighted by the
# 'Total' column of the merged country table
heat_layer = gmaps.heatmap_layer(
    location_df,
    weights=Country_Data_Merged['Total'],
    dissipating=False,
    max_intensity=90000,
    point_radius=1,
)

# Create the figure, attach the layer and display it
fig = gmaps.figure()
fig.add_layer(heat_layer)
fig

In [None]:
# Country groups compared in the analysis
Central_America_Data = ['Mexico', 'Guatemala', 'Honduras', 'Nicaragua', 'El Salvador', 'Costa Rica', 'Panama', 'Belize']
Islam_Country_Data = ['Afghanistan','Iran','Yemen','Jordan','Saudi Arabia','Sudan','Pakistan','Syria','Oman']

# Column of the merged table that holds the country name
country_col = 'Region and country of birth'

# Keep each group's rows, then transpose the selection
in_central_america = Country_Data_Merged[country_col].isin(Central_America_Data)
in_islam_countries = Country_Data_Merged[country_col].isin(Islam_Country_Data)
Latin_Population = Country_Data_Merged.loc[in_central_america].T
Islam_Population = Country_Data_Merged.loc[in_islam_countries].T

In [None]:
# Drop the first two rows of the transposed Central America table
Latin_Population = Latin_Population[2:]

# Years compared: two before and two after 9/11
comparison_years = ('1999', '2000', '2005', '2006')

# Sum each group's row for every comparison year
(Latin_Population_1999, Latin_Population_2000,
 Latin_Population_2005, Latin_Population_2006) = (
    Latin_Population.loc[year, :].sum() for year in comparison_years
)
(Islam_Population_1999, Islam_Population_2000,
 Islam_Population_2005, Islam_Population_2006) = (
    Islam_Population.loc[year, :].sum() for year in comparison_years
)

# One [year, Central America total, Islam-country total] row per year
List_total = [
    ['1999', Latin_Population_1999, Islam_Population_1999],
    ['2000', Latin_Population_2000, Islam_Population_2000],
    ['2005', Latin_Population_2005, Islam_Population_2005],
    ['2006', Latin_Population_2006, Islam_Population_2006],
]

In [None]:
# Tabulate the per-year group totals
List_total_df = pd.DataFrame(data=List_total, columns=['Year','Latin America', 'Islam Country'])

# Combined total of both groups for each year
Total_1999, Total_2000, Total_2005, Total_2006 = (
    Latin_Population_1999 + Islam_Population_1999,
    Latin_Population_2000 + Islam_Population_2000,
    Latin_Population_2005 + Islam_Population_2005,
    Latin_Population_2006 + Islam_Population_2006,
)

In [None]:
# Total for each group across the four comparison years
Latin_Total = (
    Latin_Population_1999
    + Latin_Population_2000
    + Latin_Population_2005
    + Latin_Population_2006
)
Islam_Total = (
    Islam_Population_1999
    + Islam_Population_2000
    + Islam_Population_2005
    + Islam_Population_2006
)

# Total across both groups and all four years
Grand_Total = Latin_Total + Islam_Total

In [None]:
# Expected values per comparison year from the imported 'expected' helper,
# called here as expected(year total, Islam-country total, grand total)
expected_list = [
    expected(year_total, Islam_Total, Grand_Total)
    for year_total in (Total_1999, Total_2000, Total_2005, Total_2006)
]
expected_99, expected_00, expected_05, expected_06 = expected_list

# Display the expected values
expected_list

In [None]:
from scipy.stats import chisquare

# Reference values for the chi-squared test on the four comparison years:
# degrees of freedom, and the critical value to compare the statistic against
# (7.815 matches the tabulated chi-squared critical value for 3 degrees of
# freedom at alpha = 0.05).
degree_of_freedom, critical_value = 3, 7.815

In [None]:
# Grouped bar chart of the two groups for each comparison year
totals_ax = List_total_df.plot(kind="bar")
# Bars are positioned 0..3; relabel them with the years, unrotated
plt.xticks(np.arange(4), ('1999', '2000', '2005', '2006'), rotation=0)
totals_ax.set_title("Number of Central America vs Islam Countries pre/Post 9/11")
totals_ax.set_ylabel("Number of LPR")
totals_ax.set_xlabel("Year")
plt.show()

In [None]:
# Chi-squared test against the expected values in expected_list.
# ddof is left at its default of 0: scipy subtracts ddof from k - 1, so the four
# categories here already give 3 degrees of freedom (ddof=3 would leave 0 and an
# undefined p-value).
# NOTE(review): the observed values are hard-coded and not derived from any
# variable in this notebook - confirm they are the observed counts that
# correspond to expected_list.
chisquare([ 675.63878934, 1636.52359017, 1512.45163711,  583.77323195], f_exp=expected_list)

In [None]:
## Takeaway - We found statistically significant differences in LPR rates between Islamic countries and Central America after 9/11. Although the number of immigrants from Islamic countries increased slightly compared to pre-9/11 levels, the rate of increase was lower than that for Central America.

In [None]:
# Satish end

In [None]:
# Umar start

In [None]:
# Umar End

In [None]:
# Narrative/summary goes here