In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import gmaps

In [None]:
# Stacy's code starts here

In [None]:
# Read the FY2018 naturalization workbook from ./Resources.
# header=5: the zero-based row 5 of the sheet supplies the column names.
nat_file = 'fy2018_naturalization.xlsx'
filepath = os.path.join(os.curdir, 'Resources', nat_file)

naturalization = pd.read_excel(io=filepath, header=5)

In [None]:
# Remove the notes rows at the end of the sheet (index labels 112 through 117)
naturalization = naturalization.drop(index=range(112, 118))
naturalization.tail()

In [None]:
# Give the petition/total columns descriptive names, then discard the
# columns that are not used further on.
column_names = {
    'filed': 'Petitions filed',
    'Total': 'Naturalized, total',
    'denied': 'Petitions denied',
}
unneeded_columns = ['Civilian', 'Military 2', 'Not reported']

naturalization = (
    naturalization
    .rename(columns=column_names)
    .drop(columns=unneeded_columns)
)

In [None]:
# Find funny/footnoted years and fix them.
# A footnoted year is any value whose text is longer than four characters;
# only the leading four characters (the year itself) are kept. Working on the
# string form means non-string cells (e.g. floats) are handled too, and taking
# exactly four characters avoids swallowing a footnote digit fused to the year.
year_text = naturalization['Year'].astype(str)
has_footnote = year_text.str.len() > 4
naturalization.loc[has_footnote, 'Year'] = (
    year_text[has_footnote].str[:4].astype(int)
)

# Set year as index
naturalization.set_index(keys=['Year'], inplace=True)

In [None]:
# Cast every column to integer in one call
naturalization = naturalization.astype('int')

In [None]:
# Preview the first five rows of the cleaned table
naturalization.head(n=5)

In [None]:
# Read the 2018 asylum demographics workbook (age, sex, marital status, ...).
# header=4: the zero-based row 4 of the sheet supplies the column names.
asy_2018_file = 'fy2018_table18d_asylum_age_etc.xlsx'
filepath = os.path.join(os.curdir, 'Resources', asy_2018_file)

asylum_2018 = pd.read_excel(io=filepath, header=4)

In [None]:
# Inspect the last ten rows to locate the notes at the end of the sheet
asylum_2018.tail(n=10)

In [None]:
# Remove the notes rows at the end of the sheet (index labels 34 through 37)
asylum_2018 = asylum_2018.drop(index=range(34, 38))
asylum_2018.tail()

In [None]:
# Split out the breakdown by sex: positional rows 1-3, first two columns.
# rename() without inplace returns a new frame, so asylum_2018_sex is
# independent of asylum_2018 and later in-place edits do not act on a slice
# (which is what triggers SettingWithCopyWarning).
asylum_2018_sex = (
    asylum_2018
    .iloc[1:4, 0:2]
    .rename(columns={"Characteristic": "Sex"})
)

In [None]:
# Use the Sex labels as the index
asylum_2018_sex = asylum_2018_sex.set_index('Sex')
asylum_2018_sex

In [None]:
# Label the value column with the dataset it came from
asylum_2018_sex = asylum_2018_sex.rename(columns={"Total": "Asylum 2018"})
asylum_2018_sex

In [None]:
# Split out the broad age groups: positional rows 22-26, first two columns.
# The row labelled 22 is dropped and the remaining rows are indexed by the
# Age labels.
asylum_2018_broad_age = asylum_2018.iloc[22:27, 0:2]
asylum_2018_broad_age_total = (
    asylum_2018_broad_age
    .drop(index=[22])
    .rename(columns={"Characteristic": "Age"})
    .set_index('Age')
)
asylum_2018_broad_age_total

In [None]:
# Label the value column with the dataset it came from
asylum_2018_broad_age_total = asylum_2018_broad_age_total.rename(
    columns={"Total": "Asylum 2018"}
)
asylum_2018_broad_age_total

In [None]:
# Split out marital status [32:38]

In [None]:
# Split out marital status: positional rows 28-33, first two columns.
# Chaining rename/set_index (instead of in-place edits on the .iloc slice)
# yields an independent frame and avoids SettingWithCopyWarning.
asylum_2018_marital = (
    asylum_2018
    .iloc[28:34, 0:2]
    .rename(columns={"Characteristic": "Marital Status"})
    .set_index('Marital Status')
)
asylum_2018_marital

In [None]:
# Label the value column with the dataset it came from
asylum_2018_marital = asylum_2018_marital.rename(columns={"Total": "Asylum 2018"})
asylum_2018_marital

In [None]:
# Read the 2009 asylum demographics workbook.
# header=5: the zero-based row 5 of the sheet supplies the column names.
asy_2009_file = 'fy_2009_table15d_asylum_age_etc.xls'
filepath = os.path.join(os.curdir, 'Resources', asy_2009_file)

asylum_2009 = pd.read_excel(io=filepath, header=5)

In [None]:
# Preview the first ten rows of the 2009 table
asylum_2009.head(n=10)

In [None]:
# Remove the notes rows at the end of the sheet (index labels 37 through 40)
asylum_2009 = asylum_2009.drop(index=range(37, 41))
asylum_2009.tail()

In [None]:
# Split out the breakdown by sex: positional rows 1-3, first two columns.
# The slice is renamed and re-indexed in one chain so the result is an
# independent frame (in-place edits on an .iloc slice trigger
# SettingWithCopyWarning). "Total" is renamed to reflect the dataset scope.
asylum_2009_sex = (
    asylum_2009
    .iloc[1:4, 0:2]
    .rename(columns={"Characteristic": "Sex", "Total": "Asylum 2009"})
    .set_index('Sex')
)
asylum_2009_sex

In [None]:
# Split out the broad age groups: positional rows 25-28, first two columns.
# The slice is renamed and re-indexed in one chain so the result is an
# independent frame (in-place edits on an .iloc slice trigger
# SettingWithCopyWarning). "Total" is renamed to reflect the dataset scope.
asylum_2009_broad_age = (
    asylum_2009
    .iloc[25:29, 0:2]
    .rename(columns={"Characteristic": "Age", "Total": "Asylum 2009"})
    .set_index('Age')
)
asylum_2009_broad_age

In [None]:
# Split out marital status: positional rows 31-36, first two columns.
# The slice is renamed and re-indexed in one chain so the result is an
# independent frame (in-place edits on an .iloc slice trigger
# SettingWithCopyWarning). "Total" is renamed to reflect the dataset scope.
asylum_2009_marital = (
    asylum_2009
    .iloc[31:37, 0:2]
    .rename(columns={"Characteristic": "Marital Status", "Total": "Asylum 2009"})
    .set_index('Marital Status')
)
asylum_2009_marital

In [None]:
# Stacy's code ends here

In [None]:
# Kana's code starts here

In [None]:
# Kana's code ends here

In [None]:
# Satish start

In [None]:
# Read the yearly permanent-resident counts and strip the thousands
# separators from the Number column (values stay strings at this point).
immigration_df = pd.read_csv("Resources/Permanent_Resident_Years.csv")
immigration_df['Number'] = immigration_df['Number'].map(
    lambda count: count.replace(',', '')
)

In [None]:
# Convert both plotted columns to numeric types
immigration_df['Year'] = pd.to_numeric(immigration_df['Year'])
immigration_df['Number'] = pd.to_numeric(immigration_df['Number'])

# Line chart of the count per year
immigration_plt = immigration_df.plot(
    kind="line",
    x="Year",
    y="Number",
    grid=True,
    figsize=(15, 10),
    legend=False,
    title="Number of Lawful Permanent Resident Status Vs. Years",
)

# Locate the maximum count and the year(s) it occurred in, then label the
# first such point on the chart
max_arrow_y = immigration_df['Number'].max()
is_max_row = immigration_df['Number'] == max_arrow_y
max_arrow_x = immigration_df.loc[is_max_row, "Year"].reset_index(drop=True)
plt.annotate(f"maximum {max_arrow_x[0],max_arrow_y}", xy=(max_arrow_x[0], max_arrow_y))

# Same for the minimum count
min_arrow_y = immigration_df['Number'].min()
is_min_row = immigration_df['Number'] == min_arrow_y
min_arrow_x = immigration_df.loc[is_min_row, "Year"].reset_index(drop=True)
plt.annotate(f"Minimum {min_arrow_x[0],min_arrow_y}", xy=(min_arrow_x[0], min_arrow_y))

# Axis labels and layout
plt.ylabel("Number of Lawful Permanent Resident Status")
plt.xlabel("Timepoint in Years")
plt.tight_layout()
plt.show()

In [None]:
# Read the three country-of-birth tables from ./Resources
Country_Data_1999 = pd.read_csv("Resources/Country_Data_1999.csv")
Country_Data_2009 = pd.read_csv("Resources/Country_Data_2009.csv")
Country_Data_2018 = pd.read_csv("Resources/Country_Data_2018.csv")

In [None]:
# Data cleaning: keep every column of the 1999 table except the last one.
# Re-running this cell removes one more column each time.
Country_Data_1999 = Country_Data_1999.iloc[:, 0:-1]

In [None]:
# Discard every row that has at least one missing value
Country_Data_1999_df = Country_Data_1999.dropna(axis=0, how='any')
Country_Data_2009_df = Country_Data_2009.dropna(axis=0, how='any')
Country_Data_2018_df = Country_Data_2018.dropna(axis=0, how='any')

In [None]:
# Take the first remaining row of each table; it is used as the column header
new_header_1999 = Country_Data_1999_df.iloc[0, :]
new_header_2009 = Country_Data_2009_df.iloc[0, :]
new_header_2018 = Country_Data_2018_df.iloc[0, :]

In [None]:
# Install the extracted row as the column header, then drop that first row
# from the data. Re-running this cell drops one more row each time.
Country_Data_1999_df.columns = new_header_1999
Country_Data_1999_df = Country_Data_1999_df.iloc[1:]

Country_Data_2009_df.columns = new_header_2009
Country_Data_2009_df = Country_Data_2009_df.iloc[1:]

Country_Data_2018_df.columns = new_header_2018
Country_Data_2018_df = Country_Data_2018_df.iloc[1:]

In [None]:
# Countries treated as Central America in this analysis
Central_America_Data = [
    'Mexico', 'Guatemala', 'Honduras', 'Nicaragua',
    'El Salvador', 'Costa Rica', 'Panama', 'Belize',
]

# Keep only the rows whose country of birth is in that list
is_central_america = Country_Data_2018_df['Region and country of birth'].isin(Central_America_Data)
Latin_Data_df = Country_Data_2018_df[is_central_america]

# Strip thousands separators from every (string) column
Latin_Data_df = Latin_Data_df.apply(lambda column: column.str.replace(',', ''))

In [None]:
# Total across the selected countries for each listed year.
# NOTE(review): 2016 is not in the list - confirm whether that is intentional.
Latin_Data_summ = [
    [year, pd.to_numeric(Latin_Data_df[year]).sum()]
    for year in ('2014', '2015', '2017', '2018')
]
Latin_Data_summ_df = pd.DataFrame(Latin_Data_summ, columns=['Year', 'Count'])

In [None]:
# Bar chart: yearly totals for the Central America selection
Latin_Data_summ_df.plot(kind='bar', x='Year', y='Count', rot=0, legend=False)
plt.axis('tight')
plt.xlabel("Year")
plt.ylabel("Total Number of Immigrants from Central America")
plt.title("Number of Immigrants Vs Year")
plt.tight_layout()
plt.show()

In [None]:
# Countries selected for this comparison
Islam_Country_Data = [
    'Afghanistan', 'Iran', 'Yemen', 'Jordan', 'Saudi Arabia',
    'Sudan', 'Pakistan', 'Syria', 'Oman',
]

# Rows for those countries in the table that runs up to 1999
is_selected_1999 = Country_Data_1999_df['Region and country of birth'].isin(Islam_Country_Data)
Islam_Data_1999_df = Country_Data_1999_df[is_selected_1999]

# Strip thousands separators from every (string) column
Islam_Data_1999_df = Islam_Data_1999_df.apply(lambda column: column.str.replace(',', ''))

In [None]:
# Rows for the selected countries in the 2000-2009 table
is_selected_2009 = Country_Data_2009_df['Region and country of birth'].isin(Islam_Country_Data)
Islam_Data_2009_df = Country_Data_2009_df[is_selected_2009]

# Strip thousands separators from every (string) column
Islam_Data_2009_df = Islam_Data_2009_df.apply(lambda column: column.str.replace(',', ''))

In [None]:
# Combine the two period tables. With no `on` argument, merge joins on the
# columns the two frames have in common; how='outer' keeps rows from both.
Merge_Islam_Country = pd.merge(Islam_Data_1999_df, Islam_Data_2009_df, how='outer')

# Total across the selected countries for each listed year. Each row's label
# is the same string as the column it sums (the original labelled the 2006
# total '20006', which showed up as a wrong tick label on the chart).
Islam_Country_summ = [
    [year, pd.to_numeric(Merge_Islam_Country[year]).sum()]
    for year in ('1999', '2000', '2005', '2006')
]
Islam_Country_summ = pd.DataFrame(Islam_Country_summ, columns=['Year', 'Count'])

In [None]:
# Plotting the graph: yearly totals for the selected countries.
# NOTE(review): the y-label says "Arab Countries" while the country list is
# named Islam_Country_Data - confirm which wording is intended.
Islam_Country_summ.plot(kind='bar', x='Year', y='Count', rot=0, legend=False)
plt.axis('tight')
plt.xlabel("Year")
plt.ylabel("Total Number of Immigrants from Arab Countries")
plt.title("Number of Immigrants Vs Year")
plt.tight_layout()
plt.show()

In [None]:
# Satish end

In [None]:
# Satish's code starts here

In [None]:
# Satish's code ends here

In [None]:
# Umar's code starts here

In [None]:
# Umar's code ends here