In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import folium
import gmaps

In [None]:
# Stacy's code starts here

## Naturalization data table cleaning

In [None]:
# Read the FY2018 naturalization workbook; sheet row 5 (zero-based) supplies the column labels
nat_file = 'fy2018_naturalization.xlsx'
filepath = os.path.join(os.curdir, 'Resources', nat_file)

naturalization = pd.read_excel(io=filepath, header=5)

In [None]:
# Remove the trailing notes rows (index labels 112-117) that follow the data
naturalization.drop(index=range(112, 118), inplace=True)
naturalization.tail()

In [None]:
# Give the terse headers descriptive names, then discard the columns the analysis does not use
naturalization = (
    naturalization
    .rename(columns={
        'filed': 'Petitions filed',
        'Total': 'Naturalized, total',
        'denied': 'Petitions denied',
    })
    .drop(columns=['Civilian', 'Military 2', 'Not reported'])
)

In [None]:
# Find funny/footnoted years and fix them.
# Any value whose text is longer than four characters is treated as a year
# carrying a footnote marker; only its first FOUR characters (the year itself)
# are kept. The previous slice kept five characters, which only worked when
# the fifth happened to be whitespace, and it failed on non-string values.
naturalization['Year'] = [
    int(str(year).strip()[:4]) if len(str(year)) > 4 else year
    for year in naturalization['Year']
]

# Set year as index
naturalization.set_index(keys=['Year'], inplace=True)

In [None]:
# Cast every column to integer in a single call
naturalization = naturalization.astype('int')

In [None]:
# Preview the first rows of the naturalization table as it stands after the steps above
naturalization.head()

In [None]:
# Strip leading/trailing whitespace from every column label (labels are cast to str first).
# NOTE(review): `nat_country` is not created in any cell visible in this notebook — confirm where it is loaded.
nat_country.rename(
    columns={label: str(label).strip() for label in nat_country.columns},
    inplace=True,
)

nat_country.columns

Index(['Region and country of birth', '2009', '2010', '2011', '2012', '2013',
       '2014', '2015', '2016', '2017', '2018'],
      dtype='object')

In [85]:
# Test if samples from certain years have normal distribution



## Asylum seeker demographic data 2018 cleaning

In [9]:
# Read the 2018 asylum demographics workbook; sheet row 4 (zero-based) supplies the column labels
asy_2018_file = 'fy2018_table18d_asylum_age_etc.xlsx'
filepath = os.path.join(os.curdir, 'Resources', asy_2018_file)

asylum_2018 = pd.read_excel(io=filepath, header=4)

In [None]:
# Inspect the last ten rows to see what trails the data at the bottom of the sheet
asylum_2018.tail(10)

In [None]:
# Remove the trailing notes rows (index labels 34-37) that follow the data
asylum_2018.drop(index=range(34, 38), inplace=True)
asylum_2018.tail()

In [None]:
# Take the sex breakdown (positional rows 1-3, first two columns) and label the
# category column "Sex". rename() returns a new frame, so the slice of
# asylum_2018 is never modified in place (which raised SettingWithCopyWarning).
asylum_2018_sex = asylum_2018.iloc[1:4, 0:2].rename(columns={"Characteristic": "Sex"})

In [None]:
# Use the "Sex" column as the index (no rows are dropped here).
# NOTE(review): this mutates asylum_2018_sex in place, so running the cell a second
# time fails because "Sex" is no longer a column.
asylum_2018_sex.set_index(keys='Sex', inplace=True)
asylum_2018_sex

In [None]:
# Build the 2018 asylum-by-sex table in one chained expression:
#   - positional rows 1-3, first two columns of asylum_2018
#   - "Characteristic" becomes "Sex"; "Total" becomes "Asylum 2018" to reflect dataset scope
#   - "Sex" becomes the index
# Every step returns a new frame, so the slice is never mutated in place
# (the earlier inplace calls raised SettingWithCopyWarning) and the cell can be re-run safely.
asylum_2018_sex = (
    asylum_2018.iloc[1:4, 0:2]
    .rename(columns={"Characteristic": "Sex", "Total": "Asylum 2018"})
    .set_index('Sex')
)
asylum_2018_sex

In [None]:
# Broad age groups: positional rows 22-26, first two columns.
asylum_2018_broad_age = asylum_2018.iloc[22:27, 0:2]

# Drop the row labelled 22 (presumably the section's "Total" row — confirm against the sheet),
# label the category column "Age" and index by it. The chain returns new frames, so the
# slice-derived data is not renamed in place (avoids SettingWithCopyWarning).
asylum_2018_broad_age_total = (
    asylum_2018_broad_age
    .drop(index=[22])
    .rename(columns={"Characteristic": "Age"})
    .set_index('Age')
)
asylum_2018_broad_age_total

In [None]:
# Broad age groups: positional rows 22-26, first two columns.
asylum_2018_broad_age = asylum_2018.iloc[22:27, 0:2]

# Drop the row labelled 22 (presumably the section's "Total" row — confirm against the sheet),
# rename "Characteristic" to "Age" and "Total" to "Asylum 2018" (dataset scope), then index by age.
# The chain returns new frames, so nothing derived from the slice is mutated in place
# (avoids SettingWithCopyWarning) and the cell is safe to re-run.
asylum_2018_broad_age_total = (
    asylum_2018_broad_age
    .drop(index=[22])
    .rename(columns={"Characteristic": "Age", "Total": "Asylum 2018"})
    .set_index('Age')
)
asylum_2018_broad_age_total

In [None]:
# Split out marital status [32:38]

In [None]:
# Marital status: positional rows 28-33, first two columns, indexed by status.
# Chained so the slice of asylum_2018 is not renamed/re-indexed in place
# (the inplace calls raised SettingWithCopyWarning).
asylum_2018_marital = (
    asylum_2018.iloc[28:34, 0:2]
    .rename(columns={"Characteristic": "Marital Status"})
    .set_index('Marital Status')
)
asylum_2018_marital

In [None]:
# Marital status: positional rows 28-33, first two columns.
# "Characteristic" becomes "Marital Status" (used as the index) and "Total" becomes
# "Asylum 2018" to reflect dataset scope. Chained so the slice of asylum_2018 is never
# mutated in place (avoids SettingWithCopyWarning) and the cell is safe to re-run.
asylum_2018_marital = (
    asylum_2018.iloc[28:34, 0:2]
    .rename(columns={"Characteristic": "Marital Status", "Total": "Asylum 2018"})
    .set_index('Marital Status')
)
asylum_2018_marital

## Asylum seeker demographic data 2009 cleaning

In [None]:
# Read the 2009 asylum demographics workbook; sheet row 5 (zero-based) supplies the column labels
asy_2009_file = 'fy_2009_table15d_asylum_age_etc.xls'
filepath = os.path.join(os.curdir, 'Resources', asy_2009_file)

asylum_2009 = pd.read_excel(io=filepath, header=5)

In [None]:
# Preview the first ten rows of the 2009 table
asylum_2009.head(10)

In [None]:
# Remove the trailing notes rows (index labels 37-40) that follow the data
asylum_2009.drop(index=range(37, 41), inplace=True)
asylum_2009.tail()

In [None]:
# Build the 2009 asylum-by-sex table in one chained expression:
#   - positional rows 1-3, first two columns of asylum_2009
#   - "Characteristic" becomes "Sex"; "Total" becomes "Asylum 2009" to reflect dataset scope
#   - "Sex" becomes the index
# Every step returns a new frame, so the slice is never mutated in place
# (the inplace calls raised SettingWithCopyWarning) and the cell is safe to re-run.
asylum_2009_sex = (
    asylum_2009.iloc[1:4, 0:2]
    .rename(columns={"Characteristic": "Sex", "Total": "Asylum 2009"})
    .set_index('Sex')
)
asylum_2009_sex

In [None]:
# Broad age groups for 2009: positional rows 25-28, first two columns.
#   - "Characteristic" becomes "Age"; "Total" becomes "Asylum 2009" (dataset scope)
#   - age labels are stripped of surrounding whitespace so they can be matched later
#   - "Age" becomes the index
# The original assigned a column into an iloc slice (chained assignment, which
# raises SettingWithCopyWarning); assign() on the chained copy avoids that.
asylum_2009_broad_age = (
    asylum_2009.iloc[25:29, 0:2]
    .rename(columns={"Characteristic": "Age", "Total": "Asylum 2009"})
    .assign(Age=lambda frame: frame['Age'].str.strip())
    .set_index('Age')
)

asylum_2009_broad_age

In [None]:
# Marital status for 2009: positional rows 31-36, first two columns.
# "Characteristic" becomes "Marital Status" (used as the index) and "Total" becomes
# "Asylum 2009" to reflect dataset scope. Chained so the slice of asylum_2009 is never
# mutated in place (avoids SettingWithCopyWarning) and the cell is safe to re-run.
asylum_2009_marital = (
    asylum_2009.iloc[31:37, 0:2]
    .rename(columns={"Characteristic": "Marital Status", "Total": "Asylum 2009"})
    .set_index('Marital Status')
)

asylum_2009_marital

## Join asylum and LPR data

In [None]:
# Map each table's age-group index labels onto one shared vocabulary
# ('< 16', '16 - 20', '21+') so the 2009 and 2018 frames can be joined on their index
asylum_2009_broad_age = asylum_2009_broad_age.rename(index={
    'Under 16': '< 16',
    'Age 16 to 20': '16 - 20',
    'Age 21 and over': '21+',
})

asylum_2018_broad_age_total = asylum_2018_broad_age_total.rename(index={
    'Under 16 years': '< 16',
    '16 to 20 years': '16 - 20',
    '21 years and over': '21+',
})

In [69]:
# Join the 2009 and 2018 asylum tables side by side, one frame per characteristic.
# Each is an inner join on the index, so only labels present in both years survive.
# NOTE(review): the result names start with "lpr_", but only asylum frames are combined here.
lpr_asylum_sex = pd.merge(
    asylum_2009_sex, asylum_2018_sex,
    how="inner", left_index=True, right_index=True,
)
lpr_asylum_age = pd.merge(
    asylum_2009_broad_age, asylum_2018_broad_age_total,
    how="inner", left_index=True, right_index=True,
)
lpr_asylum_marital = pd.merge(
    asylum_2009_marital, asylum_2018_marital,
    how="inner", left_index=True, right_index=True,
)

Unnamed: 0_level_0,Asylum 2009,Asylum 2018
Marital Status,Unnamed: 1_level_1,Unnamed: 2_level_1
Single,40798.0,14154.0
Married,29770.0,10142.0
Widowed,2452.0,232.0
Divorced/separated,1483.0,896.0
Unknown,99.0,15.0


In [None]:
# Stacy's code ends here

In [None]:
# Kana's code starts here

In [None]:
# Read the by-state csv and discard rows with any missing value
bystate = pd.read_csv("Resources/By state data.csv").dropna()

# Drop the final row (the original comment calls it "others" — presumably an
# aggregate row rather than a state; confirm against the csv)
bystate = bystate.drop(index=bystate.tail(1).index)

# Convert columns 1-19 to integers. The original computed astype(int) but
# discarded the result, so the frame was never actually converted.
year_columns = bystate.columns[1:20]
bystate = bystate.astype({column: int for column in year_columns})

# Show the dataframe
bystate.head()

In [None]:
# Lookup from full state/territory name to its two-letter postal abbreviation
us_state_abbrev = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Northern Mariana Islands':'MP',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Palau': 'PW',
    'Pennsylvania': 'PA',
    'Puerto Rico': 'PR',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virgin Islands': 'VI',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY',
}

# Inverse lookup: abbreviation -> full name
abbrev_us_state = {abbrev: name for name, abbrev in us_state_abbrev.items()}

In [None]:
# Flatten the abbreviation -> name lookup into a list of (abbreviation, name) tuples
dict_list = list(abbrev_us_state.items())

In [None]:
# Two-column frame of abbreviation / full state name built from the tuple list
state_abbrev = pd.DataFrame(dict_list, columns=["Abbrev", "State"])

# Show the dataframe
state_abbrev.head()

In [None]:
# Attach each state's abbreviation by matching on the full name, then relabel so that
# "State" holds the abbreviation and "State Name" holds the full name
complete_state_df = (
    bystate
    .merge(state_abbrev, on="State")
    .rename(columns={"State": "State Name", "Abbrev": "State"})
)

# Show the dataframe
complete_state_df

In [None]:
# Target dtypes: abbreviation as text, the 2000 count as integer
convert_dict = {'State': str, '2000': int}

# Two-column frame (state abbreviation, 2000 count) with those dtypes applied
bystate_2000 = complete_state_df[["State", "2000"]].astype(convert_dict)
bystate_2000.head()

In [None]:
# Target dtypes: abbreviation as text, the 2018 count as integer
convert_dict = {'State': str, '2018': int}

# Two-column frame (state abbreviation, 2018 count) with those dtypes applied
bystate_2018 = complete_state_df[["State", "2018"]].astype(convert_dict)

# Show the dataframe
bystate_2018.head()

In [None]:
# Initialize the map and point at the state-boundary GeoJSON file
map_2000 = folium.Map(location=[37, -102], zoom_start=4)
state_geo = os.path.join('us-states.json')

# Add the choropleth layer, coloring each state by its 2000 value.
# folium.Choropleth(...).add_to(map) replaces the deprecated Map.choropleth()
# method, which current folium releases no longer provide.
# The 'State' column is matched against each GeoJSON feature's `id`
# (presumably the two-letter abbreviation — confirm against us-states.json).
folium.Choropleth(
    geo_data=state_geo,
    name='choropleth',
    data=bystate_2000,
    columns=['State', '2000'],
    key_on='feature.id',
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Number of Immigrants in 2000',
).add_to(map_2000)

folium.LayerControl().add_to(map_2000)

# Show the map
map_2000

In [None]:
# Initialize the map and point at the state-boundary GeoJSON file
state_geo = os.path.join('us-states.json')
map_2018 = folium.Map(location=[37, -102], zoom_start=4)

# Add the choropleth layer, coloring each state by its 2018 value.
# folium.Choropleth(...).add_to(map) replaces the deprecated Map.choropleth()
# method, which current folium releases no longer provide.
# The 'State' column is matched against each GeoJSON feature's `id`
# (presumably the two-letter abbreviation — confirm against us-states.json).
folium.Choropleth(
    geo_data=state_geo,
    name='choropleth',
    data=bystate_2018,
    columns=['State', '2018'],
    key_on='feature.id',
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Number of Immigrants in 2018',
).add_to(map_2018)
folium.LayerControl().add_to(map_2018)

# Show the map
map_2018

In [None]:
# Kana's code ends here

In [None]:
# Satish start

In [None]:
# Load the permanent-resident-by-year csv and remove the thousands separators
# from the "Number" column (values are still strings after this step)
immigration_df = pd.read_csv("Resources/Permanent_Resident_Years.csv")
immigration_df['Number'] = immigration_df['Number'].map(lambda text: text.replace(',', ''))

In [None]:
# Data cleaning: make sure both plotted columns are numeric
immigration_df['Year'] = pd.to_numeric(immigration_df['Year'])
immigration_df['Number'] = pd.to_numeric(immigration_df['Number'])

# Line chart of the yearly count
immigration_plt = immigration_df.plot(
    kind="line", x="Year", y="Number", grid=True, figsize=(15, 10), legend=False,
    title="Number of Lawful Permanent Resident Status Vs. Years")

# Largest value and the year(s) in which it occurs (the first match is annotated)
max_arrow_y = immigration_df['Number'].max()
max_arrow_x = immigration_df.loc[immigration_df['Number'] == max_arrow_y, "Year"].reset_index(drop=True)
# Each value is formatted on its own: formatting the tuple directly uses the
# scalars' repr, which shows up as "np.int64(...)" in the label under NumPy 2.
plt.annotate(
    f"maximum ({max_arrow_x[0]}, {max_arrow_y})",
    xy=(max_arrow_x[0], max_arrow_y))

# Smallest value and the year(s) in which it occurs (the first match is annotated)
min_arrow_y = immigration_df['Number'].min()
min_arrow_x = immigration_df.loc[immigration_df['Number'] == min_arrow_y, "Year"].reset_index(drop=True)
plt.annotate(
    f"Minimum ({min_arrow_x[0]}, {min_arrow_y})",
    xy=(min_arrow_x[0], min_arrow_y))

# Label the graph
plt.ylabel("Number of Lawful Permanent Resident Status")
plt.xlabel("Timepoint in Years")
plt.tight_layout()
plt.show()

In [None]:
# Load the three country-of-birth csv extracts, oldest first
Country_Data_1999 = pd.read_csv("Resources/Country_Data_1999.csv")
Country_Data_2009 = pd.read_csv("Resources/Country_Data_2009.csv")
Country_Data_2018 = pd.read_csv("Resources/Country_Data_2018.csv")

In [None]:
#Data cleaning,Removing extra column
# Keeps every column except the last one.
# NOTE(review): the result overwrites Country_Data_1999, so re-running this cell
# removes one more column each time — run it only once after loading.
Country_Data_1999=Country_Data_1999.iloc[:, :-1]

In [None]:
# Discard every row that contains at least one missing value
Country_Data_1999_df = Country_Data_1999.dropna(axis=0, how='any')
Country_Data_2009_df = Country_Data_2009.dropna(axis=0, how='any')
Country_Data_2018_df = Country_Data_2018.dropna(axis=0, how='any')

In [None]:
# Take the first remaining ROW of each frame; its values become the column headers in the next step
new_header_1999 = Country_Data_1999_df.iloc[0, :]
new_header_2009 = Country_Data_2009_df.iloc[0, :]
new_header_2018 = Country_Data_2018_df.iloc[0, :]

In [None]:
# Install the extracted row as the column labels, then drop that row from the data
Country_Data_2018_df.columns = new_header_2018
Country_Data_2018_df = Country_Data_2018_df.iloc[1:]

Country_Data_1999_df.columns = new_header_1999
Country_Data_1999_df = Country_Data_1999_df.iloc[1:]

Country_Data_2009_df.columns = new_header_2009
Country_Data_2009_df = Country_Data_2009_df.iloc[1:]

In [None]:
# Countries whose rows are extracted from the 2018 table
Central_America_Data = [
    'Mexico', 'Guatemala', 'Honduras', 'Nicaragua',
    'El Salvador', 'Costa Rica', 'Panama', 'Belize',
]
# Keep only the rows whose birth country appears in the list
Latin_Data_df = Country_Data_2018_df.loc[
    Country_Data_2018_df['Region and country of birth'].isin(Central_America_Data)
]
# Remove thousands separators from every (string) column
Latin_Data_df = Latin_Data_df.apply(lambda column: column.str.replace(',', ''))

In [None]:
# One [year, total] pair per selected year, summed over the selected countries.
# NOTE(review): 2016 is not among the selected years — confirm that is intentional.
Latin_Data_summ = [
    [year, pd.to_numeric(Latin_Data_df[year]).sum()]
    for year in ('2014', '2015', '2017', '2018')
]
Latin_Data_summ_df = pd.DataFrame(Latin_Data_summ, columns=['Year', 'Count'])

In [None]:
# Bar chart of the yearly totals for the selected countries
ax = Latin_Data_summ_df.plot.bar(x='Year', y='Count', rot=0, legend=False)
ax.axis('tight')
ax.set_title("Number of Immigrants Vs Year")
ax.set_ylabel("Total Number of Immigrants from Central America")
ax.set_xlabel("Year")
plt.tight_layout()
plt.show()

In [None]:
# Countries whose rows are extracted from the tables
Islam_Country_Data = [
    'Afghanistan', 'Iran', 'Yemen', 'Jordan', 'Saudi Arabia',
    'Sudan', 'Pakistan', 'Syria', 'Oman',
]
# Rows for those countries from the table covering years up to 1999
Islam_Data_1999_df = Country_Data_1999_df.loc[
    Country_Data_1999_df['Region and country of birth'].isin(Islam_Country_Data)
]
# Remove thousands separators from every (string) column
Islam_Data_1999_df = Islam_Data_1999_df.apply(lambda column: column.str.replace(',', ''))

In [None]:
# Rows for the same countries from the 2000-2009 table
Islam_Data_2009_df = Country_Data_2009_df.loc[
    Country_Data_2009_df['Region and country of birth'].isin(Islam_Country_Data)
]
# Remove thousands separators from every (string) column
Islam_Data_2009_df = Islam_Data_2009_df.apply(lambda column: column.str.replace(',', ''))

In [None]:
# Combine the two period tables. No `on=` is given, so pandas joins on every
# column name the two frames share; the outer join keeps rows found in either frame.
Merge_Islam_Country = pd.merge(Islam_Data_1999_df, Islam_Data_2009_df, how='outer')

# One [year, total] pair per selected year, summed over the selected countries.
# The last label was previously misspelled '20006' even though the 2006 column is
# summed; it is now '2006' so the chart's x-axis is labelled correctly.
Islam_Country_summ = pd.DataFrame(
    [
        [year, pd.to_numeric(Merge_Islam_Country[year]).sum()]
        for year in ('1999', '2000', '2005', '2006')
    ],
    columns=['Year', 'Count'],
)

In [None]:
# Bar chart of the yearly totals for the selected countries
ax = Islam_Country_summ.plot.bar(x='Year', y='Count', rot=0, legend=False)
ax.axis('tight')
ax.set_title("Number of Immigrants Vs Year")
ax.set_ylabel("Total Number of Immigrants from Arab Countries")
ax.set_xlabel("Year")
plt.tight_layout()
plt.show()

In [None]:
# Satish end

In [None]:
# Satish's code starts here

In [None]:
# Satish's code ends here

In [3]:
# Umar's code starts here
# Read the FY2018 lawful-permanent-resident workbook; sheet row 4 (zero-based)
# supplies the column labels. Preview the first rows to check the headers.
Lawful_df = pd.read_excel("./Resources/fy2018_Lawful.xlsx", header=4)
Lawful_df.head()

Unnamed: 0,Characteristic,Total,Female,Male,Unknown
0,AGE,,,,
1,Total,1096611.0,584426.0,512176.0,9
2,Under 1 year,2959.0,1472.0,1487.0,-
3,1 to 4 years,34215.0,16976.0,17239.0,-
4,5 to 9 years,61533.0,30098.0,31433.0,2


In [4]:
# Positional rows 19-22 hold the broad age-group section: its "Total" row plus three age bands
Broad_age = Lawful_df.iloc[19:23]
Broad_age

Unnamed: 0,Characteristic,Total,Female,Male,Unknown
19,Total,1096611.0,584426.0,512176.0,9
20,Under 16 years,177300.0,86839.0,90458.0,3
21,16 to 20 years,85830.0,42113.0,43715.0,2
22,21 years and over,833481.0,455474.0,378003.0,4


In [5]:
# Keep only the first two columns ("Characteristic" and "Total").
# NOTE(review): despite its name, `Sex` holds the broad age-group rows at this point.
Broad_age_df = pd.DataFrame(data=Broad_age)
Sex = Broad_age_df.iloc[:, :2]
Sex

Unnamed: 0,Characteristic,Total
19,Total,1096611.0
20,Under 16 years,177300.0
21,16 to 20 years,85830.0
22,21 years and over,833481.0


In [6]:
# Label the columns for the age table and index it by age group
Cleaned = Sex.rename(columns={
    "Characteristic": "Age",
    "Total": "Lawful 2018",
})
Index_age = Cleaned.set_index(keys="Age")
Index_age

Unnamed: 0_level_0,Lawful 2018
Age,Unnamed: 1_level_1
Total,1096611.0
Under 16 years,177300.0
16 to 20 years,85830.0
21 years and over,833481.0


In [7]:
# Take the "Total" row at position 19 (every column after the first) and show its
# Total / Female / Male / Unknown values as a one-column frame
Sex = Lawful_df.iloc[19, 1:]
Sex.to_frame("LPR 2018")

Unnamed: 0,LPR 2018
Total,1096610.0
Female,584426.0
Male,512176.0
Unknown,9.0


In [8]:
# Positional rows 24-29 hold the marital-status section: its "Total" row plus five categories
Marital_status = Lawful_df.iloc[24:30]
Marital_status

Unnamed: 0,Characteristic,Total,Female,Male,Unknown
24,Total,1096611.0,584426.0,512176.0,9
25,Married,627443.0,342625.0,284816.0,2
26,Single,400196.0,191733.0,208457.0,6
27,Widowed,26579.0,23446.0,3133.0,-
28,Divorced/separated,32416.0,22045.0,10371.0,-
29,Unknown,9977.0,4577.0,5399.0,1


In [9]:
# Keep only the first two columns ("Characteristic" and "Total") of the marital-status rows
New_marital_df = pd.DataFrame(data=Marital_status)
Specific = New_marital_df.iloc[:, :2]
Specific

Unnamed: 0,Characteristic,Total
24,Total,1096611.0
25,Married,627443.0
26,Single,400196.0
27,Widowed,26579.0
28,Divorced/separated,32416.0
29,Unknown,9977.0


In [10]:
# Give the two columns descriptive labels and preview the result
Renamed = Specific.rename(columns={
    "Characteristic": "Marital Status",
    "Total": "Lawful Permanent Resident 2018",
})
Renamed.head()

Unnamed: 0,Marital Status,Lawful Permanent Resident 2018
24,Total,1096611.0
25,Married,627443.0
26,Single,400196.0
27,Widowed,26579.0
28,Divorced/separated,32416.0


In [11]:
# Marital-status rows (positions 24-29), first two columns only
Status = Lawful_df.iloc[24:30, 0:2]
# Relabel the columns and index by marital status for clarity
Name18 = (
    Status
    .rename(columns={"Characteristic": "Marital Status", "Total": "Lawful 2018"})
    .set_index("Marital Status")
)
Name18

Unnamed: 0_level_0,Lawful 2018
Marital Status,Unnamed: 1_level_1
Total,1096611.0
Married,627443.0
Single,400196.0
Widowed,26579.0
Divorced/separated,32416.0
Unknown,9977.0


In [12]:
# Read the FY2009 lawful-permanent-resident workbook; sheet row 4 (zero-based)
# supplies the column labels. Preview the first rows to check the headers.
Lawful09_df = pd.read_excel("./Resources/fy2009_Lawful.xls", header=4)
Lawful09_df.head()

Unnamed: 0,Characteristic,Total,Male,Female,Unknown
0,AGE,,,,
1,Total,1130818.0,513015.0,617799.0,4
2,Under 1 year,5782.0,2706.0,3076.0,-
3,1 to 4 years,32395.0,15975.0,16420.0,-
4,5 to 9 years,55373.0,28263.0,27110.0,-


In [13]:
# Positional rows 20-23 hold the 2009 broad age-group section: its "Total" row plus three age bands
Broad09_age = Lawful09_df.iloc[20:24]
Broad09_age

Unnamed: 0,Characteristic,Total,Male,Female,Unknown
20,Total,1130818.0,513015.0,617799,4
21,Under 16 years,185960.0,94529.0,91431,-
22,16 to 20 years,101864.0,50720.0,51144,-
23,21 years and over,842992.0,367764.0,475224,4


In [14]:
# Keep only the first two columns ("Characteristic" and "Total").
# NOTE(review): despite its name, `Sex09` holds the broad age-group rows at this point.
Broad09_age_df = pd.DataFrame(data=Broad09_age)
Sex09 = Broad09_age_df.iloc[:, :2]
Sex09

Unnamed: 0,Characteristic,Total
20,Total,1130818.0
21,Under 16 years,185960.0
22,16 to 20 years,101864.0
23,21 years and over,842992.0


In [15]:
# Label the columns for the 2009 age table and index it by age group
Age09 = Sex09.rename(columns={
    "Characteristic": "Age",
    "Total": "Lawful Permanent Resident 2009",
})
New_index09 = Age09.set_index(keys="Age")
New_index09

Unnamed: 0_level_0,Lawful Permanent Resident 2009
Age,Unnamed: 1_level_1
Total,1130818.0
Under 16 years,185960.0
16 to 20 years,101864.0
21 years and over,842992.0


In [16]:
# Take the "Total" row at position 20 (every column after the first) and show its
# Total / Male / Female / Unknown values as a one-column frame
Sex09 = Lawful09_df.iloc[20, 1:]
Sex09.to_frame("LPR 2009")

Unnamed: 0,LPR 2009
Total,1130820.0
Male,513015.0
Female,617799.0
Unknown,4.0


In [17]:
# Positional rows 26-31 hold the 2009 marital-status section: its "Total" row plus five categories
Marital09_status = Lawful09_df.iloc[26:32]
Marital09_status

Unnamed: 0,Characteristic,Total,Male,Female,Unknown
26,Total,1130818.0,513015.0,617799,4
27,Single,417232.0,219812.0,197419,1
28,Married,654674.0,279354.0,375320,-
29,Widowed,28439.0,3288.0,25151,-
30,Divorced/separated,26015.0,8261.0,17754,-
31,Unknown,4458.0,2300.0,2155,3


In [18]:
# Keep only the first two columns ("Characteristic" and "Total") of the 2009 marital-status rows
New09_marital_df = pd.DataFrame(data=Marital09_status)
Specific09 = New09_marital_df.iloc[:, :2]
Specific09

Unnamed: 0,Characteristic,Total
26,Total,1130818.0
27,Single,417232.0
28,Married,654674.0
29,Widowed,28439.0
30,Divorced/separated,26015.0
31,Unknown,4458.0


In [19]:
# Give the two columns descriptive labels and preview the result
Renamed09 = Specific09.rename(columns={
    "Characteristic": "Marital Status",
    "Total": "Lawful Permanent Resident 2009",
})
Renamed09.head()

Unnamed: 0,Marital Status,Lawful Permanent Resident 2009
26,Total,1130818.0
27,Single,417232.0
28,Married,654674.0
29,Widowed,28439.0
30,Divorced/separated,26015.0


In [20]:
# 2009 marital-status rows (positions 26-31), first two columns only
Status09 = Lawful09_df.iloc[26:32, 0:2]
# Relabel the columns and index by marital status for clarity
Name09 = (
    Status09
    .rename(columns={"Characteristic": "Marital Status", "Total": "Lawful 2009"})
    .set_index("Marital Status")
)
Name09

#Umar Code ends here

Unnamed: 0_level_0,Lawful 2009
Marital Status,Unnamed: 1_level_1
Total,1130818.0
Single,417232.0
Married,654674.0
Widowed,28439.0
Divorced/separated,26015.0
Unknown,4458.0
