#### 1. Create Database and Tables

In [1]:
# Execute the create_tables.py script inside this kernel. Per the section heading it
# sets up the database and its tables (the script itself is not shown in this notebook).
%run 'create_tables.py'

#### 2. Process Data and Insert into Tables

In [2]:
# Execute the etl.py script inside this kernel. The recorded output below shows it
# reporting "SQL Tests Passed" for the immigration, demographic, crime, date and
# temperature table checks.
%run 'etl.py'

immigration_table_check: SQL Tests Passed
demographic_table_check: SQL Tests Passed
crime_table_check: SQL Tests Passed
date_table_check: SQL Tests Passed
temperature_table_check: SQL Tests Passed


#### 3. Query Examples

In [3]:
# Load the `sql` IPython extension, which provides the %sql magic used by the cells below.
%load_ext sql
# Open a connection to the `immigrationdb` PostgreSQL database on the local host.
# NOTE(review): the user name and password are hardcoded in this URL. Consider reading
# the connection string from an environment variable before sharing the notebook.
%sql postgresql://student:student@127.0.0.1/immigrationdb

'Connected: student@immigrationdb'

In [4]:
# Example analysis 1-1: Which five states are the most popular destinations for immigrants?
# The inner query counts immigration rows (non-null id) per state and ranks the states
# by that count; the outer query keeps ranks 1-5.
# RANK() gives tied states the same rank, so more than five rows can come back on ties.
# The rank column is aliased explicitly instead of relying on PostgreSQL's implicit name,
# and the final ORDER BY makes the displayed order deterministic rather than plan-dependent.
%sql SELECT current_state_name, cnt, rank \
FROM ( \
    SELECT current_state_name, \
           COUNT(id) AS cnt, \
           RANK() OVER (ORDER BY COUNT(id) DESC) AS rank \
    FROM immigration \
    GROUP BY current_state_name \
) AS state_counts \
WHERE rank <= 5 \
ORDER BY rank, current_state_name;

 * postgresql://student:***@127.0.0.1/immigrationdb
5 rows affected.


current_state_name,cnt,rank
FLORIDA,5091,1
NEW YORK,4454,2
CALIFORNIA,3887,3
HAWAII,1419,4
TEXAS,1107,5


In [5]:
# Example analysis 1-2: What is the weather like in those top-five destination states?
# The CTE recomputes the rank-limited list of states; every temperature row whose state
# matches one of them is returned, sorted by state and then county.
%sql WITH top_states AS ( \
    SELECT current_state_name, cnt, rank \
    FROM ( \
        SELECT current_state_name, \
               COUNT(id) AS cnt, \
               RANK() OVER (ORDER BY COUNT(id) DESC) AS rank \
        FROM immigration \
        GROUP BY current_state_name \
    ) AS state_counts \
    WHERE rank <= 5 \
) \
SELECT * \
FROM temperature \
INNER JOIN top_states ON top_states.current_state_name = temperature.state \
ORDER BY temperature.state, temperature.county;

 * postgresql://student:***@127.0.0.1/immigrationdb
54 rows affected.


county,state,avg_temperature,max_of_monthly_avg_temperature,min_of_monthly_avg_temperature,current_state_name,cnt,rank
Los Angeles,CALIFORNIA,15.88,27.34,4.02,CALIFORNIA,3887,3
Orange,CALIFORNIA,16.12,23.11,8.93,CALIFORNIA,3887,3
Riverside,CALIFORNIA,17.06,30.59,1.6,CALIFORNIA,3887,3
San Bernardino,CALIFORNIA,17.06,30.59,1.6,CALIFORNIA,3887,3
San Diego,CALIFORNIA,16.12,23.11,8.93,CALIFORNIA,3887,3
San Francisco,CALIFORNIA,14.45,22.46,4.4,CALIFORNIA,3887,3
Solano,CALIFORNIA,14.45,22.46,4.4,CALIFORNIA,3887,3
Tulare,CALIFORNIA,15.82,28.9,2.08,CALIFORNIA,3887,3
Ventura,CALIFORNIA,15.88,27.34,4.02,CALIFORNIA,3887,3
Broward,FLORIDA,23.07,30.13,12.96,FLORIDA,5091,1


In [6]:
# Example analysis 1-3: What is the ethnic composition of those top-five destination states?
# For each matching demographic row this returns the raw population counts per group and,
# for four of the groups, that count divided by the total population.
# NOTE: the *_percent columns are fractions in the 0-1 range rounded to two decimals
# (they are not multiplied by 100), and no share is computed for White_population.
# NULLIF turns a zero population into NULL so the division yields NULL instead of raising
# a division-by-zero error; ORDER BY makes the row order deterministic.
%sql WITH top_states AS ( \
    SELECT current_state_name, cnt, rank \
    FROM ( \
        SELECT current_state_name, \
               COUNT(id) AS cnt, \
               RANK() OVER (ORDER BY COUNT(id) DESC) AS rank \
        FROM immigration \
        GROUP BY current_state_name \
    ) AS state_counts \
    WHERE rank <= 5 \
) \
SELECT \
    demographic.state, \
    AmericanIndian_and_AlaskaNative_population, \
    Asian_population, \
    Black_or_AfricanAmerican_population, \
    Hispanic_or_Latino_population, \
    White_population, \
    ROUND(CAST(AmericanIndian_and_AlaskaNative_population AS numeric) / NULLIF(CAST(population AS numeric), 0), 2) AS AmericanIndian_and_AlaskaNative_population_percent, \
    ROUND(CAST(Asian_population AS numeric) / NULLIF(CAST(population AS numeric), 0), 2) AS Asian_population_percent, \
    ROUND(CAST(Black_or_AfricanAmerican_population AS numeric) / NULLIF(CAST(population AS numeric), 0), 2) AS Black_or_AfricanAmerican_population_percent, \
    ROUND(CAST(Hispanic_or_Latino_population AS numeric) / NULLIF(CAST(population AS numeric), 0), 2) AS Hispanic_or_Latino_population_percent \
FROM demographic \
INNER JOIN top_states ON top_states.current_state_name = demographic.state \
ORDER BY demographic.state;

 * postgresql://student:***@127.0.0.1/immigrationdb
5 rows affected.


state,americanindian_and_alaskanative_population,asian_population,black_or_africanamerican_population,hispanic_or_latino_population,white_population,americanindian_and_alaskanative_population_percent,asian_population_percent,black_or_africanamerican_population_percent,hispanic_or_latino_population_percent
CALIFORNIA,401386,4543730,2047009,9856464,14905129,0.01,0.14,0.06,0.31
FLORIDA,46759,264933,1652619,1942022,4758144,0.01,0.03,0.19,0.22
HAWAII,5592,240978,11781,24586,110508,0.01,0.61,0.03,0.06
NEW YORK,112816,1373373,2605008,2730185,4555686,0.01,0.12,0.23,0.24
TEXAS,154497,924552,2130242,6311431,10508923,0.01,0.05,0.11,0.32


In [7]:
# Example analysis 1-4: What does crime look like in those top-five destination states?
# Per state: the summed total and violent crime counts, plus the plain (unweighted)
# average of the per-row crime rates, each rounded to two decimals.
%sql WITH top_states AS ( \
    SELECT current_state_name, cnt, rank \
    FROM ( \
        SELECT current_state_name, \
               COUNT(id) AS cnt, \
               RANK() OVER (ORDER BY COUNT(id) DESC) AS rank \
        FROM immigration \
        GROUP BY current_state_name \
    ) AS state_counts \
    WHERE rank <= 5 \
) \
SELECT \
    crime.state, \
    SUM(total_crime) AS total_crime, \
    SUM(violent_crime) AS violent, \
    ROUND(CAST(AVG(crime_rate) AS numeric), 2) AS crime_rate, \
    ROUND(CAST(AVG(violent_crime_rate) AS numeric), 2) AS violent_crime_rate \
FROM crime \
INNER JOIN top_states ON top_states.current_state_name = crime.state \
GROUP BY crime.state;

 * postgresql://student:***@127.0.0.1/immigrationdb
5 rows affected.


state,total_crime,violent,crime_rate,violent_crime_rate
FLORIDA,265134,39074,3040.81,626.37
TEXAS,148719,21060,1572.15,178.65
NEW YORK,33134,2930,1383.74,139.68
CALIFORNIA,136699,24994,1015.2,206.76
HAWAII,6787,557,4215.2,345.9


In [8]:
# Example analysis 2: How are immigrants coming to the US distributed across visa types?
# Counts immigration rows (non-null id) for each visa_type / visa_type_name pair.
%sql SELECT \
    visa_type, \
    visa_type_name, \
    COUNT(id) \
FROM immigration \
GROUP BY visa_type, visa_type_name

 * postgresql://student:***@127.0.0.1/immigrationdb
3 rows affected.


visa_type,visa_type_name,count
3,Student,10
2,Pleasure,19838
1,Business,3878


In [9]:
# Example analysis 3: Demographic stats of immigrants by origin country.
# For each origin country: the number of rows with gender 'F', the number with gender 'M',
# and the average age rounded to two decimals, listed alphabetically by country.
%sql SELECT \
    origin_country_name, \
    COUNT(CASE WHEN gender = 'F' THEN id END) AS F, \
    COUNT(CASE WHEN gender = 'M' THEN id END) AS M, \
    ROUND(CAST(AVG(age) AS numeric), 2) \
FROM immigration \
GROUP BY origin_country_name \
ORDER BY origin_country_name ASC

 * postgresql://student:***@127.0.0.1/immigrationdb
153 rows affected.


origin_country_name,f,m,round
ALGERIA,1,6,39.0
ANDORRA,0,1,75.0
ANGOLA,1,1,23.5
ANTIGUA-BARBUDA,7,5,42.17
ARGENTINA,340,330,39.79
ARMENIA,2,1,53.33
AUSTRALIA,407,368,40.44
AUSTRIA,52,63,43.0
AZERBAIJAN,1,0,32.0
BAHAMAS,64,49,40.47
