In [1]:
from __future__ import division
import sys
sys.path.append('./../cbp')
import utils
import cbp
import pandas as pd
import secrets
from cbp import Counties

In [2]:
# Example code for downloading from Census API
# texas_api = Counties(state_fips='48', read_from='api', key=secrets.censuskey)
# texas_api.head()
# texas_api.to_csv('texas_cbp_2014.csv', index=False)

In [3]:
# Build the Counties object for Texas (state FIPS '48') from the saved 2014 CSV
texas = Counties(
    state_fips='48',
    read_from='csv',
    filepath='texas_cbp_2014.csv',
)
texas.head()

Unnamed: 0,EMP,ESTAB,NAICS2012,NAICS2012_TTL,GEO_TTL,state,county
0,11738,940,0,Total for all sectors,Anderson County,48,1
1,5683,403,0,Total for all sectors,Andrews County,48,3
2,30525,1857,0,Total for all sectors,Angelina County,48,5
3,4131,503,0,Total for all sectors,Aransas County,48,7
4,1539,201,0,Total for all sectors,Archer County,48,9


In [4]:
# Identify FIPS codes for our five Austin-Round Rock MSA counties from the data

# NAICS2012 == '00' is the "Total for all sectors" row; this filter does not
# depend on the county, so apply it once instead of once per county.
county_totals = texas[texas.NAICS2012 == '00']

# str.contains is a substring match on GEO_TTL; .iloc[0] keeps the first hit
# and raises IndexError if a county name matches no row.
fips = [
    county_totals[county_totals.GEO_TTL.str.contains(county)]['county'].iloc[0]
    for county in "Bastrop, Caldwell, Hays, Travis, Williamson".split(", ")
]

# Single-argument print(...) produces the same output on Python 2 and is valid on Python 3
print(fips)

['021', '055', '209', '453', '491']


In [5]:
# Get data at the two-digit NAICS level, for only the Austin MSA counties
# (`fips` is the list of county codes looked up in the previous cell)
austin_region = texas.two_digit(county=fips)
# Preview the first rows of the filtered data
austin_region.head()

Unnamed: 0,EMP,ESTAB,NAICS2012,NAICS2012_TTL,GEO_TTL,state,county
10,12475,1175,0,Total for all sectors,Bastrop County,48,21
27,6116,580,0,Total for all sectors,Caldwell County,48,55
104,46675,3660,0,Total for all sectors,Hays County,48,209
225,544038,32217,0,Total for all sectors,Travis County,48,453
244,136393,9252,0,Total for all sectors,Williamson County,48,491


In [6]:
# Roll the county rows up into one MSA-level row per NAICS2012 code

# How each column is combined across counties: employment and establishment
# counts are summed; the sector title is taken from the first row of each group.
aggs = {
    'EMP': 'sum',
    'ESTAB': 'sum',
    'NAICS2012_TTL': 'first',
}
austin_msa = (
    austin_region
    .groupby('NAICS2012', as_index=False)
    .agg(aggs)
    .assign(GEO_TTL='Austin-Round Rock MSA')  # label every row with the MSA name
)
austin_msa.head()

Unnamed: 0,NAICS2012,NAICS2012_TTL,ESTAB,EMP,GEO_TTL
0,0,Total for all sectors,46884,745697,Austin-Round Rock MSA
1,11,"Agriculture, Forestry, Fishing and Hunting",28,21,Austin-Round Rock MSA
2,21,"Mining, Quarrying, and Oil and Gas Extraction",216,2440,Austin-Round Rock MSA
3,22,Utilities,116,2282,Austin-Round Rock MSA
4,23,Construction,3665,45334,Austin-Round Rock MSA


In [7]:
# LOCATION QUOTIENT

# Create series for employment by sector for Texas
texas_total_emp = texas.two_digit().groupby('NAICS2012').agg('sum').EMP
# Create series for employment by sector for Austin
austin_msa_emp = austin_msa.set_index('NAICS2012').EMP

# Use utils.location_quotient to calculate LQ
lq = pd.DataFrame(utils.location_quotient(austin_msa_emp, texas_total_emp))

# Merge LQ results into our DataFrame.
# Any LQ column left over from an earlier run of this cell is dropped first;
# otherwise re-running would merge a second copy and pandas would rename the
# clashing columns with _x/_y suffixes. On a first run the drop is a no-op.
austin_msa = (
    austin_msa
    .drop(list(lq.columns), axis=1, errors='ignore')
    .merge(lq, left_on='NAICS2012', right_index=True)
)

In [8]:
# View results: the MSA table, now including the merged location quotient column
austin_msa

Unnamed: 0,NAICS2012,NAICS2012_TTL,ESTAB,EMP,GEO_TTL,location_quotient
0,00,Total for all sectors,46884,745697,Austin-Round Rock MSA,1.0
1,11,"Agriculture, Forestry, Fishing and Hunting",28,21,Austin-Round Rock MSA,0.069275
2,21,"Mining, Quarrying, and Oil and Gas Extraction",216,2440,Austin-Round Rock MSA,0.160404
3,22,Utilities,116,2282,Austin-Round Rock MSA,0.81696
4,23,Construction,3665,45334,Austin-Round Rock MSA,1.003203
5,31-33,Manufacturing,1299,42592,Austin-Round Rock MSA,0.709544
6,42,Wholesale Trade,2032,34186,Austin-Round Rock MSA,0.886169
7,44-45,Retail Trade,5886,98831,Austin-Round Rock MSA,1.02617
8,48-49,Transportation and Warehousing,790,13551,Austin-Round Rock MSA,0.441452
9,51,Information,1129,31857,Austin-Round Rock MSA,1.831615


In [9]:
# Load the 2006 Texas data from its CSV for the change-over-time analysis
texas06 = Counties(
    state_fips='48',
    year=2006,
    read_from='csv',
    filepath='texas_cbp_2006.csv',
)

# Filter to the Austin counties, then sum the county rows up to the MSA level
austin_region_06 = texas06.two_digit(county=fips)
austin_msa_06 = (
    austin_region_06
    .groupby('NAICS2012', as_index=False)
    .agg('sum')
    .assign(GEO_TTL='Austin-Round Rock MSA')  # label every row with the MSA name
)

In [10]:
# Clean up Austin 2006 DataFrame: remove the 'old_naics' column and attach the
# sector titles from the 2014 MSA table.
# The drop happens before the merge, tolerates missing columns, and returns a
# new frame instead of mutating in place, so the cell can be re-run safely:
# a second run no longer fails on the already-removed 'old_naics' column and
# no longer duplicates NAICS2012_TTL with _x/_y suffixes.
austin_msa_06 = (
    austin_msa_06
    .drop(['old_naics', 'NAICS2012_TTL'], axis=1, errors='ignore')
    .merge(austin_msa[['NAICS2012', 'NAICS2012_TTL']], on='NAICS2012')
)

In [11]:
# Preview the cleaned-up 2006 MSA table
austin_msa_06.head()

Unnamed: 0,NAICS2012,EMP,ESTAB,GEO_TTL,NAICS2012_TTL
0,0,609646,37489,Austin-Round Rock MSA,Total for all sectors
1,11,55,26,Austin-Round Rock MSA,"Agriculture, Forestry, Fishing and Hunting"
2,21,1439,168,Austin-Round Rock MSA,"Mining, Quarrying, and Oil and Gas Extraction"
3,22,789,103,Austin-Round Rock MSA,Utilities
4,23,41128,3307,Austin-Round Rock MSA,Construction


In [12]:
# 2006 employment by NAICS2012 code: one Series for the Austin MSA, one for Texas
austin_msa_emp_06 = austin_msa_06.set_index('NAICS2012')['EMP']
texas_total_emp_06 = (
    texas06
    .two_digit()
    .groupby('NAICS2012')
    .agg('sum')['EMP']
)

In [13]:
# Location Quotient for 2006
lq_06 = pd.DataFrame(utils.location_quotient(austin_msa_emp_06, texas_total_emp_06))

# Drop any LQ column left over from an earlier run before merging, so that
# re-running this cell does not produce duplicate _x/_y suffixed columns.
# On a first run the drop is a no-op.
austin_msa_06 = (
    austin_msa_06
    .drop(list(lq_06.columns), axis=1, errors='ignore')
    .merge(lq_06, left_on='NAICS2012', right_index=True)
)

In [14]:
# Shift-share analysis via utils.shift_share, fed with the four employment series:
# "small" = Austin MSA, "large" = Texas, "old" = 2006, "new" = 2014.
# The call returns two objects: the per-sector detail and a summary.
shift_share, shift_share_summary = utils.shift_share(
    small_old=austin_msa_emp_06,
    small_new=austin_msa_emp,
    large_old=texas_total_emp_06,
    large_new=texas_total_emp,
)

In [15]:
# View detailed results by sector (first object returned by utils.shift_share)
shift_share

Unnamed: 0_level_0,small_old,small_new,large_old,large_new,large_growth_share,large_industry_growth_rate,industry_mix,small_industry_growth_rate,local_competitiveness
NAICS2012,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
00,609646,745697,8711361,9598578,62089.987452,0.101846,0.0,0.223164,73961.012548
11,55,21,5343,3902,5.601528,-0.269699,-20.434955,-0.618182,-19.166573
21,1439,2440,141126,195803,146.556349,0.387434,410.961068,0.695622,443.482583
22,789,2282,28784,35955,80.356469,0.249131,116.208254,1.892269,1296.435277
23,41128,45334,551643,581674,4188.721002,0.054439,-1949.745853,0.102266,1967.024851
31-33,50637,42592,835966,772668,5157.174319,-0.075718,-8991.326456,-0.158876,-4210.847862
42,39017,34186,462570,496565,3973.724162,0.073492,-1106.303199,-0.123818,-7698.420963
44-45,82717,98831,1127808,1239705,8424.392995,0.099216,-217.513678,0.194809,7907.120683
48-49,11020,13551,349109,395123,1122.342575,0.131804,330.138684,0.229673,1078.518741
51,24084,31857,254004,223880,2452.858311,-0.118597,-5309.137803,0.322745,10629.279492


In [16]:
# View summary (second object returned by utils.shift_share)
shift_share_summary

Unnamed: 0,description,absolute,percentage
small_growth,Growth in smaller geography,136761.0,0.227547
large_growth,Growth in larger geography,784017.0,0.090705
large_growth_share,Growth attributable to larger geography growth...,61211.667818,0.447581
industry_mix,Growth attributable to industry mix,-7224.713613,-0.052827
local_competitiveness,Growth attributable to local competitiveness,82774.045795,0.605246


In [17]:
# Coefficient of Specialization

# Use utils.specialization_coefficient on the MSA and Texas employment series.
# Each call returns a pair: the first item is exported to CSV at the end of the
# notebook, the second is the value printed in the next cell.
# 2014:
cs_sectors, cs = utils.specialization_coefficient(austin_msa_emp, texas_total_emp)
# 2006:
cs_sectors_06, cs_06 = utils.specialization_coefficient(austin_msa_emp_06, texas_total_emp_06)

In [18]:
# Show the 2006 and 2014 coefficients of specialization side by side.
# A single pre-formatted argument keeps the output identical on Python 2
# (space-separated str() of each value) and is valid syntax on Python 3.
print('%s %s' % (cs_06, cs))

0.0966522557497 0.111936597686


In [19]:
# Export the results to CSV files in the working directory.

# MSA tables: written without the DataFrame index
for frame, path in [
    (austin_msa, 'austin_msa_2014.csv'),
    (austin_msa_06, 'austin_msa_2006.csv'),
]:
    frame.to_csv(path, index=False)

# Specialization and shift-share results: written with their index
for frame, path in [
    (cs_sectors, 'specialization_2014.csv'),
    (cs_sectors_06, 'specialization_2006.csv'),
    (shift_share, 'shift_share_2006_2014.csv'),
]:
    frame.to_csv(path)