In [1]:
from __future__ import division
import sys
sys.path.append('./../cbp')
import utils
import cbp
import pandas as pd
import secrets
from cbp import Counties

In [2]:
# Example code for downloading from Census API
# texas_api = Counties(state_fips='48', read_from='api', key=secrets.censuskey)
# texas_api.head()
# texas_api.to_csv('texas_cbp_2014.csv', index=False)

In [3]:
# Instantiate Counties object for Texas 2014
texas = Counties(state_fips='48', read_from='csv', filepath='texas_cbp_2014.csv')
texas.head()

Unnamed: 0,EMP,ESTAB,NAICS2012,NAICS2012_TTL,GEO_TTL,state,county
0,11738,940,0,Total for all sectors,Anderson County,48,1
1,5683,403,0,Total for all sectors,Andrews County,48,3
2,30525,1857,0,Total for all sectors,Angelina County,48,5
3,4131,503,0,Total for all sectors,Aransas County,48,7
4,1539,201,0,Total for all sectors,Archer County,48,9


In [4]:
# Identify FIPS codes for our five Austin-Round Rock MSA counties from the data

fips = []
for county in "Bastrop, Caldwell, Hays, Travis, Williamson".split(", "):
    fips.append(texas[(texas.NAICS2012 == '00') & (texas.GEO_TTL.str.contains(county))]['county'].iloc[0])
print fips

['021', '055', '209', '453', '491']


In [5]:
# Get data at the two-digit NAICS level, for only the Austin MSA counties
austin_region = texas.two_digit(county=fips)
austin_region.head()

Unnamed: 0,EMP,ESTAB,NAICS2012,NAICS2012_TTL,GEO_TTL,state,county
10,12475,1175,0,Total for all sectors,Bastrop County,48,21
27,6116,580,0,Total for all sectors,Caldwell County,48,55
104,46675,3660,0,Total for all sectors,Hays County,48,209
225,544038,32217,0,Total for all sectors,Travis County,48,453
244,136393,9252,0,Total for all sectors,Williamson County,48,491


In [6]:
# Use groupby to aggregate data into MSA-level totals

aggs = {
    'EMP':'sum',
    'ESTAB':'sum',
    'NAICS2012_TTL':'first'
}
austin_msa = austin_region.groupby('NAICS2012',as_index=False).agg(aggs)
austin_msa['GEO_TTL'] = 'Austin-Round Rock MSA'
austin_msa.head()

Unnamed: 0,NAICS2012,NAICS2012_TTL,ESTAB,EMP,GEO_TTL
0,0,Total for all sectors,46884,745697,Austin-Round Rock MSA
1,11,"Agriculture, Forestry, Fishing and Hunting",28,61,Austin-Round Rock MSA
2,21,"Mining, Quarrying, and Oil and Gas Extraction",216,2440,Austin-Round Rock MSA
3,22,Utilities,116,2457,Austin-Round Rock MSA
4,23,Construction,3665,45334,Austin-Round Rock MSA


In [7]:
# LOCATION QUOTIENT

# Create series for employment by sector for Texas
texas_total_emp = texas.two_digit().groupby('NAICS2012').agg('sum').EMP
# Create series for employment by sector for Austin
austin_msa_emp = austin_msa.set_index('NAICS2012').EMP

# Use utils.location_quotient to calculate LQ
lq = pd.DataFrame(utils.location_quotient(austin_msa_emp, texas_total_emp))

# Merge LQ results into our DataFrame
austin_msa = austin_msa.merge(lq, left_on='NAICS2012', right_index=True)

In [8]:
# View results
austin_msa

Unnamed: 0,NAICS2012,NAICS2012_TTL,ESTAB,EMP,GEO_TTL,location_quotient
0,00,Total for all sectors,46884,745697,Austin-Round Rock MSA,1.0
1,11,"Agriculture, Forestry, Fishing and Hunting",28,61,Austin-Round Rock MSA,0.085429
2,21,"Mining, Quarrying, and Oil and Gas Extraction",216,2440,Austin-Round Rock MSA,0.151627
3,22,Utilities,116,2457,Austin-Round Rock MSA,0.558529
4,23,Construction,3665,45334,Austin-Round Rock MSA,0.986453
5,31-33,Manufacturing,1299,42592,Austin-Round Rock MSA,0.675397
6,42,Wholesale Trade,2032,34186,Austin-Round Rock MSA,0.876207
7,44-45,Retail Trade,5886,98831,Austin-Round Rock MSA,1.024317
8,48-49,Transportation and Warehousing,790,13926,Austin-Round Rock MSA,0.439811
9,51,Information,1129,31857,Austin-Round Rock MSA,1.775642


In [9]:
# Get 2006 data for change-over-time analysis
texas06 = Counties(state_fips='48', year=2006, read_from='csv', filepath='texas_cbp_2006.csv')

# Get Austin counties and aggregate to MSA
austin_region_06 = texas06.two_digit(county=fips)
austin_msa_06 = austin_region_06.groupby('NAICS2012',as_index=False).agg('sum')
austin_msa_06['GEO_TTL'] = 'Austin-Round Rock MSA'

In [10]:
# Clean up Austin 2006 DataFrame
austin_msa_06 = austin_msa_06.merge(austin_msa[['NAICS2012', 'NAICS2012_TTL']], on='NAICS2012')
austin_msa_06.drop('old_naics', axis=1, inplace=True)

In [11]:
austin_msa_06.head()

Unnamed: 0.1,NAICS2012,Unnamed: 0,EMP,ESTAB,GEO_TTL,NAICS2012_TTL
0,0,331581,595807,36026,Austin-Round Rock MSA,Total for all sectors
1,11,331584,57,20,Austin-Round Rock MSA,"Agriculture, Forestry, Fishing and Hunting"
2,21,331610,1230,135,Austin-Round Rock MSA,"Mining, Quarrying, and Oil and Gas Extraction"
3,22,331658,1164,87,Austin-Round Rock MSA,Utilities
4,23,331698,40177,3124,Austin-Round Rock MSA,Construction


In [12]:
# Get 2006 series for MSA and Texas employment by sector
austin_msa_emp_06 = austin_msa_06.set_index('NAICS2012').EMP
texas_total_emp_06 = texas06.two_digit().groupby('NAICS2012').agg('sum').EMP

In [13]:
# Location Quotient for 2006
lq_06 = pd.DataFrame(utils.location_quotient(austin_msa_emp_06, texas_total_emp_06))
austin_msa_06 = austin_msa_06.merge(lq_06, left_on='NAICS2012', right_index=True)

In [14]:
# Pass the 4 series (2014 and 2006 employment for Texas and for Austin MSA) to shift share function from utils
shift_share, shift_share_summary = utils.shift_share(small_old=austin_msa_emp_06,
                                                     small_new=austin_msa_emp,
                                                     large_old=texas_total_emp_06,
                                                     large_new=texas_total_emp)

In [15]:
# View detailed results by sector
shift_share

Unnamed: 0_level_0,small_old,small_new,large_old,large_new,large_growth_share,large_industry_growth_rate,industry_mix,small_industry_growth_rate,local_competitiveness
NAICS2012,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
00,595807,745697,8711481,9599523,60736.1297,0.101939,0.0,0.251575,89153.8703
11,57,61,8643,9192,5.810538,0.06352,-2.18992,0.070175,0.379382
21,1230,2440,144246,207158,125.3853,0.436144,411.071586,0.98374,673.543114
22,1164,2457,41304,56630,118.657308,0.371054,313.249141,1.110825,861.09355
23,40177,45334,554343,591609,4095.613987,0.067226,-1394.694012,0.128357,2456.080024
31-33,48998,42592,847541,811813,4994.820274,-0.042155,-7060.325712,-0.13074,-4340.494563
42,38617,34186,463860,502260,3936.588729,0.082784,-739.734505,-0.114742,-7627.854223
44-45,79662,98831,1128328,1242070,8120.686001,0.100806,-90.294835,0.240629,11138.608834
48-49,10740,13926,353849,407613,1094.827743,0.151941,537.01341,0.296648,1554.158847
51,23934,31857,256514,230960,2439.81445,-0.09962,-4824.126558,0.331035,10307.312108


In [16]:
# View summary
shift_share_summary

Unnamed: 0,description,absolute,percentage
small_growth,Growth in smaller geography,149767.0,0.250809
large_growth,Growth in larger geography,899327.0,0.103041
large_growth_share,Growth attributable to larger geography growth...,60871.606988,0.406442
industry_mix,Growth attributable to industry mix,86.644944,0.000579
local_competitiveness,Growth attributable to local competitiveness,88808.748069,0.592979


In [17]:
# Coefficient of Specialization

# Use utils.location_quotient to calculate LQ
cs_sectors, cs = utils.specialization_coefficient(austin_msa_emp, texas_total_emp)
cs_sectors_06, cs_06 = utils.specialization_coefficient(austin_msa_emp_06, texas_total_emp_06)

In [18]:
print cs_06, cs

0.0943337058033 0.111008559348


In [19]:
austin_msa.to_csv('austin_msa_2014.csv', index=False)
austin_msa_06.to_csv('austin_msa_2006.csv', index=False)
cs_sectors.to_csv('specialization_2014.csv')
cs_sectors_06.to_csv('specialization_2006.csv')
shift_share.to_csv('shift_share_2006_2014.csv')