**How to Query the IRS 990 Data (BigQuery Dataset)**

In [1]:
import bq_helper
from bq_helper import BigQueryHelper
# Walkthrough of the helper package:
# https://www.kaggle.com/sohier/introduction-to-the-bq-helper-package
# Handle on the public IRS 990 dataset; the revenue query further down runs through it.
irs = BigQueryHelper(
    active_project="bigquery-public-data",
    dataset_name="irs_990",
)

In [2]:
# Second helper bound to the same project/dataset; the table listing, preview and
# schema cells below all go through this one.
bq_assistant = BigQueryHelper(
    active_project="bigquery-public-data",
    dataset_name="irs_990",
)
bq_assistant.list_tables()

['irs_990_2012',
 'irs_990_2013',
 'irs_990_2014',
 'irs_990_2015',
 'irs_990_2016',
 'irs_990_ein',
 'irs_990_ez_2012',
 'irs_990_ez_2013',
 'irs_990_ez_2014',
 'irs_990_ez_2015',
 'irs_990_ez_2016',
 'irs_990_pf_2012',
 'irs_990_pf_2013',
 'irs_990_pf_2014',
 'irs_990_pf_2015']

In [3]:
# Preview ten rows of the 2016 table to get a feel for the columns.
bq_assistant.head(table_name="irs_990_2016", num_rows=10)

Unnamed: 0,ein,elf,tax_pd,subseccd,s501c3or4947a1cd,schdbind,politicalactvtscd,lbbyingactvtscd,subjto6033cd,dnradvisedfundscd,...,exceeds1pct509,subtotpub509,pubsupplesub509,samepubsuppsubtot509,grsinc509,unreltxincls511tx509,subtotsuppinc509,netincunrelatd509,othrinc509,totsupp509
0,611120986,N,201512,7,N,N,N,N,N,N,...,0,0,0,0,0,0,0,0,0,0
1,956056761,Y,201512,19,N,Y,N,N,N,N,...,0,0,0,0,0,0,0,0,0,0
2,60674079,Y,201510,7,N,N,N,N,N,N,...,0,0,0,0,0,0,0,0,0,0
3,626043424,Y,201607,5,N,N,N,N,N,N,...,0,0,0,0,0,0,0,0,0,0
4,135459420,Y,201512,6,N,Y,N,N,Y,N,...,0,0,0,0,0,0,0,0,0,0
5,350424700,N,201512,12,N,N,Y,N,N,N,...,0,0,0,0,0,0,0,0,0,0
6,350568161,N,201512,12,N,N,Y,N,N,N,...,0,0,0,0,0,0,0,0,0,0
7,410229404,Y,201505,8,N,N,N,N,N,N,...,0,0,0,0,0,0,0,0,0,0
8,410328845,Y,201503,7,N,N,N,N,N,N,...,0,0,0,0,0,0,0,0,0,0
9,951664056,Y,201512,5,N,N,N,N,N,N,...,0,0,0,0,0,0,0,0,0,0


In [4]:
# List each column's name, type, mode and description for the 2016 table.
bq_assistant.table_schema(table_name="irs_990_2016")

[SchemaField('ein', 'STRING', 'REQUIRED', 'Employer Identification Number', ()),
 SchemaField('elf', 'STRING', 'NULLABLE', 'E-file indicator', ()),
 SchemaField('tax_pd', 'INTEGER', 'NULLABLE', 'Tax period', ()),
 SchemaField('subseccd', 'INTEGER', 'NULLABLE', 'Subsection code', ()),
 SchemaField('s501c3or4947a1cd', 'STRING', 'NULLABLE', 'Described in 501(c)(3)?', ()),
 SchemaField('schdbind', 'STRING', 'NULLABLE', 'Schedule B required?', ()),
 SchemaField('politicalactvtscd', 'STRING', 'NULLABLE', 'Political activities?', ()),
 SchemaField('lbbyingactvtscd', 'STRING', 'NULLABLE', 'Lobbying activities?', ()),
 SchemaField('subjto6033cd', 'STRING', 'NULLABLE', 'Subject to proxy tax?', ()),
 SchemaField('dnradvisedfundscd', 'STRING', 'NULLABLE', 'Donor advised funds?', ()),
 SchemaField('prptyintrcvdcd', 'STRING', 'NULLABLE', 'Conservation easements?', ()),
 SchemaField('maintwrkofartcd', 'STRING', 'NULLABLE', 'Collections of art?', ()),
 SchemaField('crcounselingqstncd', 'STRING', 'NULL

Which organizations filed as tax-exempt in 2015? (The results below are sorted by total revenue, highest first.)



In [5]:
# Pair every row of the 2015 table with the filer's name/state/city from the EIN
# lookup table (joined on `ein`), then sort by total revenue, largest first.
query1 = """SELECT
  irsein.name AS name,
  irsein.state AS state,
  irsein.city AS city,
  irs990.totrevenue AS revenue,
  irs990.noemplyeesw3cnt AS employees,
  irs990.noindiv100kcnt AS employees_over_100k,
  irs990.compnsatncurrofcr AS officers_comp
FROM
  `bigquery-public-data.irs_990.irs_990_ein` AS irsein
JOIN
  `bigquery-public-data.irs_990.irs_990_2015` AS irs990
USING (ein)
ORDER BY
  revenue DESC;
        """
# NOTE(review): max_gb_scanned is presumably the scan-size ceiling (in GB) above
# which bq_helper declines to run the query — confirm what it returns in that case
# before relying on response1 being a DataFrame.
response1 = irs.query_to_pandas_safe(query=query1, max_gb_scanned=10)
# Show the 50 highest-revenue rows.
response1.head(n=50)

Unnamed: 0,name,state,city,revenue,employees,employees_over_100k,officers_comp
0,KAISER FOUNDATION HEALTH PLAN INC,CA,OAKLAND,45409123226,22656,7065,68314072
1,KAISER FOUNDATION HOSPITALS,OR,PORTLAND,20796549014,66570,25007,0
2,PARTNERS HEALTHCARE SYSTEM INC,MA,SOMERVILLE,11091388129,63919,9629,66230068
3,UPMC,PA,PITTSBURGH,10098163008,59159,4536,90900288
4,UAW RETIREE MEDICAL BENEFITS TR,MI,DETROIT,9890722789,112,45,3541179
5,THRIVENT FINANCIAL FOR LUTHERANS,MN,MINNEAPOLIS,9475129863,7766,1723,19276753
6,THRIVENT FINANCIAL FOR LUTHERANS,MN,MINNEAPOLIS,9021585970,7929,1636,17474648
7,DIGNITY HEALTH,CA,SAN FRANCISCO,8655129029,49928,11282,43758982
8,CLEVELAND CLINIC FOUNDATION,OH,INDEPENDENCE,7523260077,51023,5193,52336784
9,PRESIDENT AND FELLOWS OF HARVARD COLLEGE,MA,CAMBRIDGE,6740015230,34359,3401,8478834
