In [2]:
import os
import warnings

import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 1000)

import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = (20.0, 10.0)
import seaborn as sns

# Silence DeprecationWarning for the rest of the session.
# The filter is registered at top level on purpose: when it is added inside a
# `warnings.catch_warnings()` block, the previous filter list is restored as
# soon as the block exits, so no later cell is affected by it.
warnings.filterwarnings("ignore", category=DeprecationWarning)

Data source: Colorado Information Marketplace https://data.colorado.gov/Nonprofit-Data/Charity-Filed-Financial-Information-in-Colorado/37wu-kn3g

In [5]:
# Load the raw charity filings export, decoded as latin-1
# (presumably the export is not valid UTF-8 -- TODO confirm).
# low_memory=False makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk, which avoids the DtypeWarning raised when
# different chunks of a column are inferred as different types.
char_tmp = pd.read_csv(
    '../../npsg_datafiles/Charity_Filed_Financial_Information_in_Colorado.csv',
    encoding='latin-1',
    low_memory=False,
)

  interactivity=interactivity, compiler=compiler, result=result)


In [16]:
# Print a summary of the raw frame: index range, column names,
# non-null counts and dtypes.
char_tmp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 59097 entries, 0 to 59096
Data columns (total 93 columns):
entityId                                                    59097 non-null int64
registrantTypeAbbr                                          59097 non-null object
name                                                        59097 non-null object
fein                                                        58232 non-null float64
filingType                                                  59097 non-null object
documentId                                                  59089 non-null float64
amendmentEntityId                                           52345 non-null object
externalFilingFromNccs                                      59097 non-null bool
externalFilingType                                          4 non-null object
actualFinancialsProvided                                    59033 non-null object
principalAddress                                            59096 non-null o

#### Row counts by `nteeCode1` category, regardless of whether the principal state is Colorado

In [10]:
# Number of rows with a non-null entityId in each nteeCode1 category,
# across all principal states.
char_tmp.groupby('nteeCode1')['entityId'].count()

nteeCode1
ANIMALS                                                3559
ARTS,CULTURE & HUMANITIES                              7612
CIVIL RIGHTS,SOCIAL ACTION,& ADVOCACY                  3788
COMMUNITY IMPROVEMENT,CAPACITY BUILDING                4244
CRIME,LEGAL                                             452
DISEASES,DISORDERS,& MEDICAL DISCIPLINES               2756
EDUCATION                                             12795
EMPLOYMENT                                              244
ENVIRONMENT QUALITY,PROTECTION & BEAUTIFICATION        1498
FOOD,AGRICULTURE,& NUTRITION                            909
HEALTH                                                 3080
HOUSING,SHELTER                                        1375
HUMAN SERVICES                                         3568
INTERNATIONAL,FOREIGN AFFAIRS, & NATIONAL SECURITY      341
MEDICAL RESEARCH                                        287
MENTAL HEALTH,CRISIS INTERVENTION                       298
MUTUAL,MEMBERSHIP BENEFIT     

#### Should we restrict charities to those whose `principalState` is Colorado? We could also try using `organizationEstablishedState` or `incorporationState`.

In [11]:
# Boolean mask: True for rows whose principalState is exactly 'CO'
# (missing values compare as False).
is_colorado = char_tmp['principalState'].eq('CO')

In [12]:
# Keep only the rows selected by the Colorado mask.
char_tmp_co = char_tmp.loc[is_colorado]

#### Summarize by activity code where principalState is Colorado

In [13]:
# Number of rows with a non-null entityId in each nteeCode1 category,
# Colorado rows only.
char_tmp_co.groupby('nteeCode1')['entityId'].count()

nteeCode1
ANIMALS                                               2219
ARTS,CULTURE & HUMANITIES                             6473
CIVIL RIGHTS,SOCIAL ACTION,& ADVOCACY                 1807
COMMUNITY IMPROVEMENT,CAPACITY BUILDING               3224
CRIME,LEGAL                                            249
DISEASES,DISORDERS,& MEDICAL DISCIPLINES              1007
EDUCATION                                             8636
EMPLOYMENT                                             164
ENVIRONMENT QUALITY,PROTECTION & BEAUTIFICATION        952
FOOD,AGRICULTURE,& NUTRITION                           613
HEALTH                                                1790
HOUSING,SHELTER                                       1042
HUMAN SERVICES                                        2038
INTERNATIONAL,FOREIGN AFFAIRS, & NATIONAL SECURITY      64
MEDICAL RESEARCH                                        31
MENTAL HEALTH,CRISIS INTERVENTION                      220
MUTUAL,MEMBERSHIP BENEFIT                     

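#### `residentialCounty` needs some cleanup: it contains state names and several spellings of Colorado (e.g. `CO`, `COLORADO`, `COLORADO (CO)`, `CALIFORNIA`) in addition to counties. We also need a definition of `residentialCounty`.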

In [14]:
# Number of rows with a non-null entityId for each residentialCounty value,
# Colorado rows only.
char_tmp_co.groupby('residentialCounty')['entityId'].count()

residentialCounty
ADAMS                       926
ALAMOSA                     415
ARAPAHOE                   2149
ARCHULETA                   395
BACA                          7
BENT                         25
BEXAR                         5
BOULDER                    2895
BROOMFIELD                  251
CALIFORNIA                    2
CHAFFEE                     352
CHEYENNE                      2
CLEAR CREEK                 195
CO                          137
CO - COLORADO                17
COLORADO                    211
COLORADO (CO)                24
COLORADO [CO]                 3
CONEJOS                      28
CONTRA COSTA                  3
COSTILLA                     81
CROWLEY                       1
CUSTER                      232
DELTA                       475
DENVER                     4096
DOLORES                      37
DOUGLAS                    1137
EAGLE                      1075
EL PASO                    3493
ELBERT                       88
FREDERICKSBURG (INDEP.

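#### Look at spending on program services by `residentialCounty`. We could compute a per-capita figure after acquiring county population data. Note that some groups have a negative minimum (e.g. ADAMS, ARCHULETA), which should be checked.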

In [15]:
# Summary statistics (count, mean, std, min, quartiles, max) of
# expensesFromProgramServices for each residentialCounty value.
char_tmp_co.groupby('residentialCounty')['expensesFromProgramServices'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
residentialCounty,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ADAMS,909.0,3121984.0,55808290.0,-596228000.0,8149.24,44336.0,144371.0,941880000.0
ALAMOSA,412.0,498405.7,2574445.0,0.0,17271.42,116114.0,357553.9,27160110.0
ARAPAHOE,2129.0,400230.9,1356761.0,0.0,8261.18,49650.0,253638.0,25104900.0
ARCHULETA,393.0,121870.8,187696.2,-26392.0,8989.0,40000.0,163338.0,1225337.0
BACA,7.0,7150.404,9890.419,0.0,0.0,0.0,12526.42,25000.0
BENT,25.0,40243.24,44258.21,0.0,0.0,43709.0,70387.0,172329.0
BEXAR,5.0,187160.0,134834.4,625.0,118157.0,225587.0,232000.0,359431.0
BOULDER,2887.0,2704821.0,21629490.0,0.0,17062.0,68166.0,248591.5,272592600.0
BROOMFIELD,250.0,181493.0,320237.0,0.0,24507.75,69562.5,194679.8,2190045.0
CALIFORNIA,2.0,406681.5,34957.24,381963.0,394322.2,406681.5,419040.8,431400.0
