In [6]:
%matplotlib inline
import os
import re
from pprint import pprint

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
pd.options.display.float_format = '{0:,.2f}'.format

# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

# Build a one-row-per-state summary of the EPA water-system files:
# number of systems, total population served and total violations.
# The accumulator is reset here so re-running the cell does not duplicate rows.
EPA_state_summary = []

# State Abbreviations
# NOTE(review): this is not every state (e.g. AK, AZ, CT, GA, IA, MN, NY, RI
# and VA are absent) -- presumably no input file exists for them; confirm.
states = ["AL", "AR", "CA", "CO", "DC", "DE", "FL", "HI", "ID",
          "IL", "IN", "KS", "KY", "LA", "ME", "MD", "MA", "MI", "MS", "MO",
          "MT", "NE", "NV", "NH", "NJ", "NM", "NC", "ND", "OH", "OK", "OR", "PA",
          "SC", "SD", "TN", "TX", "UT", "VT", "WA", "WV", "WI", "WY"]

# The only columns the summary actually reads. Passing them as `usecols`
# skips every other column at parse time, instead of loading the whole file
# and dropping columns afterwards (which does not lower peak memory).
EPA_SUMMARY_COLUMNS = ["PopulationServed Count", "Number of Violations"]

for state in states:

    # File to Load (path is relative to the notebook's working directory)
    EPADataFile = "../1_Input/EPA/water_system_summary_" + state + ".csv"

    # Load just the needed columns; each row is one water system
    EPA_State_data = pd.read_csv(EPADataFile, usecols=EPA_SUMMARY_COLUMNS)

    # Get specific variables
    totalWaterSystems = len(EPA_State_data)
    populationServed = EPA_State_data["PopulationServed Count"].sum()
    totalViolations = EPA_State_data["Number of Violations"].sum()

    EPA_state_summary.append([state, totalWaterSystems, populationServed, totalViolations])

# Create Summary Data Frame
EPA_Violations_df = pd.DataFrame(EPA_state_summary, columns=['State',
                                                    'Number of Water Systems',
                                                    'Population Served',
                                                    'Number of Violations'])

EPA_Violations_df.head()

Unnamed: 0,State,Number of Water Systems,Population Served,Number of Violations
0,AL,581,5683911,8723
1,AR,1054,2888839,13990
2,CA,7499,42679064,70484
3,CO,2013,6133087,51549
4,DC,6,665602,78


In [7]:
# Download violent-crime offense counts per state from the FBI crime API and
# collect them into four parallel lists (one entry per kept record).

# The API key is read from the environment so the secret is never stored in
# the notebook. A key that was previously committed here should be rotated.
myapi_key = os.environ.get("FBI_API_KEY")
if not myapi_key:
    raise RuntimeError(
        "Set the FBI_API_KEY environment variable to your API key before running this cell."
    )

# Built after the key is known: a plain string with a "{myapi_key}" placeholder
# is never substituted and would send the literal placeholder text as the key.
national_agency_base = "https://api.usa.gov/crime/fbi/sapi/api/participation/national?api_key=" + myapi_key
violent_state_base = "https://api.usa.gov/crime/fbi/sapi/api/data/nibrs/violent-crime/offense/states/"
api_key_url = "/count?api_key="

states = ["AL", "AR", "CA", "CO", "DC", "DE", "FL", "HI", "ID",
          "IL", "IN", "KS", "KY", "LA", "ME", "MD", "MA", "MI", "MS", "MO",
          "MT", "NE", "NV", "NH", "NJ", "NM", "NC", "ND", "OH", "OK", "OR", "PA",
          "SC", "SD", "TN", "TX", "UT", "VT", "WA", "WV", "WI", "WY"]

# Records whose data_year falls outside 1999-2016 (inclusive) are discarded.
YEARS_OF_INTEREST = range(1999, 2017)

# Without a timeout a stalled connection would hang the notebook indefinitely.
REQUEST_TIMEOUT_SECONDS = 30

state_list = []
year_list = []
offense_name_list = []
offense_count_list = []

for state in states:
    violent_state_url = violent_state_base + state + api_key_url + myapi_key
    http_response = requests.get(violent_state_url, timeout=REQUEST_TIMEOUT_SECONDS)

    # Fail with a message that names the state but not the URL: the URL
    # carries the API key and would otherwise end up in the cell output.
    if not http_response.ok:
        raise RuntimeError(
            "FBI API request for state %s failed with HTTP status %s"
            % (state, http_response.status_code)
        )
    response = http_response.json()

    # NOTE(review): only this single response is read per state; if the API
    # splits results across pages, later pages are ignored -- confirm.
    if int(response['pagination']['count']) != 0:
        for record in response['results']:
            if record['data_year'] in YEARS_OF_INTEREST:
                state_list.append(state)
                year_list.append(record['data_year'])
                offense_name_list.append(record['offense_name'])
                offense_count_list.append(record['offense_count'])

In [8]:
# Combine the four parallel lists filled by the download loop into a single
# table; each position across the lists becomes one row.
FBI_data_all = pd.DataFrame(
    dict(
        zip(
            ('State', 'Year', 'Offense', 'Offense Count'),
            (state_list, year_list, offense_name_list, offense_count_list),
        )
    )
)

In [9]:
# `index` is (despite its name) the GroupBy object keyed on State and Year.
index = FBI_data_all.groupby(by=["State", "Year"])

# Offense counts summed per year across all states.
offcount = FBI_data_all.groupby(by="Year")["Offense Count"].sum()

# Offense counts summed per (State, Year) pair, kept as a one-column frame.
FBI_df_all = index["Offense Count"].sum().to_frame()
FBI_df_all.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Offense Count
State,Year,Unnamed: 2_level_1
AL,2006,170
AL,2007,187
AL,2008,145
AL,2009,121
AL,2010,105


In [None]:
# Bar chart of summed violent-crime offense counts, one bar per
# (State, Year) row of FBI_df_all.
# The frame is FBI_df_all; the name FBI_df is not defined in the visible
# cells and would raise NameError on a fresh kernel.
first_graph = FBI_df_all.plot.bar(rot=0, subplots=True)

# Incorporate the other graph properties
plt.xlabel("US States")
plt.ylabel("Instances of Violent Crime")
plt.title("Instances of Violent Crime in the United States")

# Enlarge the figure before tightening the layout so labels are laid out
# against the final size.
plt.gcf().set_size_inches(14, 14)
plt.tight_layout()
# Save Figure (left disabled; uncomment to write the PNG next to the notebook)
#plt.savefig('Instances of Violent Crime in the USA.png', bbox_inches="tight")

# Show Figure
plt.show()