# UCR Violent Crimes per Capita by State Cleaning
- Acquired with the UCR Data Tool (www.ucrdatatool.gov).
- Jurisdictions that report voluntarily submit crime data to the FBI, along with the population inside the jurisdiction.
    - These figures are used to calculate a violent crime rate for the jurisdiction (violent crimes per 100,000 people).
    - State-level results represent the mean across all reporting jurisdictions in the state.
- Columns are states, rows are years, and values are violent crimes per 100,000 people.
- The dataset is very clean; only its structure needs to change.

### Dependencies

In [1]:
import pandas as pd
import numpy as np
import requests
import json

### Read in CSV and create DataFrame

In [75]:
# Path to the raw UCR export, relative to the notebook's working directory.
filepath = 'data/raw/ucr_violent_crime_rate_by_state.csv'

# pd.read_csv already returns a DataFrame, so no extra pd.DataFrame(...)
# wrapper is needed. `csv` stays bound for any cell that still refers to it.
csv = pd.read_csv(filepath)
df = csv

### View head and dtypes

In [76]:
# Preview the first five rows of the freshly loaded table.
df.head(n=5)

Unnamed: 0,Year,Alabama,Alaska,Arizona,Arkansas,California,Colorado,Connecticut,Delaware,District of Columbia,...,South Dakota,Tennessee,Texas,Utah,Vermont,Virginia,Washington,West Virginia,Wisconsin,Wyoming
0,1990,708.6,524.5,652.4,532.2,1045.2,526.0,553.7,655.2,2458.2,...,162.8,670.4,761.4,283.9,127.2,350.6,501.6,169.3,264.7,301.4
1,1991,844.2,613.9,670.7,593.3,1089.9,559.3,539.7,714.3,2453.3,...,182.2,725.9,840.1,286.8,116.8,373.2,522.6,191.0,277.0,310.2
2,1992,871.7,660.5,670.8,576.5,1119.7,578.8,495.3,621.2,2832.8,...,194.5,746.2,806.3,290.5,109.5,374.9,534.5,211.5,275.7,319.5
3,1993,780.4,760.8,715.0,593.3,1077.8,567.3,456.2,685.9,2921.8,...,208.4,765.8,762.1,301.0,114.2,372.2,514.6,208.4,264.4,286.2
4,1994,683.7,766.3,703.1,595.1,1013.0,509.6,455.5,644.3,2662.6,...,227.6,747.9,706.5,304.5,96.9,357.7,511.3,215.8,270.5,272.5


In [77]:
# Flip the table so each state becomes a row and each year a column:
# index by Year, transpose, label the new index "State", then move that
# index back into an ordinary column.
df = (
    df.set_index('Year')
    .transpose()
    .rename_axis('State')
    .reset_index()
)

In [78]:
# Preview the first five rows of the transposed (state-per-row) table.
df.head(n=5)


Year,State,1990,1991,1992,1993,1994,1995,1996,1997,1998,...,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014
0,Alabama,708.6,844.2,871.7,780.4,683.7,632.4,565.4,564.5,512.1,...,433.0,425.2,448.9,452.8,450.1,383.7,419.8,450.3,431.0,427.4
1,Alaska,524.5,613.9,660.5,760.8,766.3,770.9,727.7,701.1,653.9,...,632.0,688.0,661.3,652.1,633.4,635.3,610.1,604.1,638.7,635.8
2,Arizona,652.4,670.7,670.8,715.0,703.1,713.5,631.5,623.7,577.9,...,512.0,542.6,518.0,485.6,426.5,413.6,414.2,428.6,415.6,399.9
3,Arkansas,532.2,593.3,576.5,593.3,595.1,553.2,524.3,526.9,490.2,...,528.0,552.8,537.1,513.0,515.8,503.5,482.3,469.6,463.2,480.1
4,California,1045.2,1089.9,1119.7,1077.8,1013.0,966.0,862.7,798.3,703.7,...,526.0,533.5,524.1,504.2,473.3,439.6,411.2,423.5,402.6,396.1


### Reorganize DataFrame

Unnamed: 0,Year,Alabama,Alaska,Arizona,Arkansas,California,Colorado,Connecticut,Delaware,District of Columbia,...,South Dakota,Tennessee,Texas,Utah,Vermont,Virginia,Washington,West Virginia,Wisconsin,Wyoming
0,1990,708.6,524.5,652.4,532.2,1045.2,526.0,553.7,655.2,2458.2,...,162.8,670.4,761.4,283.9,127.2,350.6,501.6,169.3,264.7,301.4
1,1991,844.2,613.9,670.7,593.3,1089.9,559.3,539.7,714.3,2453.3,...,182.2,725.9,840.1,286.8,116.8,373.2,522.6,191.0,277.0,310.2
2,1992,871.7,660.5,670.8,576.5,1119.7,578.8,495.3,621.2,2832.8,...,194.5,746.2,806.3,290.5,109.5,374.9,534.5,211.5,275.7,319.5
3,1993,780.4,760.8,715.0,593.3,1077.8,567.3,456.2,685.9,2921.8,...,208.4,765.8,762.1,301.0,114.2,372.2,514.6,208.4,264.4,286.2
4,1994,683.7,766.3,703.1,595.1,1013.0,509.6,455.5,644.3,2662.6,...,227.6,747.9,706.5,304.5,96.9,357.7,511.3,215.8,270.5,272.5


In [7]:
# Download the US state boundaries as GeoJSON.
# A timeout keeps the cell from hanging forever on an unresponsive server,
# and raise_for_status() surfaces HTTP errors directly instead of letting
# them show up later as a confusing JSON decoding failure.
response = requests.get(
    'http://eric.clst.org/assets/wiki/uploads/Stuff/gz_2010_us_040_00_20m.json',
    timeout=30,
)
response.raise_for_status()
coordinateList = response.json()

In [8]:
# Pretty-print the first GeoJSON feature to inspect its structure.
firstFeature = coordinateList['features'][0]
print(json.dumps(firstFeature, indent=4))

{
    "type": "Feature",
    "properties": {
        "GEO_ID": "0400000US04",
        "STATE": "04",
        "NAME": "Arizona",
        "LSAD": "",
        "CENSUSAREA": 113594.084
    },
    "geometry": {
        "type": "Polygon",
        "coordinates": [
            [
                [
                    -112.538593,
                    37.000674
                ],
                [
                    -112.534545,
                    37.000684
                ],
                [
                    -112.368946,
                    37.001125
                ],
                [
                    -112.35769,
                    37.001025
                ],
                [
                    -111.412784,
                    37.001478
                ],
                [
                    -111.405869,
                    37.001481
                ],
                [
                    -111.405517,
                    37.001497
                ],
                [
           

pandas.core.frame.DataFrame

In [25]:
# Four independent, empty accumulators: one per field collected from each
# GeoJSON feature.
stateType, stateID, stateName, geometry = [], [], [], []


In [26]:
# Pull the per-state fields out of the GeoJSON feature list into parallel
# lists (one entry per feature, all in the same order).
# Each list is rebuilt from scratch rather than appended to, so re-running
# this cell cannot leave duplicate entries behind.
features = coordinateList['features']
stateType = [feature['type'] for feature in features]
stateID = [feature['properties']['STATE'] for feature in features]
stateName = [feature['properties']['NAME'] for feature in features]
geometry = [feature['geometry'] for feature in features]

In [31]:
# Gather the parallel per-state lists under their column names, then build
# a DataFrame with one row per GeoJSON feature.
allCoords = dict(
    type=stateType,
    stateid=stateID,
    stateName=stateName,
    geometry=geometry,
)

coordsDF = pd.DataFrame(allCoords)

In [32]:
# Preview the first five rows of the per-state geometry table.
coordsDF.head(n=5)

Unnamed: 0,geometry,stateName,stateid,type
0,"{'type': 'Polygon', 'coordinates': [[[-112.538...",Arizona,4,Feature
1,"{'type': 'Polygon', 'coordinates': [[[-94.0429...",Arkansas,5,Feature
2,"{'type': 'MultiPolygon', 'coordinates': [[[[-1...",California,6,Feature
3,"{'type': 'Polygon', 'coordinates': [[[-107.317...",Colorado,8,Feature
4,"{'type': 'Polygon', 'coordinates': [[[-72.3974...",Connecticut,9,Feature


In [None]:
# Reference example: build a DataFrame from a dict of column lists.
# The pasted REPL prompts (">>>") and printed output were not valid Python,
# and the example assigned to `df`, which would have overwritten the
# crime-rate table; it now uses its own name.
d = {'col1': [1, 2], 'col2': [3, 4]}
example_df = pd.DataFrame(data=d)
example_df
# Expected display:
#    col1  col2
# 0     1     3
# 1     2     4

In [22]:
# Expand every GeoJSON feature into its own small frame and add them all to
# coordsDF in one step.
#
# - DataFrame.append was removed in pandas 2.0, and calling it inside the
#   loop re-copied the growing frame on every iteration; the frames are
#   collected in a list and concatenated once with pd.concat instead.
# - The scalar fields are broadcast against the 'coordinates' list, so a
#   feature yields one row per top-level element of that list.
# - The per-feature values are read inline rather than stored in stateType /
#   stateID / stateName, so the lists of the same names are not overwritten.
# - Index labels are kept as produced by each per-feature frame (they restart
#   at 0 for every feature), matching the previous append behaviour.
featureFrames = []
for feature in coordinateList['features']:
    featureGeometry = feature['geometry']
    featureFrames.append(pd.DataFrame({
        'type': feature['type'],
        'stateid': feature['properties']['STATE'],
        'state': feature['properties']['NAME'],
        'geotype': featureGeometry['type'],
        'geocoords': featureGeometry['coordinates'],
    }))

coordsDF = pd.concat([coordsDF] + featureFrames)
    

In [23]:
# Preview the first five rows of the expanded geometry table.
coordsDF.head(n=5)

Unnamed: 0,geocoords,geometry,geotype,state,stateid,type
0,"[[-112.538593, 37.000674], [-112.534545, 37.00...",,Polygon,Arizona,4,Feature
0,"[[-94.042964, 33.019219], [-94.043036, 33.0794...",,Polygon,Arkansas,5,Feature
0,"[[[-120.248484, 33.999329], [-120.247393, 34.0...",,MultiPolygon,California,6,Feature
1,"[[[-119.789798, 34.05726], [-119.770729, 34.05...",,MultiPolygon,California,6,Feature
2,"[[[-120.46258, 34.042627], [-120.440248, 34.03...",,MultiPolygon,California,6,Feature


In [None]:
# Reference snippet: add a single-row frame to `data` and renumber the index.
# DataFrame.append was removed in pandas 2.0, so pd.concat is used instead.
# NOTE(review): `data` and `i` are not defined in any cell visible here —
# confirm they exist before running this cell.
data = pd.concat(
    [data, pd.DataFrame({'A': i, 'B': i + 1}, index=[0])],
    ignore_index=True,
)

### Export expanded dataframe


In [None]:
# Write the expanded frame to disk as CSV.
# NOTE(review): `newdf` is not assigned in any cell visible here — confirm
# which frame is meant to be exported before running this cell.
expandedCsvPath = 'Output/ucr_violent_crime_rate_by_state_expanded.csv'
newdf.to_csv(expandedCsvPath)