In [8]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

## Read in both csv files into a dataframe

In [87]:
df_gunviolence_raw = pd.read_csv('gun_violence_archive_2016.csv', delimiter = ',')
df_populationdensity_raw = pd.read_csv('population_density_2016.csv', delimiter = ',')

## We will need to do some cleaning for the gunviolence data.

In [49]:
df_gunviolence_raw.head()

Unnamed: 0,https://www.gunviolencearchive.org/,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6
0,Incident Date,State,City Or County,Address,# Killed,# Injured,Operations
1,31-Dec-16,Indiana,Indianapolis,4006 Newburgh Drive,1,0,
2,31-Dec-16,Kentucky,Louisville,2300 block of Griffiths Avenue,0,1,
3,31-Dec-16,Michigan,Detroit,13000 block of Kilbourne,0,1,
4,31-Dec-16,Louisiana,Covington (Claiborne),Green Road,0,1,


In [125]:
# Replace empty row where titles should be with first row of data where titles are
df_gunviolence_raw.columns = df_gunviolence_raw.iloc[0]
# Delete unnecessary columns we will not be using
df_gunviolence = df_gunviolence_raw.drop(['Incident Date', 'Address', 'Operations'], axis = 1)
# Turn 'City Or County' data into a series. From here we will delete the space and any
# strings in parentheses
ser = pd.Series(df_gunviolence['City Or County'])
df_gunviolence['City Or County'] = ser.str.split(' \(').str[0]
# Change the 'State' column into City, State
df_gunviolence['State'] = df_gunviolence['City Or County'] + ', ' + df_gunviolence['State']
# Delete the city column as we have it combined with the state data
df_gunviolence.drop(['City Or County'], axis = 1, inplace = True)
# Delete the unnecessary first row with just titles
df_gunviolence.drop([0], inplace= True)
# Rename 'State' column to location so we can key off this later
df_gunviolence.rename(columns = {'State':'Location'}, inplace = True)
# Sort by alphabetical order
df_gunviolence.sort_values(by = 'Location', inplace = True)
# Reset and then drop old index
df_gunviolence.reset_index(inplace = True)
df_gunviolence.drop(['index'], axis = 1, inplace = True)

In [127]:
df_gunviolence

Unnamed: 0,Location,# Killed,# Injured
0,"Abington, Massachusetts",0,1
1,"Albany, Georgia",0,1
2,"Albuquerque, New Mexico",4,1
3,"Albuquerque, New Mexico",3,0
4,"Altadena, California",1,1
5,"Altamonte Springs, Florida",0,3
6,"Altavista, Virginia",0,1
7,"Alvin, Texas",0,1
8,"Amarillo, Texas",0,1
9,"Amarillo, Texas",4,0


## Now we clean up the population density data.

In [128]:
df_populationdensity_raw

Unnamed: 0,City,Population density,2016 Population,Land Area,http://www.governing.com/gov-data/population-density-land-area-cities-map.html
0,"Abilene, Texas",1145,122225,107,
1,"Akron, Ohio",3186,197633,62,
2,"Alameda, California",7437,78906,11,
3,"Albany, Georgia",1339,73801,55,
4,"Albany, New York",4587,98111,21,
5,"Albany, Oregon",3034,53211,18,
6,"Albuquerque, New Mexico",2979,559277,188,
7,"Alexandria, Virginia",10367,155810,15,
8,"Alhambra, California",11202,85474,8,
9,"Aliso Viejo, California",6884,51424,7,


In [92]:
df_populationdensity = df_populationdensity_raw.drop(['http://www.governing.com/gov-data/population-density-land-area-cities-map.html'], axis = 1)
df_populationdensity.rename(columns = {'City':'Location'})

Unnamed: 0,Location,Population density,2016 Population,Land Area
0,"Abilene, Texas",1145,122225,107
1,"Akron, Ohio",3186,197633,62
2,"Alameda, California",7437,78906,11
3,"Albany, Georgia",1339,73801,55
4,"Albany, New York",4587,98111,21
5,"Albany, Oregon",3034,53211,18
6,"Albuquerque, New Mexico",2979,559277,188
7,"Alexandria, Virginia",10367,155810,15
8,"Alhambra, California",11202,85474,8
9,"Aliso Viejo, California",6884,51424,7


In [91]:
df_populationdensity.head()

Unnamed: 0,City,Population density,2016 Population,Land Area
0,"Abilene, Texas",1145,122225,107
1,"Akron, Ohio",3186,197633,62
2,"Alameda, California",7437,78906,11
3,"Albany, Georgia",1339,73801,55
4,"Albany, New York",4587,98111,21


Unnamed: 0,City,Population density,2016 Population,Land Area,http://www.governing.com/gov-data/population-density-land-area-cities-map.html
0,"Abilene, Texas",1145,122225,107,
1,"Akron, Ohio",3186,197633,62,
2,"Alameda, California",7437,78906,11,
3,"Albany, Georgia",1339,73801,55,
4,"Albany, New York",4587,98111,21,
