# Project Two
## Group 5: Brian, Josh, Jeff, Yuliya, Natalie
2016 Election and Police/Judicial Employment

In [74]:
# Import packages
import pandas as pd
import sqlalchemy
from sqlalchemy import create_engine
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
import os

# (E) EXTRACTION

### Load in police data

In [70]:
# Define directory to police files
police_dir = "Resources/ucr-police-employee-data-2016-by-state-by-counties/"

# Find paths to all police files
police_paths = os.listdir(police_dir)

# Initialize df list
police_df_list = []

# Iterate through each path
for path in police_paths:
    # Identify state of origin
    state = pd.read_excel(police_dir + path, header = 0).iloc[0,0]
    
    # Load in data
    police_df = pd.read_excel(police_dir + path, header = 4)

    # Add state column
    police_df['State'] = state
    
    # Remove '\n' from column names
    police_df = police_df.rename(columns={'Metropolitan/Nonmetropolitan': 'Metropolitan',
                                          'Total law\nenforcement\nemployees': 'Total Law Enforcement Employees',
                                          'Total\nofficers': 'Total Officers',
                                          'Total\ncivilians': 'Total Civilians'})
    
    # Append to list
    police_df_list.append(police_df)

# Merge dataframes together
police_df = pd.concat(police_df_list, axis = 0)
police_df.head()
# police_df.shape
# police_df['State'].unique()

Unnamed: 0,Metropolitan,County,Total Law Enforcement Employees,Total Officers,Total Civilians,State
0,Metropolitan Counties,Anoka,256.0,129.0,127.0,MINNESOTA
1,,Benton,70.0,24.0,46.0,MINNESOTA
2,,Blue Earth,69.0,31.0,38.0,MINNESOTA
3,,Carlton,52.0,22.0,30.0,MINNESOTA
4,,Carver,148.0,77.0,71.0,MINNESOTA


### Load in voting data

In [83]:
# Create engine using the `demographics.sqlite` database file
engine = create_engine("sqlite:///Resources/archive/database.sqlite")

# Declare a Base using `automap_base()`
Base = automap_base()

# Use the Base class to reflect the database tables
Base.prepare(engine, reflect=True)

# Create a session
session = Session(engine)

# Create a connection
conn = engine.connect()

# Generate dataframe
voting_df = pd.read_sql_query('select * from primary_results', con=engine)
voting_df.head()

Unnamed: 0,state,state_abbreviation,county,fips,party,candidate,votes,fraction_votes
0,Alabama,AL,Autauga,1001,Democrat,Bernie Sanders,544,0.182
1,Alabama,AL,Autauga,1001,Democrat,Hillary Clinton,2387,0.8
2,Alabama,AL,Baldwin,1003,Democrat,Bernie Sanders,2694,0.329
3,Alabama,AL,Baldwin,1003,Democrat,Hillary Clinton,5290,0.647
4,Alabama,AL,Barbour,1005,Democrat,Bernie Sanders,222,0.078


# (T) TRANSFORM

### Format Police Data

In [71]:
# Modify values in Metropolitan column to be more readable
police_df = police_df.drop('Metropolitan', axis = 1)

# Modify State to be first capitalized only
police_df['State'] = police_df['State'].str.capitalize()
police_df.head()

Unnamed: 0,County,Total Law Enforcement Employees,Total Officers,Total Civilians,State
0,Anoka,256.0,129.0,127.0,Minnesota
1,Benton,70.0,24.0,46.0,Minnesota
2,Blue Earth,69.0,31.0,38.0,Minnesota
3,Carlton,52.0,22.0,30.0,Minnesota
4,Carver,148.0,77.0,71.0,Minnesota
