In [228]:
#import libraries necessary
import pymysql
from sqlalchemy import create_engine, text
import pandas as pd
from configparser import ConfigParser

#set up a parser object and file path to parse a text file in order to protect username and password for MySQL
parser = ConfigParser()
configPath = r'C:\Users\melac\Desktop\Practice\Jupyter Notebooks\ConfigFileSQLChicagoCensus.txt'
_ = parser.read(configPath)
parser.sections()

conn = create_engine(parser.get('my_db', 'conn'))

## Chicago Cenus Data Analysis

This notebook analyzes three socioeconomic data sets taken from the Chicago Data Portal. Use of both Python and SQL languages are incorporated. The data is stored in a MySQL server, and the pandas library is used to communicate with the server. Any data with missing values has been excluded. Sources and details on the data sets used can be found below:

[Socioeconomic Indicators in Chicago](https://data.cityofchicago.org/Health-Human-Services/Census-Data-Selected-socioeconomic-indicators-in-C/kn9c-c2s2)

[Chicago Public Schools](https://data.cityofchicago.org/Education/Chicago-Public-Schools-Progress-Report-Cards-2011-/9xs2-f89t)

[Chicago Crime Data](https://data.cityofchicago.org/Public-Safety/Crimes-2001-to-present/ijzp-q8t2)

### Problem 1

##### Find the total number of crimes recorded in the CRIME table.

In [229]:
df = pd.read_sql_query("SELECT count(*) as Total_Number_of_Crimes FROM chicagocrimedata", conn)
display(df)

Unnamed: 0,Total_Number_of_Crimes
0,487


### Problem 2

##### List community areas with per capita income less than 11000.

In [230]:
df = pd.read_sql_query("SELECT COMMUNITY_AREA_NAME as Communities_With_Per_Capita_Income_Less_Than_11000 FROM chicagocensusdata WHERE PER_CAPITA_INCOME < 11000", conn)
display(df)

Unnamed: 0,Communities_With_Per_Capita_Income_Less_Than_11000
0,West Garfield Park
1,South Lawndale
2,Fuller Park
3,Riverdale


### Problem 3

##### List all case numbers for crimes  involving minors?(children are not considered minors for the purposes of crime analysis)

In [231]:
df = pd.read_sql_query(text("SELECT CASE_NUMBER FROM chicagocrimedata WHERE DESCRIPTION LIKE \"%MINOR%\""), conn)
display(df)

Unnamed: 0,CASE_NUMBER
0,HL266884
1,HK238408


### Problem 4

##### List all kidnapping crimes involving a child?

In [232]:
df = pd.read_sql_query(text("SELECT * FROM chicagocrimedata WHERE DESCRIPTION LIKE \"%CHILD%\" AND PRIMARY_TYPE LIKE \"%KIDNAPPING%\""), conn)
display(df)

Unnamed: 0,ID,CASE_NUMBER,DATE,BLOCK,ICUR,PRIMARY_TYPE,DESCRIPTION,LOCATION_DESCRIPTION,ARREST,DOMESTIC,...,DISTRICT,WARD,COMMUNITY_AREA_NUMBER,FBICODE,X_COORDINATE,Y_COORDINATE,YEAR,LATITUDE,LONGITUDE,LOCATION
0,5276766,HN144152,2007-01-26,050XX W VAN BUREN ST,1792,KIDNAPPING,CHILD ABDUCTION/STRANGER,STREET,False,False,...,15,29,25,20,1143050,1897546,2007,42,-87.75024931,"(41.874908413, -87.750249307)"


### Problem 5

##### What kinds of crimes were recorded at schools?

In [233]:
df = pd.read_sql_query(text("SELECT PRIMARY_TYPE, DESCRIPTION FROM chicagocrimedata WHERE LOCATION_DESCRIPTION LIKE \"%SCHOOL%\""), conn)
display(df)

Unnamed: 0,PRIMARY_TYPE,DESCRIPTION
0,BATTERY,SIMPLE
1,BATTERY,PRO EMP HANDS NO/MIN INJURY
2,BATTERY,SIMPLE
3,BATTERY,SIMPLE
4,BATTERY,SIMPLE
5,CRIMINAL DAMAGE,TO VEHICLE
6,NARCOTICS,POSS: HEROIN(WHITE)
7,NARCOTICS,MANU/DEL:CANNABIS 10GM OR LESS
8,ASSAULT,PRO EMP HANDS NO/MIN INJURY
9,CRIMINAL TRESPASS,TO LAND


### Problem 6

##### List the average safety score for all types of schools.

In [234]:
df = pd.read_sql_query("SELECT `Elementary, Middle, or High School`, AVG(SAFETY_SCORE) as AVG_SAFETY_SCORE FROM chicagopublicschools GROUP BY `Elementary, Middle, or High School`", conn)
display(df)

Unnamed: 0,"Elementary, Middle, or High School",AVG_SAFETY_SCORE
0,ES,49.5204
1,HS,49.6235
2,MS,48.0


### Problem 7

##### List 5 community areas with highest % of households below poverty line

In [235]:
df = pd.read_sql_query("SELECT DISTINCT COMMUNITY_AREA_NAME, PERCENT_HOUSEHOLDS_BELOW_POVERTY FROM chicagocensusdata ORDER BY PERCENT_HOUSEHOLDS_BELOW_POVERTY DESC LIMIT 5", conn)
display(df)

Unnamed: 0,COMMUNITY_AREA_NAME,PERCENT_HOUSEHOLDS_BELOW_POVERTY
0,Riverdale,56.5
1,Fuller Park,51.2
2,Englewood,46.6
3,North Lawndale,43.1
4,East Garfield Park,42.4


### Problem 8

##### Which community area is most crime prone?

In [236]:
df = pd.read_sql_query("SELECT DISTINCT COMMUNITY_AREA_NUMBER, COUNT(COMMUNITY_AREA_NUMBER) as NUMBER_OF_CRIMES FROM chicagocrimedata GROUP BY COMMUNITY_AREA_NUMBER ORDER BY NUMBER_OF_CRIMES DESC LIMIT 1", conn)
display(df)

Unnamed: 0,COMMUNITY_AREA_NUMBER,NUMBER_OF_CRIMES
0,25,42


### Problem 9

##### Use a sub-query to find the name of the community area with highest hardship index

In [237]:
df = pd.read_sql_query("SELECT COMMUNITY_AREA_NAME, (SELECT MAX(HARDSHIP_INDEX)) as HIGHEST_HARDSHIP_INDEX FROM chicagocensusdata", conn)
display(df)

Unnamed: 0,COMMUNITY_AREA_NAME,HIGHEST_HARDSHIP_INDEX
0,Rogers Park,98


### Problem 10

##### Use a sub-query to determine the Community Area Name with most number of crimes?

In [238]:
df = pd.read_sql_query("SELECT DISTINCT census.COMMUNITY_AREA_NAME, crime.COMMUNITY_AREA_NUMBER, (SELECT COUNT(crime.COMMUNITY_AREA_NUMBER)) as NUMBER_OF_CRIMES FROM chicagocensusdata census, chicagocrimedata crime WHERE crime.COMMUNITY_AREA_NUMBER=census.COMMUNITY_AREA_NUMBER GROUP BY COMMUNITY_AREA_NUMBER ORDER BY NUMBER_OF_CRIMES DESC LIMIT 1", conn)
display(df)

Unnamed: 0,COMMUNITY_AREA_NAME,COMMUNITY_AREA_NUMBER,NUMBER_OF_CRIMES
0,Austin,25,42
