# Crime and Housing in Austin, 2015

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import scipy
import numpy as np
from mpl_toolkits.mplot3d import Axes3D

# !pip install folium
import folium

# !pip install pyproj
import pyproj

%matplotlib inline

# Load the crime/housing CSV into a DataFrame; the path is relative to the
# notebook's working directory.
crimeHousing = pd.read_csv("crime-housing-austin-2015.csv")

## Analysis 1 - Most Common Crimes (Bar Plot)

In [None]:
# Count the distinct offense descriptions. nunique() ignores NaN, whereas
# len(unique()) would count a missing description as one more "type" of crime.
print('There are', crimeHousing['Highest_Offense_Desc'].nunique(), 'different types of crime in Austin.')

# Working frame without missing values. dropna() already returns a new object;
# the trailing copy() makes it fully independent of crimeHousing so that columns
# can be added to it in later cells.
# NOTE(review): dropna() with no `subset` discards a row when ANY column is
# missing, not just the offense description -- confirm that is intended.
crimeHousing2 = crimeHousing.dropna().copy()

# Five most frequent offense descriptions (cell output).
crimeHousing2['Highest_Offense_Desc'].value_counts().head()

In [None]:
# Bar chart of the 20 most frequent offense descriptions in the NaN-free frame.
offense_counts = crimeHousing2['Highest_Offense_Desc'].value_counts()
top_offenses_ax = offense_counts.head(20).plot(kind='bar')

# Label the axes object returned by pandas directly.
top_offenses_ax.set_xlabel("Highest_Offense_Desc")
top_offenses_ax.set_ylabel("Amount of crimes")
top_offenses_ax.set_title('Top 20 committed crimes')

## Analysis 2 - Crime Type in Proportion to Crimes in the Database (Pie Chart)

In [None]:
# Index holding the 20 most frequent offense descriptions.
top20 = crimeHousing2['Highest_Offense_Desc'].value_counts().head(20).index


def desc_map(s):
    """Collapse rare offense descriptions into a single bucket.

    Returns `s` unchanged when it is one of the labels in the module-level
    `top20` index, otherwise the string 'misc'.
    """
    return s if s in top20 else 'misc'


# Derive the short description from the same (NaN-free) frame it is stored in.
crimeHousing2['Description_Short'] = crimeHousing2['Highest_Offense_Desc'].apply(desc_map)

# Compute the counts ONCE and take both the wedge sizes and the labels from the
# same Series. Sorting a second value_counts() result separately can order tied
# categories differently and attach the wrong label to a wedge.
description_counts = crimeHousing2['Description_Short'].value_counts()

fig1, ax1 = plt.subplots(figsize=(15, 8))
labels = description_counts.index
ax1.pie(description_counts, labels=labels, autopct='%1.1f%%', textprops={'fontsize': 14})
ax1.axis('equal')  # equal aspect ratio so the pie is drawn as a circle

plt.show()

## Analysis 3 - Histogram of Number of Crimes Per Zip Code (Bar Chart)

In [None]:
# Number of crimes per zip code, most affected zip code first.

# Replace any remaining missing values with 0 (rebinding instead of mutating in
# place keeps the cell safe to re-run).
crimeHousing2 = crimeHousing2.fillna(0)

# Keep only the first five characters of the zip code's string form, which
# turns a float-formatted value such as '78701.0' into '78701'.
crimeHousing2['Zip_Code_Crime'] = crimeHousing2['Zip_Code_Crime'].astype(str).str[:5]

# Drop the placeholder produced by fillna(0): '0.0' for a float column, '0' for
# an integer/object column.
has_zip = ~crimeHousing2['Zip_Code_Crime'].isin(['0', '0.0'])

# value_counts() already sorts in descending order.
# NOTE: despite its name, Top5 holds every zip code; the name is kept so that
# other cells referring to it keep working.
Top5 = crimeHousing2.loc[has_zip, 'Zip_Code_Crime'].value_counts()

zip_ax = Top5.plot(kind='bar')
zip_ax.set_xlabel("Zip Codes")
zip_ax.set_ylabel("Amount of crimes")

# The bars are raw counts, not population-adjusted rates, so the title must not
# call them a "crime rate".
zip_ax.set_title('Number of crimes per zip code')

## Analysis 4 - Map of Austin with Dots at Crime Locations, random 10% sample (Scatterplot)

In [None]:
# Map centre used for Austin (decimal degrees).
latitude = 30.26
longitude = -97.73

# Base map that the crime markers are drawn on.
austin_map = folium.Map(location=[latitude, longitude], zoom_start=11.5)

# Show the empty base map. A bare `austin_map` expression is only rendered when
# it is the LAST line of a cell, so it has to be displayed explicitly here.
display(austin_map)

import warnings
# NOTE(review): this silences every warning for the rest of the kernel session.
# It is kept so the output of later cells does not change, but the coordinate
# conversion below no longer relies on deprecated pyproj calls.
warnings.filterwarnings("ignore")

# Source projection of X_Coordinate / Y_Coordinate: Lambert conformal conic on
# the NAD83 datum with coordinates in US survey feet (see +to_meter).
# NOTE(review): the parameters look like the Texas Central state-plane zone --
# confirm against the data provider's documentation.
fips2401 = pyproj.Proj("+proj=lcc +lat_1=30.11666666666667 +lat_2=31.88333333333333 +lat_0=29.66666666666667 +lon_0=-100.3333333333333 +x_0=700000 +y_0=3000000 +ellps=GRS80 +datum=NAD83 +to_meter=0.3048006096012192 +no_defs")
# Target: WGS84 geographic coordinates (longitude / latitude), as used by folium.
wgs84 = pyproj.Proj("EPSG:4326")

# always_xy=True fixes the output order to (longitude, latitude).
to_wgs84 = pyproj.Transformer.from_crs(fips2401.crs, wgs84.crs, always_xy=True)
lon_values, lat_values = to_wgs84.transform(
    crimeHousing.X_Coordinate.to_numpy(),
    crimeHousing.Y_Coordinate.to_numpy(),
)
# Assign the arrays positionally. Going through an intermediate DataFrame would
# align on the index and attach coordinates to the wrong rows if this cell is
# re-run after the dropna() below has made the index non-contiguous.
crimeHousing['lon'] = lon_values
crimeHousing['lat'] = lat_values

# Keep only complete rows, then draw a 10% sample of the crimes. The fixed seed
# makes the sample (and therefore both maps) reproducible across runs.
crimeHousing = crimeHousing.dropna()
sampleCrime = crimeHousing.sample(frac=.1, random_state=42)

# Feature group that will hold the incident markers.
incidents = folium.map.FeatureGroup()

In [None]:
# Add one small circle marker per sampled crime to the `incidents` feature
# group. Adding the markers directly to the map would leave the group empty.
for lat, lng, label in zip(sampleCrime.lat, sampleCrime.lon, sampleCrime.Highest_Offense_Desc):
    folium.CircleMarker(
        [lat, lng],
        radius=.5,      # marker size in pixels
        color='blue',
        fill=True,
        popup=label,    # offense description shown when the marker is clicked
    ).add_to(incidents)

# Attach the populated group to the map; add_child returns the map, so as the
# last expression of the cell it also renders it.
austin_map.add_child(incidents)

## Analysis 5 - Cluster Map of Austin Using the Same Sample of Crime Locations

In [None]:
from folium import plugins

# Fresh base map so the markers from the previous analysis are not carried over.
austin_map = folium.Map(location=[latitude, longitude], zoom_start=11.5)

# Cluster layer that groups nearby markers; it is attached to the map first.
incidents = plugins.MarkerCluster()
incidents.add_to(austin_map)

# One default-icon marker per sampled crime, with the offense as popup text.
crime_points = zip(sampleCrime.lat, sampleCrime.lon, sampleCrime.Highest_Offense_Desc)
for point_lat, point_lng, offense in crime_points:
    marker = folium.Marker(location=[point_lat, point_lng], icon=None, popup=offense)
    marker.add_to(incidents)

# Render the clustered map as the cell output.
austin_map

## Analysis 6 - Clearance Status by Median Household Income (Line Plot and Bar Chart of Pearson Correlations)

In [None]:
# Re-read the raw CSV so this analysis does not depend on frames that earlier
# cells have modified. One read is enough; crime_df is an independent copy.
crime = pd.read_csv('crime-housing-austin-2015.csv')
crime_df = crime.copy()

# Columns needed for this analysis.
crime_df2 = crime_df[["Highest_NIBRS_UCR_Offense_Description", "Clearance_Status", "Medianhouseholdincome"]].copy()

# Strip the literal dollar sign and convert to a number right away.
#  * regex=False is required: as a regular expression '$' is the end-of-string
#    anchor, so a regex replace would leave the '$' character in place.
#  * Converting before grouping makes the income levels sort numerically
#    instead of lexicographically (where e.g. "100000" comes before "20000").
# NOTE(review): assumes the values contain no thousands separators -- confirm.
crime_df2["Medianhouseholdincome"] = pd.to_numeric(
    crime_df2["Medianhouseholdincome"].str.replace('$', '', regex=False)
)

# Number of crimes per (income level, clearance status) pair.
crime_df3 = crime_df2[["Clearance_Status", "Medianhouseholdincome"]].groupby("Medianhouseholdincome")
crime_df4 = crime_df3["Clearance_Status"].value_counts().reset_index(name="Count")
values = crime_df4.values

# Same table with explicit column names; copying the frame (rather than
# rebuilding it from a raw array) keeps the numeric dtypes.
crime_df5 = crime_df4.copy()
crime_df5.columns = ["Medianhouseholdincome", "Clearance_Status", "Count"]

# One row per income level, one column per clearance status.
crime_df6 = crime_df5.pivot(index="Medianhouseholdincome", columns="Clearance_Status", values="Count")
crime_df7 = crime_df6.rename(columns={"C": "C=Cleared By Arrest", "O": "O=Cleared By Exception", "N": "N=Not Cleared"})

# Line plot: crime volume per clearance status across income levels.
volume_ax = crime_df7.plot()
volume_ax.set_title("Clearance Status Volume By Income Level")
plt.show()

# Pearson correlation between each status count and the income level.
crime_df8 = crime_df6.reset_index()
income_level = crime_df8["Medianhouseholdincome"]
values2 = [crime_df8[status].corr(income_level) for status in ("N", "O", "C")]
crime_df9 = pd.DataFrame({"N_P": [values2[0]], "O_P": [values2[1]], "C_P": [values2[2]]}).reset_index()

# Plot only the three coefficients; the "index" column added by reset_index()
# would otherwise show up as a fourth, meaningless bar.
pearson_ax = crime_df9[["N_P", "O_P", "C_P"]].plot.bar()
pearson_ax.set_title("Pearson Coefficients For N, O, and C Clearance Status Values Correlated With Median Household Income")
plt.show()

## Analysis 7

## Analysis 8