<a href="https://colab.research.google.com/github/lingqinlq/hello-world/blob/main/Boston%20Crime%20Analysis%20and%20visualization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
'''
Created on Jul 28, 2020

@author: Ling Qin

Title: Boston Crime Analysis


The code needs to be downloaded locally and then run from a terminal with the following commands:

pip install streamlit

cd [file folder address i.e. /Users/Lynn/eclipse-workspace]

streamlit run Bostoncrime_LingQin.py


'''

import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pydeck as pdk
import streamlit as st
plt.style.use('ggplot')

In [None]:
# Mapbox API key handed to pydeck for the base map.
# The MAPBOX_API_KEY environment variable takes precedence so a personal token
# never has to be committed to the notebook; the public ("pk.") token below is
# only a fallback. It is a single "pk.<payload>.<signature>" token -- the same
# token concatenated with itself is not a valid key.
MAPKEY = os.environ.get(
    "MAPBOX_API_KEY",
    "pk.eyJ1IjoiY2hlY2ttYXJrIiwiYSI6ImNrOTI0NzU3YTA0azYzZ21rZHRtM2tuYTcifQ.6aQ9nlBpGbomhySWPF98DA",
)

# Load the crime sample once at module level; every function below reads this frame.
# The path is relative, so the CSV must sit in the working directory the app is run from.
df = pd.read_csv(filepath_or_buffer="BostonCrime2020Sample.csv")
# Display name shown in the widgets -> district code matched against the
# DISTRICT column of df. Insertion order is the order the options are listed.
DISTRICTS = dict([
    ("Downtown", "A1"),
    ("Charlestown", "A15"),
    ("East Boston", "A7"),
    ("Roxbury", "B2"),
    ("South Boston", "C6"),
    ("Dorchester", "C11"),
    ("South End", "D4"),
    ("Brighton", "D14"),
    ("West Roxbury", "E5"),
    ("Jamaica Plain", "E13"),
    ("Hyde Park", "E18"),
])


In [None]:
def district_select():
    """Render the district picker and echo the choice as a page header.

    Returns:
        The district name chosen in the radio widget (one of the keys of
        DISTRICTS).
    """
    options = list(DISTRICTS)
    chosen = st.radio("Please select a district to analyze:", options)
    st.header(f"Crime data analysis for {chosen}")
    return chosen

In [None]:
def district_compare(district_select):
    """Ask which other districts to compare against the one already chosen.

    Args:
        district_select: name of the district picked earlier; it is always the
            first entry of the returned list.

    Returns:
        list: ``district_select`` followed by every other district ticked in
        the multiselect, in the order the widget reports them.
    """
    st.header(f"Let's compare {district_select} with other districts in Boston")
    picked = st.multiselect("Select other districts for comparison: ", list(DISTRICTS))
    districtsList = [district_select]
    for name in picked:
        if name != district_select:
            districtsList.append(name)
            continue
        # The base district is offered in the widget too; re-picking it only
        # produces a hint and is not added a second time.
        st.write("You select a district the same as before. Please select a new one.")
    return districtsList

In [None]:
def crime_top5(district):
    """Report a district's total case count and chart its five most frequent offenses.

    Writes two lines of text to the Streamlit page and builds a horizontal bar
    chart of the five offense descriptions with the highest counts.

    Args:
        district: district code as stored in the DISTRICT column of ``df``
            (a value of DISTRICTS, e.g. ``"A1"``).

    Returns:
        matplotlib.figure.Figure: the bar chart, meant to be passed to
        ``st.pyplot``. The figure is detached from pyplot's global state, so
        it is not the "current figure" after this call.

    Raises:
        KeyError: if ``district`` has no rows in ``df``.
    """
    pivot_tb = pd.pivot_table(df, values='INCIDENT_NUMBER', index=['DISTRICT'],
                              columns=['OFFENSE_DESCRIPTION'], aggfunc=len)
    # Offenses never recorded in this district are NaN in its pivot row; drop
    # them so they cannot show up as empty bars when fewer than five exist.
    counts = pivot_tb.loc[district, :].dropna()
    topfive = counts.sort_values(ascending=False).head(5)

    st.write(f'There are {int(counts.sum())} offense cases happened from January to July 2020.')
    st.write("The top 5 most often happened offense are:")

    fig, ax = plt.subplots(figsize=(10, 4))
    ax.barh(topfive.index, topfive.values, color='#557f2d', height=0.5)
    ax.tick_params(axis='y', labelsize=7)
    # Unregister the figure from pyplot: it stays renderable through the
    # returned object, but it no longer accumulates across Streamlit reruns
    # and later plt.* calls cannot accidentally draw on top of it.
    plt.close(fig)
    return fig

In [None]:
def offense_map(district):
    """Show a table and an interactive map of the offenses recorded in one district.

    Writes a caption, the filtered rows as a dataframe, and a pydeck scatter
    map centred on the mean coordinate of those rows. When no row has both
    coordinates, a short notice replaces the map.

    Args:
        district: district code as stored in the DISTRICT column of ``df``
            (a value of DISTRICTS, e.g. ``"A1"``).

    Returns:
        None. All output goes to the Streamlit page.
    """
    columns = ['DISTRICT', 'OFFENSE_DESCRIPTION', 'STREET', 'OCCURRED_ON_DATE', 'Lat', 'Long']
    # Build a new frame instead of renaming a slice of df in place; the
    # in-place rename on a slice is what raises SettingWithCopyWarning.
    locationdata = (
        df.loc[df.DISTRICT == district, columns]
        .rename(columns={'Lat': 'lat', 'Long': 'lon'})
    )

    # Reverse lookup: the caller passes the code, the caption shows the name too.
    for name, code in DISTRICTS.items():
        if code == district:
            st.write(f"Details of the offense cases happened in {district}- {name}:")
    st.dataframe(locationdata)

    # Only rows that carry both coordinates can be positioned on the map; the
    # table above still lists every row.
    mappable = locationdata.dropna(subset=['lat', 'lon'])
    if mappable.empty:
        # The mean of an empty selection is NaN, which is not a usable map centre.
        st.write("No offense locations are available to map for this district.")
        return

    view_state = pdk.ViewState(
        latitude=mappable["lat"].mean(),
        longitude=mappable["lon"].mean(),
        zoom=11,
        pitch=0)

    layer1 = pdk.Layer('ScatterplotLayer',
                       data=mappable,
                       get_position='[lon, lat]',
                       get_radius=30,
                       get_color=[168, 50, 50],
                       pickable=True
                       )
    # Placeholders in braces are filled from the hovered row's columns.
    tool_tip = {"html": "{STREET}: {OFFENSE_DESCRIPTION} at {OCCURRED_ON_DATE} ",
                "style": {"backgroundColor": "steelblue",
                          "color": "white"}
                }
    map1 = pdk.Deck(
        map_style='mapbox://styles/mapbox/light-v9',
        initial_view_state=view_state,
        mapbox_key=MAPKEY,
        layers=[layer1],
        tooltip=tool_tip)

    st.pydeck_chart(map1)

In [None]:
def district_comparison(comparison_district):
    """Plot monthly offense counts for each selected district on one line chart.

    Args:
        comparison_district: iterable of district names (keys of DISTRICTS);
            one line is drawn per name, labelled with that name.

    Returns:
        matplotlib.figure.Figure: the line chart, meant to be passed to
        ``st.pyplot``. The figure is detached from pyplot's global state.

    Raises:
        KeyError: if a name is not in DISTRICTS or its code has no rows in ``df``.
    """
    # fill_value=0 turns "no incidents that month" into a real zero instead of
    # a NaN, which would leave a gap in the plotted line.
    pivot_tb = pd.pivot_table(df, values='INCIDENT_NUMBER', index=['DISTRICT'],
                              columns=['MONTH'], aggfunc=len, fill_value=0)
    month = list(pivot_tb.columns)

    # Own figure rather than whatever pyplot currently considers "current";
    # same size as the top-five bar chart so both charts line up on the page.
    fig, ax = plt.subplots(figsize=(10, 4))
    for d in comparison_district:
        ax.plot(month, pivot_tb.loc[DISTRICTS[d], :], label=d)
    ax.set_xlabel('Month')
    ax.set_ylabel("Number of offense cases")
    ax.legend()
    ax.tick_params(axis='x', labelsize=7)
    # Unregister from pyplot so figures do not pile up across Streamlit reruns;
    # the returned object remains renderable.
    plt.close(fig)
    return fig

In [None]:
st.title("Boston Crime Data Analysis")

# Step 1: the user picks one district; show its top-five bar chart and its map.
# NOTE: the assignment below rebinds the name `district_select` from the
# function to the chosen district name (a str), and later code reads it under
# that name. This works when the whole script is executed top to bottom, but
# re-running only this cell in a live kernel fails because the name is no
# longer callable.
district_select = district_select()
selected_code = DISTRICTS[district_select]
st.pyplot(crime_top5(selected_code))
offense_map(selected_code)

In [None]:
# Step 2: the user picks further districts; compare them all on one line chart.
districtsList = district_compare(district_select)
# Join the names so the heading reads "A, B, C" rather than showing the
# Python list repr with brackets and quotes.
st.subheader(f"\nCrime data analysis for: {', '.join(districtsList)}")
st.pyplot(district_comparison(districtsList))