## Import Libraries

In [1]:
import pandas as pd
import altair as alt
import geopandas as gpd
import osmnx as ox
import folium
import numpy as np
from folium import plugins
import geopandas as gpd
import warnings
warnings.filterwarnings('ignore')
from folium.plugins import MarkerCluster
from branca.element import Template, MacroElement
alt.data_transformers.disable_max_rows()
import html
import matplotlib.pyplot as plt
from textwrap import wrap



## About Dataset

**SPD Crime Data: 2008-Present**

The analysis utilizes crime data sourced from the Seattle Police Department spanning the years 2008 to 2022, available at https://data.seattle.gov/Public-Safety/SPD-Crime-Data-2008-Present/tazs-3rd5. The dataset encompasses a comprehensive array of information crucial for understanding and analyzing criminal activities. Each crime report is uniquely identified by a Report Number, facilitating tracking and reference, while the Offense ID serves as an additional unique identifier for individual offenses. The temporal aspects of the offenses are detailed through the Offense Start DateTime and Offense End DateTime fields, providing insights into the duration of events. This comprehensive dataset serves as a valuable resource for researchers and policymakers to discern trends, formulate evidence-based strategies, and enhance public safety initiatives in the Seattle area.


**Report Number:** This is a unique identifier for the crime report, used for tracking and reference.<br>

**Offense ID:** Similar to the Report Number, this is also a unique identifier assigned to each individual offense recorded.<br>

**Offense Start DateTime:** The date and time when the offense began.<br>

**Offense End DateTime:** The date and time when the offense ended. This can be the same as the start date and time if the event was instantaneous.<br>

**Report DateTime:** The date and time when the offense was reported to the authorities.<br>

**Group A B:** This classification system is typically used in crime reporting to differentiate between the severity of offenses. Group A offenses might include more serious crimes that are considered felonies, while Group B could be less serious or misdemeanor crimes.<br>

**Crime Against Category:** Describes the victim type of the crime, which can be a person, property, or societal norm (like public order offenses).<br>

**Offense Parent Group:** This category groups offenses into broader classes that reflect the nature of the crime, such as theft, assault, or vandalism. These groups help in analyzing crime patterns and formulating policing strategies.<br>

**Offense:** This is a detailed description of the nature of the crime. For instance, 'Shoplifting' is a specific type of 'Larceny-Theft'. This detailed level of categorization is crucial for legal proceedings and statistical analysis.<br>

**Offense Code:** Every type of offense is assigned a specific code, often based on national or state crime coding systems. These codes are essential for reporting crimes to national databases for statistical purposes and can help in identifying trends.<br>

**Precinct:** The police precinct or district where the offense occurred. This is a geographical area used for administrative and patrol purposes.<br>

**Sector:** Within a precinct, sectors are sub-areas that help further organize police patrols and responses.<br>

**Beat:** Even smaller divisions within a sector, a beat is the actual route that a patrol officer covers.<br>

**MCPP:** This stands for Micro Community Policing Plans, which are tailored policing strategies for different communities within a sector.<br>

**100 Block Address:** This provides a general location of the offense, giving the 100 block level for anonymity and privacy.<br>

**Longitude:** The longitudinal coordinate of where the offense occurred, used for mapping and analysis.<br>

**Latitude:** The latitudinal coordinate of where the offense occurred, also used for mapping and analysis.<br>

### Read Dataset

In [2]:
# The 2022 extract (the most recent year) is the primary focus of the analysis.
crime = pd.read_csv('modified_crime_data.csv')
# Strip spaces and apostrophes from the header names so that every column
# can be referenced without whitespace (e.g. 'Report Number' -> 'ReportNumber').
cleaned_headers = crime.columns.str.replace("[' ]", '', regex=True)
crime.columns = cleaned_headers

In [3]:
# Display the column index of the 2022 frame to check the cleaned header names.
crime.columns

Index(['ReportNumber', 'OffenseID', 'GroupAB', 'CrimeAgainstCategory',
       'OffenseParentGroup', 'Offense', 'OffenseCode', 'Precinct', 'Sector',
       'Beat', 'MCPP', '100BlockAddress', 'Longitude', 'Latitude',
       'DayoftheWeek', 'Month', 'Quarter', 'Year', 'OffenseStartDateTimeDate',
       'OffenseStartDateTimeTime', 'OffenseEndDateTimeDate',
       'OffenseEndDateTimeTime', 'ReportDateTimeDate', 'ReportDateTimeTime',
       'OffenseStartHour', 'OffenseEndHour', 'ReportHour'],
      dtype='object')

In [4]:
# Load the multi-year extract that feeds the long-term trend chart.
crime_full = pd.read_csv('crime2008_2022.csv')
# Strip spaces and apostrophes from the header names.
crime_full.columns = crime_full.columns.str.replace("[' ]", '', regex=True)
# NOTE(review): the strict '>' drops the year 2008 itself, while the trend
# chart built from this frame is titled 'Year: 2008 - 2022' -- confirm
# whether '>=' was intended.
crime_2008 = crime_full.loc[crime_full['Year'] > 2008]
# Parse the date columns of the 2022 frame so the `.dt` accessor and date
# arithmetic can be used on them.
for date_col in ('OffenseStartDateTimeDate', 'OffenseEndDateTimeDate', 'ReportDateTimeDate'):
    crime[date_col] = pd.to_datetime(crime[date_col])

### Set Custom Themes for all the graphs

In [5]:
def custom_theme():
    """Build the Altair theme dictionary applied to all graphs.

    Returns:
        dict: A ``{'config': ...}`` mapping holding the header, axis, title
        and view settings.
    """
    font_family = 'Sans Serif'

    header_style = dict(
        titleFontSize=40,
        labelFontSize=16,
        font=font_family,
        labelColor='black',
        titleColor='black',
    )
    axis_style = dict(
        titleFontSize=16,
        labelFontSize=14,
        labelColor='dimgray',
        font=font_family,
        titleColor='dimgray',
        grid=False,
        labelOffset=5,
        labelPadding=8,
        titlePadding=15,
    )
    title_style = dict(
        fontSize=20,
        font=font_family,
        anchor='middle',
        color='black',
        offset=11,
    )
    view_style = dict(strokeWidth=0)  # zero-width stroke for the view

    return {
        'config': dict(
            header=header_style,
            axis=axis_style,
            title=title_style,
            view=view_style,
        )
    }

# Register the theme function under the name 'custom_theme' and make it the
# active Altair theme, so charts created afterwards pick up its settings.
alt.themes.register('custom_theme', custom_theme)
alt.themes.enable('custom_theme')




ThemeRegistry.enable('custom_theme')

## Exploratory Data Analysis

### Graph 1: Analyzing the crime trend from 2008 to 2022.



In [6]:
# Number of offenses per year for the long-term trend line.
year_crime = crime_2008.groupby('Year').size().reset_index(name='Count')

trend_title = {
    "text": ['Analyzing Crime Trends in Seattle'],
    "subtitle": ['Year: 2008 - 2022'],
    "offset": 20,
    "subtitlePadding": 10,
    "subtitleFontSize": 16,
}
year_axis = alt.X('Year:O', axis=alt.Axis(labelAngle=0, title='Year'))
# The y-domain is fixed to 50k-80k, so the axis does not start at zero.
count_axis = alt.Y(
    'Count:Q',
    scale=alt.Scale(domain=[50000, 80000]),
    axis=alt.Axis(title='Number Of Crimes', format='~s'),
)
chart = (
    alt.Chart(year_crime)
    .mark_line(size=3)
    .encode(x=year_axis, y=count_axis)
    .properties(width=800, height=300, title=trend_title)
)

# Footer naming the data source; dx/dy shift the text mark in pixels.
source_frame = pd.DataFrame(
    {'source': ['Source: https://data.seattle.gov/Public-Safety/SPD-Crime-Data-2008-Present/']}
)
source_text = (
    alt.Chart(source_frame)
    .mark_text(fontSize=12, text='source', dx=200, dy=230)
    .encode(text='source:N')
)

# One-line takeaway drawn on top of the plot.
takeaway_frame = pd.DataFrame(
    {'desc': ['The number of crime rates in seattle are increasing over time']}
)
desc_text = (
    alt.Chart(takeaway_frame)
    .mark_text(fontSize=15, text='source', align='right', dy=80, dx=340)
    .encode(text='desc:N')
)

# 'COVID-19' annotation, positioned by fixed pixel offsets.
covid_frame = pd.DataFrame({'desc': ['COVID-19']})
covid_text = (
    alt.Chart(covid_frame)
    .mark_text(fontSize=14, text='source', align='right', dy=-128, dx=276)
    .encode(text='desc:N')
)

# Stack the annotations on top of the trend line.
chart = chart + source_text + desc_text + covid_text
#chart

#### Effectiveness of the graph:

The purpose of this graph is to visually depict the trend in crime incidents over the years from 2008 to 2022 in the dataset. The x-axis represents each individual year, while the y-axis quantifies the count of reported crimes. By utilizing a line chart, the graph allows for a clear and intuitive understanding of how the overall volume of crimes has changed over the specified time period. The upward or downward trajectory of the line provides a quick visual assessment of whether crime rates have increased, decreased, or remained relatively stable throughout the years. This graphical representation aids in identifying potential patterns, trends, or anomalies in the data, offering valuable insights for law enforcement, policymakers, and researchers interested in understanding the longitudinal dynamics of criminal activity in the Seattle area. The chart's simplicity and clarity make it an effective tool for communicating complex temporal patterns in crime data to a diverse audience.

Now, let's shift our focus to understanding the crime rate in 2022.

### Graph 2: Analyzing the crime trend in 2022.

In [7]:
# Month numbers (1-12) taken from each offense's end and start dates.
# These columns must already be datetime-typed for the `.dt` accessor to work.
crime['end_month_num'] = crime['OffenseEndDateTimeDate'].dt.month
crime['start_month_num'] = crime['OffenseStartDateTimeDate'].dt.month

# Line chart of the number of offenses per start month.
monthly_crime = alt.Chart(crime).mark_line(size=3).encode(
    x=alt.X('start_month_num:O', axis=alt.Axis(labelAngle=0), title='Month'),
    y=alt.Y('count_all:Q', scale=alt.Scale(domain=[3900, 5000]), title='Count Crime'),
).transform_aggregate(
    # count() tallies the rows of each month group and needs no field name.
    count_all='count()',
    groupby=['start_month_num'],
).properties(
    height=400,
    width=500,
    title={
        "text": ['Monthly Crime Trends in Seattle'],
        "subtitle": ['Year: 2022'],
        "offset": 20,
        "subtitlePadding": 10,
        "subtitleFontSize": 16,
    },
)

# Dashed vertical rules at months 1, 4, 7 and 10, which this chart uses as
# the starting points of the four season labels.
season_dash = alt.Chart(
    pd.DataFrame({'month_num': [1, 4, 7, 10]})
).mark_rule(strokeDash=[8, 4]).encode(
    x=alt.X('month_num:O')
)

# Season names drawn next to the corresponding rules.
season_mapping = pd.DataFrame({
    'month_num': [1, 4, 7, 10],
    'season': ['Winter', 'Spring', 'Summer', 'Fall'],
})
season_text = alt.Chart(season_mapping).mark_text(dx=23, dy=-130).encode(
    text=alt.Text('season:N'),
    x=alt.X('month_num:O'),
)
final = monthly_crime + season_dash + season_text

# Footer naming the data source; dx/dy shift the text mark in pixels.
source_text = alt.Chart(
    pd.DataFrame(
        {'source': ['Source: https://data.seattle.gov/Public-Safety/SPD-Crime-Data-2008-Present/']})
).mark_text(
    fontSize=12,
    dx=70,
    dy=275,
).encode(
    text='source:N'
)

# One-line takeaway placed below the plot.
desc_text = alt.Chart(
    pd.DataFrame({'desc': ["The crime rate is at peak during Summers and it's declining as we are reaching end of the year"]})
).mark_text(
    fontSize=14,
    dy=300,
).encode(
    text='desc:N'
)

final = final + source_text + desc_text
#final

#### Observation: 

The graph illustrating crime counts in Seattle for the year 2022 reveals a distinct seasonal trend in criminal activity. The year begins with higher crime rates in the winter months, which then dip towards the end of the season. With the arrival of spring, there is a marked rise in crime, peaking notably in May. This peak could be attributed to various factors, such as increased outdoor activity during the milder weather. As summer commences, the data shows considerable fluctuation, with a significant downturn in July, which swiftly reverses into another peak in August, suggesting a possible connection to seasonal social dynamics or events. Following this, there is a sharp decline as autumn approaches, culminating in the lowest crime rates of the year towards the end of fall. This pattern, particularly the troughs and peaks, may provide insights for law enforcement and community planners to allocate resources more effectively and develop targeted interventions at times when crime is historically higher. 

 

#### Effectiveness of the graph: 

This graph is designed to present the monthly crime count in Seattle for the year 2022. The x-axis represents the months of the year, while the y-axis quantifies the number of crimes reported each month. By deploying a line chart, the graph provides a straightforward and comprehensible visualization of the fluctuations in crime rate throughout the different seasons, which are annotated on the x-axis, dividing the year into Winter, Spring, Summer, and Fall. 

 

The line chart clearly shows Seattle's monthly crime trends in 2022, with seasonal labels aiding in understanding the data's context. Its simple design and clear labels facilitate easy comparison of crime counts across months, highlighting key changes such as the notable decrease from Fall to Winter. This graph serves as an accessible and effective tool for analyzing crime patterns, useful for both policymakers and the public.

### Graph 3: Top 10 Crimes in Seattle

In [8]:
# Number of offenses per OffenseParentGroup, most frequent first.
counts_crime = crime['OffenseParentGroup'].value_counts().reset_index()
counts_crime.columns = ['OffenseParentGroup', 'Counts']
# Keep the ten most frequent groups.
top_counts_df = counts_crime.nlargest(10, 'Counts')
# The raw labels are all upper-case; a capitalized copy (first letter only)
# is derived from the top-10 rows themselves for readable axis labels.
top_counts_df['Formatted_Label'] = top_counts_df['OffenseParentGroup'].str.capitalize()

# Horizontal bars sorted by count; the x axis is stripped of ticks, domain
# line and tick values because the counts are printed next to the bars.
bar_chart = alt.Chart(top_counts_df).mark_bar(color='steelblue').encode(
    x=alt.X('Counts:Q', title='Crime Count', axis=alt.Axis(ticks=False, domain=False, values=[])),
    y=alt.Y('Formatted_Label:N', sort='-x', title='', axis=alt.Axis(labelLimit=500)),
).properties(
    width=600,
    height=400,
)

# Count labels, reusing the bar chart's x/y encodings for positioning.
text_chart = bar_chart.mark_text(
    align='left',
    baseline='middle',
    dx=3,
).encode(
    text='Counts:Q'
)

chart = (bar_chart + text_chart).properties(
    title={
        "text": ['Top 10 Crimes in Seattle'],
        "subtitle": ['By Offense Type'],
        "offset": 20,
        "subtitlePadding": 10,
        "subtitleFontSize": 16,
    }
)

# Footer naming the data source; dx/dy shift the text mark in pixels.
source_text = alt.Chart(
    pd.DataFrame(
        {'source': ['Source: https://data.seattle.gov/Public-Safety/SPD-Crime-Data-2008-Present/']})
).mark_text(
    fontSize=12,
    dx=70,
    dy=275,
).encode(
    text='source:N'
)

# Two-line annotation, drawn as two separate text marks 20 px apart.
desc_text_1 = alt.Chart(pd.DataFrame({'desc': ['Larceny Theft contributes around 70% ']})).mark_text(
    fontSize=15,
    align='right',
    dy=-70,
    dx=300,
    baseline='middle', lineBreak=r'\n'
).encode(
    text='desc:N'
)

desc_text_2 = alt.Chart(pd.DataFrame({'desc': ['of crime in Seattle ']})).mark_text(
    fontSize=15,
    align='right',
    dy=-50,
    dx=295,
    baseline='middle', lineBreak=r'\n'
).encode(
    text='desc:N'
)

chart = chart + source_text + desc_text_1 + desc_text_2
#chart

### Graph 4: Median Reporting Times for Different Types of Crimes in Seattle

In [9]:

# Minutes between the offense start date and the report date.
# NOTE(review): both operands are the '...Date' columns; if they carry no
# time of day the result is whole days expressed in minutes -- confirm.
report_lag = crime['ReportDateTimeDate'] - crime['OffenseStartDateTimeDate']
crime['DurationToReport'] = report_lag.dt.total_seconds() / 60

# Median lag per offense group, then the ten groups with the longest median.
median_by_group = crime.groupby('OffenseParentGroup')['DurationToReport'].median()
calculation_df = median_by_group.reset_index()
calculation_df.columns = ['OffenseParentGroup', 'DurationToReport']
top_calculation_df = calculation_df.sort_values(by='DurationToReport', ascending=False).head(10)

# Capitalized copy of the group name for the axis labels.
top_calculation_df['Formatted_Label'] = top_calculation_df['OffenseParentGroup'].str.capitalize()

# Horizontal bars sorted by duration; x-axis ticks, domain line and tick
# values are suppressed because the values are printed next to the bars.
bare_axis = alt.Axis(ticks=False, domain=False, values=[])
duration_x = alt.X(
    'DurationToReport:Q',
    title='Duration to File a Report (in minutes)',
    axis=bare_axis,
)
group_y = alt.Y('Formatted_Label:N', sort='-x', title=' ', axis=alt.Axis(labelLimit=500))
bar_chart = (
    alt.Chart(top_calculation_df)
    .mark_bar()
    .encode(x=duration_x, y=group_y)
    .properties(width=600, height=400)
)

# Value labels placed just past the end of each bar.
text_labels = (
    alt.Chart(top_calculation_df)
    .mark_text(align='left', baseline='middle', dx=3)
    .encode(
        x=alt.X('DurationToReport:Q', stack='zero'),
        y=alt.Y('Formatted_Label:N', sort='-x'),
        text=alt.Text('DurationToReport:Q'),
    )
)

reporting_title = {
    "text": ['Median Reporting Time'],
    "subtitle": ['By Offense Type'],
    "offset": 20,
    "subtitlePadding": 10,
    "subtitleFontSize": 16,
}
chart = (bar_chart + text_labels).properties(title=reporting_title)

# Footer naming the data source; dx/dy shift the text mark in pixels.
source_frame = pd.DataFrame(
    {'source': ['Source: https://data.seattle.gov/Public-Safety/SPD-Crime-Data-2008-Present/']}
)
source_text = (
    alt.Chart(source_frame)
    .mark_text(fontSize=12, text='source', dx=70, dy=275)
    .encode(text='source:N')
)

# Two-line annotation, drawn as two separate text marks 20 px apart.
desc_text_1 = (
    alt.Chart(pd.DataFrame({'desc': ['Financial & Sexual Offenses']}))
    .mark_text(fontSize=16, text='source', align='right', dy=-60, dx=300)
    .encode(text='desc:N')
)
desc_text_2 = (
    alt.Chart(pd.DataFrame({'desc': ['exhibit extended processing times']}))
    .mark_text(fontSize=16, text='source', align='right', dy=-40, dx=300)
    .encode(text='desc:N')
)

chart = chart + source_text + desc_text_1 + desc_text_2
#chart

### Graph5: Crime Comparison Across Micro Communities Based on Assault and Burglary Offense

In [10]:
# Restrict the data to the two offense groups being compared and count the
# offenses per micro community (MCPP) and group.
selected_offenses = ['ASSAULT OFFENSES', 'BURGLARY/BREAKING&ENTERING']
filtered_crime = crime[crime['OffenseParentGroup'].isin(selected_offenses)]
filtered_crime = filtered_crime.groupby(['MCPP', 'OffenseParentGroup']).size().reset_index(name='Count')

# One row per MCPP with one count column per offense group.
pivot_crime = filtered_crime.pivot(index='MCPP', columns='OffenseParentGroup', values='Count')

# A community without offenses of one kind gets 0 instead of NaN.
pivot_crime = pivot_crime.fillna(0).reset_index()

# Strip spaces and apostrophes from the column names
# ('ASSAULT OFFENSES' -> 'ASSAULTOFFENSES').
pivot_crime.columns = pivot_crime.columns.str.replace("[' ]", '', regex=True)

# Mean count per offense group, used as the threshold for highlighting.
# Cast to plain Python floats so the thresholds are embedded in the filter
# expression as bare numeric literals rather than NumPy scalar objects.
mean_ao = float(np.mean(pivot_crime['ASSAULTOFFENSES']))
mean_b = float(np.mean(pivot_crime['BURGLARY/BREAKING&ENTERING']))

# Communities that are above the mean on BOTH offense groups.
above_both_means = (
    (alt.datum['ASSAULTOFFENSES'] > mean_ao)
    & (alt.datum['BURGLARY/BREAKING&ENTERING'] > mean_b)
)

# Base scatter plot: one point per MCPP.
chart = alt.Chart(pivot_crime).mark_point(filled=True).encode(
    alt.X('ASSAULTOFFENSES:Q', title='Count of Assault'),
    alt.Y('BURGLARY/BREAKING&ENTERING:Q', title='Count of Burglary/Breaking'),
)

# Red points for the communities above both means.
shady_area = alt.Chart(pivot_crime).mark_point(filled=True, color='red').transform_filter(
    above_both_means
).encode(
    alt.X('ASSAULTOFFENSES:Q'),
    alt.Y('BURGLARY/BREAKING&ENTERING:Q')
)

# Translucent red rectangle spanning mean..max on both axes.
highlighted_areas = alt.Chart(pivot_crime).mark_rect(
    color='red',
    opacity=0.1
).encode(
    x='mean(ASSAULTOFFENSES):Q',
    x2='max(ASSAULTOFFENSES):Q',
    y='mean(BURGLARY/BREAKING&ENTERING):Q',
    y2='max(BURGLARY/BREAKING&ENTERING):Q',
)

# MCPP names next to the highlighted points only.
labels = alt.Chart(pivot_crime).mark_text(align='left', dx=5, dy=-6, size=9).transform_filter(
    above_both_means
).encode(
    alt.X('ASSAULTOFFENSES:Q'),
    alt.Y('BURGLARY/BREAKING&ENTERING:Q'),
    alt.Text('MCPP:N')
)

line_chart_state = alt.layer(chart + labels + highlighted_areas + shady_area).resolve_scale(
    color='independent'
).properties(
    width=500,
    height=500,
    title={
        "text": ['Crime Comparison Across Micro Communities'],
        "subtitle": ['Assault and Burglary Offenses'],
        "offset": 20,
        "subtitlePadding": 10,
        "subtitleFontSize": 16,
    }
)

# Footer naming the data source; dx/dy shift the text mark in pixels.
source_text = alt.Chart(
    pd.DataFrame(
        {'source': ['Source: https://data.seattle.gov/Public-Safety/SPD-Crime-Data-2008-Present/']})
).mark_text(
    fontSize=12,
    dx=70,
    dy=325,
).encode(
    text='source:N'
)

# Explanation of the red rectangle, placed below the plot.
desc_text = alt.Chart(
    pd.DataFrame({'desc': ['The red-highlighted area in the graph depicts the above average crime count']})
).mark_text(
    fontSize=14,
    dy=355,
    dx=40,
).encode(
    text='desc:N'
)

line_chart_state = line_chart_state + source_text + desc_text



#line_chart_state

# Processing for census tract population

Resources:
- Housing data with census tract name/object ID.
- 2010 census tract GeoJSON with land area in acres.

In [11]:
# Inputs: report points, census blocks, housing/population estimates and
# Community Reporting Area (neighborhood) polygons. Everything is aggregated
# up to neighborhood level and then to neighborhood districts.

block_geo = gpd.read_file('2020_Census_Blocks_-_Seattle.geojson', driver='GeoJSON')
# Later replace this file by the code block that converts long/lat to points.
point_geo = gpd.read_file('report_point.json', driver='GeoJSON')

housing = pd.read_csv('Selected_Demographic_and_Housing_Estimates_(DP05).csv')
nei_geo = gpd.read_file('Community_Reporting_Areas.geojson', driver='GeoJSON')

# Attach the SPD crime attributes to each point via the report number, then
# drop the now-redundant join key.
point_geo = point_geo.merge(crime, right_on='ReportNumber', left_on='rep_num')
point_geo.drop('rep_num', axis=1, inplace=True)

#### APPLY FILTER FOR CRIME TYPE HERE

# Spatial join: tag every point with the block it falls within. The left join
# keeps points that fall in no block.
point_in_block = point_geo.sjoin(block_geo, how="left", predicate="within")

# Crime count per block, merged back onto the block geometries.
block_stats_geo = point_in_block.groupby('GEOID_20').agg(total_crime_count=('NAME', 'count')).reset_index()
block_stats_geo = block_geo.merge(block_stats_geo, on='GEOID_20')

# Neighborhood population: keep the '5Y20' vintage rows for Seattle and sum
# the population per CRA_NO. The column is selected BEFORE summing so that
# only TOTAL_POPULATION is aggregated; summing every column first is wasteful
# and can fail on non-numeric columns.
seattle_5y20 = housing[(housing['ACS_VINTAGE'] == '5Y20') & (housing['JURISDICTION'] == 'Seattle')]
nei_pop = seattle_5y20.groupby('CRA_NO')['TOTAL_POPULATION'].sum().reset_index()
# Keep only the neighborhood polygons flagged as non-water.
nei_geo = nei_geo[nei_geo['WATER'] == 0]

# Join neighborhood population onto the neighborhood geometries.
nei_pop_geo = nei_geo.merge(nei_pop, on='CRA_NO', how='inner')

# Crime count per neighborhood, merged onto population + geometry.
nei_crime_count = point_in_block.groupby('CRA_NO').agg(total_crime_count=('CRA_NO', 'count')).reset_index()
nei_pop_crime_geo = nei_pop_geo.merge(nei_crime_count, on='CRA_NO')

# Dissolve the neighborhoods into neighborhood districts, summing population,
# area and crime count.
neidist = nei_pop_crime_geo.dissolve(
    by="NEIGHDIST",
    aggfunc={
        "TOTAL_POPULATION": "sum",
        "AREA_ACRES": 'sum',
        'total_crime_count': 'sum',
    }
).reset_index()


# Crimes per 1,000 residents and population density per square mile.
nei_pop_crime_geo['crime_per_thoudsand'] = nei_pop_crime_geo['total_crime_count'] / nei_pop_crime_geo['TOTAL_POPULATION'] * 1000
nei_pop_crime_geo['pop_dens_sqmiles'] = nei_pop_crime_geo['TOTAL_POPULATION'] / nei_pop_crime_geo['AREA_SQMI']

### Graph: 6 Geographic Distribution of Crime Rates in Seattle Neighborhoods 

In [12]:
# Choropleth of the offense count per neighborhood on a Seattle base map.
chart_df = nei_pop_crime_geo
seattle_map = folium.Map(
    location=[47.6095, -122.3171],
    tiles='cartodbpositron',
    zoom_start=11,
    control_scale=True,
    height='100%',
)
# The same frame supplies both the geometries and the values; GEN_ALIAS is
# the key that links a polygon to its count.
count_layer_options = dict(
    geo_data=chart_df,
    name='Choropleth',
    data=chart_df,
    columns=['GEN_ALIAS', 'total_crime_count'],
    key_on='feature.properties.GEN_ALIAS',
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Offense Count',
)
folium.Choropleth(**count_layer_options).add_to(seattle_map)



#seattle_map

<folium.features.Choropleth at 0x7f8076c4a3d0>

In [13]:
# The five neighborhoods with the highest crime count get a name label.
nei_largest_counts_df = nei_pop_crime_geo.nlargest(5, 'total_crime_count')

# NOTE: despite its name this list holds the top FIVE neighborhood names.
top_10_crime_nei = nei_largest_counts_df['GEN_ALIAS'].to_list()
chart_df = nei_pop_crime_geo
seattle_map = folium.Map(location=[47.63, -122.3171],
                         tiles='cartodbpositron',
                         zoom_start=13,
                         control_scale=True,
                         height='100%')
folium.Choropleth(
    geo_data=chart_df,
    name='Choropleth',
    data=chart_df,
    columns=['GEN_ALIAS', 'total_crime_count'],
    key_on='feature.properties.GEN_ALIAS',
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Offense Count'
).add_to(seattle_map)

# Place each label at the centroid of its polygon. centroid_list shares the
# index of chart_df, so the iterrows() index can be used to look it up.
centroid_list = chart_df.geometry.centroid
for idx, row in chart_df.iterrows():
    raw_name = row['GEN_ALIAS']
    # Compare the RAW name: the top list holds unescaped names, so testing the
    # HTML-escaped form would miss any name containing '&', '<', '>' or quotes.
    if raw_name not in top_10_crime_nei:
        continue
    # Escape only for the HTML that is injected into the marker.
    label = html.escape(raw_name)
    icon = folium.DivIcon(html=f'<div style="font-size: 10pt; color: black; ">{label}</div>')
    centroid = centroid_list[idx]
    folium.Marker(
        location=[centroid.y, centroid.x],
        icon=icon,
    ).add_to(seattle_map)

#seattle_map

In [14]:

                        
# Horizontal bars for the rows of nei_largest_counts_df, longest bar first.
# The x axis shows no ticks, domain line or tick values; the counts are
# printed next to the bars instead.
count_axis = alt.Axis(ticks=False, domain=False, values=[], titleColor='black', titleFontSize=15)
name_axis = alt.Axis(labelFontSize=15, labelLimit=500, labelColor='black')
bar_chart = (
    alt.Chart(nei_largest_counts_df)
    .mark_bar(color='steelblue')
    .encode(
        x=alt.X('total_crime_count:Q', title='Crime Count', axis=count_axis),
        y=alt.Y('GEN_ALIAS:N', sort='-x', title='', axis=name_axis),
    )
    .properties(width=800, height=400)
)

# Count labels, reusing the bar chart's x/y encodings for positioning.
text_chart = bar_chart.mark_text(align='left', baseline='middle', dx=3).encode(
    text='total_crime_count:Q',
)

# The size given here is set on the layered chart as a whole.
chart = (bar_chart + text_chart).properties(
    title='Neighborhoods with the Highest Crime Incidents ',
    width=400,
    height=300,
)

# Footer naming the data source; dx/dy shift the text mark in pixels.
source_frame = pd.DataFrame(
    {'source': ['Source: https://data.seattle.gov/Public-Safety/SPD-Crime-Data-2008-Present/']}
)
source_text = (
    alt.Chart(source_frame)
    .mark_text(fontSize=12, text='source', dx=40, dy=200)
    .encode(text='source:N')
)


chart = chart + source_text

#chart


### Graph7 : Relationship Between Crime Count and Population Per Square Miles 

In [15]:
top5_neig = nei_largest_counts_df.GEN_ALIAS.to_list()
# 'Cascade/Eastlake' is labelled by its own layer with a different offset so
# its text does not overlap the red regression line. It is filtered out of
# the general list without assuming it is present: list.remove() would raise
# ValueError whenever the data-driven top five does not contain it.
cascade_in_top5 = 'Cascade/Eastlake' in top5_neig
top5_neig = [name for name in top5_neig if name != 'Cascade/Eastlake']

# Scatter plot: one circle per neighborhood.
crime_popdense_chart = alt.Chart(nei_pop_crime_geo).mark_circle().encode(
    x=alt.X('total_crime_count:Q', title='Crime Count'),
    y=alt.Y('pop_dens_sqmiles:Q', title='Population Density Per Square Miles')
).properties(
    height=500,
    width=500,
    title={
        "text": ['Positive Relationship Between Crime Count and Population Density '],
        "subtitle": ['Across Neighborhoods'],
        "offset": 40,
        "subtitlePadding": 10,
        "subtitleFontSize": 18,
    }
)

# Name labels for the remaining top neighborhoods.
labels = alt.Chart(nei_pop_crime_geo).mark_text(align='left', dx=5, dy=-6, size=12).transform_filter(
    alt.FieldOneOfPredicate(field='GEN_ALIAS', oneOf=top5_neig)
).encode(
    x=alt.X('total_crime_count:Q'),
    y=alt.Y('pop_dens_sqmiles:Q'),
    text='GEN_ALIAS:N'
)

# Separately positioned label for 'Cascade/Eastlake'.
labels_cascade = alt.Chart(nei_pop_crime_geo).mark_text(align='left', dx=-65, dy=-13, size=12).transform_filter(
    alt.datum.GEN_ALIAS == 'Cascade/Eastlake'
).encode(
    x=alt.X('total_crime_count:Q'),
    y=alt.Y('pop_dens_sqmiles:Q'),
    text='GEN_ALIAS:N'
)

# Red regression line of population density on crime count, layered on the
# scatter plot.
regression_line = crime_popdense_chart.transform_regression(
    'total_crime_count', 'pop_dens_sqmiles'
).mark_line(color='red')
crime_popdense_chart = crime_popdense_chart + regression_line

# Combine the scatter plot and the text labels; the dedicated label is only
# added when 'Cascade/Eastlake' actually was among the top neighborhoods.
final_chart = crime_popdense_chart + labels
if cascade_in_top5:
    final_chart = final_chart + labels_cascade

# Footer naming the data source; dx/dy shift the text mark in pixels.
source_text = alt.Chart(
    pd.DataFrame(
        {'source': ['Source: https://data.seattle.gov/Public-Safety/SPD-Crime-Data-2008-Present/']})
).mark_text(
    fontSize=12,
    dx=70,
    dy=325,
).encode(
    text='source:N'
)


final_chart = final_chart + source_text



#final_chart

### Crime Per Thousand

In [16]:
# Choropleth of crimes per 1,000 residents for all neighborhoods.
chart_df = nei_pop_crime_geo

crime_per_thoudsand_seattle_map = folium.Map(location=[47.58, -122.3171],
                         tiles='cartodbpositron',
                         zoom_start=12,
                         control_scale=True,
                         height='100%')
folium.Choropleth(
    geo_data=chart_df,
    name='Choropleth',
    data=chart_df,
    columns=['GEN_ALIAS', 'crime_per_thoudsand'],
    key_on='feature.properties.GEN_ALIAS',
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    # The mapped column name says "per thousand", so the legend says 1,000.
    legend_name='Crime per capita (per 1,000)'
).add_to(crime_per_thoudsand_seattle_map)

# Neighborhoods that get a name label on this map.
labelled_neighborhoods = ['Downtown Commercial Core', 'Georgetown', 'Duwamish/SODO', 'Queen Anne']

# Place each label at the centroid of its polygon. centroid_list shares the
# index of chart_df, so the iterrows() index can be used to look it up.
centroid_list = chart_df.geometry.centroid
for idx, row in chart_df.iterrows():
    if row['GEN_ALIAS'] not in labelled_neighborhoods:
        continue
    # Escape the name because it is injected into the marker's HTML.
    label = html.escape(row['GEN_ALIAS'])
    icon = folium.DivIcon(html=f'<div style="font-size: 10pt; color: black; ">{label}</div>')
    centroid = centroid_list[idx]
    folium.Marker(
        location=[centroid.y, centroid.x],
        icon=icon,
    ).add_to(crime_per_thoudsand_seattle_map)

#crime_per_thoudsand_seattle_map

### Graph: Crime per Density

In [17]:
# Crime count divided by the AREA_SQMI column of each neighborhood.
nei_pop_crime_geo['crime_density'] = nei_pop_crime_geo['total_crime_count'].div(nei_pop_crime_geo['AREA_SQMI'])
# Drop columns that are not needed for the density map.
unused_columns = ['SHAPE_Length', 'SHAPE_Area', 'WATER', 'SE_ANNO_CAD_DATA']
nei_pop_crime_geo = nei_pop_crime_geo.drop(columns=unused_columns)

In [18]:
# Names (GEN_ALIAS values) of the neighborhoods that get a text label on the
# crime-density map.
density_area_text = [
    'Central Area/Squire Park',
    'First Hill',
    'Capitol Hill',
    'Cascade/Eastlake',
    'Belltown',
    'Downtown Commercial Core',
    'Pioneer Square/International District',
    'Queen Anne',
]

In [19]:
# Choropleth of crime density for all neighborhoods, with name labels for the
# areas listed in density_area_text.
chart_df = nei_pop_crime_geo

crime_density_seattle_map = folium.Map(
    location=[47.6095, -122.3171],
    tiles='cartodbpositron',
    zoom_start=13,
    control_scale=True,
    height='100%',
)
# The same frame supplies both the geometries and the values; GEN_ALIAS is
# the key that links a polygon to its density.
density_layer_options = dict(
    geo_data=chart_df,
    name='Choropleth',
    data=chart_df,
    columns=['GEN_ALIAS', 'crime_density'],
    key_on='feature.properties.GEN_ALIAS',
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Crime Density (per Squared Miles)',
)
folium.Choropleth(**density_layer_options).add_to(crime_density_seattle_map)

# Labels sit at the polygon centroids; centroid_list shares chart_df's index.
centroid_list = chart_df.geometry.centroid
for idx, row in chart_df.iterrows():
    # The name is escaped because it is injected into the marker's HTML.
    neighborhood_name = html.escape(row['GEN_ALIAS'])
    offense_count = row['crime_density']
    if row['GEN_ALIAS'] not in density_area_text:
        continue
    label = neighborhood_name
    icon = folium.DivIcon(html=f'<div style="font-size: 10pt; color: black; ">{label}</div>')
    centroid = centroid_list[idx]
    marker = folium.Marker(location=[centroid.y, centroid.x], icon=icon)
    marker.add_to(crime_density_seattle_map)

#crime_density_seattle_map

### Graph: Downtown Seattle Map With Crime Count

In [20]:
# Block-level choropleth restricted to the 'Downtown Commercial Core' rows.
is_downtown = block_stats_geo['GEN_ALIAS'] == 'Downtown Commercial Core'
chart_df = block_stats_geo[is_downtown]

downtown_seattle_map = folium.Map(
    location=[47.6095, -122.3300],
    tiles='cartodbpositron',
    zoom_start=14,
    control_scale=True,
    height='100%',
)
# Blocks are keyed by their NAME property and shaded by their crime count.
block_layer_options = dict(
    geo_data=chart_df,
    name='Choropleth',
    data=chart_df,
    columns=['NAME', 'total_crime_count'],
    key_on='feature.properties.NAME',
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Offense Count',
)
folium.Choropleth(**block_layer_options).add_to(downtown_seattle_map)


#downtown_seattle_map

<folium.features.Choropleth at 0x7f8040385a00>