In [12]:
import pandas as pd
import altair as alt

# Step 1: Load the cleaned dataset
# Point this at your cleaned CSV export.
cleaned_data_file = "Crime_Reports_20250109.csv"
crime_data = pd.read_csv(filepath_or_buffer=cleaned_data_file)

# Step 2: Data Preparation

# Narrative 1: Crime Types by Location
# Incidents per location type; this is the denominator for 'Proportion' below.
total_crimes_by_location = (
    crime_data
    .groupby('Location Type')
    .size()
    .reset_index(name='Total Crimes')
)
# Incidents per (location type, offense) pair, joined to the location totals so
# each row also carries the offense's share of all incidents at that location.
crime_by_location = (
    crime_data
    .groupby(['Location Type', 'Highest Offense Description'])
    .size()
    .reset_index(name='Count')
    .merge(total_crimes_by_location, on='Location Type')
    .assign(Proportion=lambda counts: counts['Count'] / counts['Total Crimes'])
)

# Narrative 2: Clearance Rates by Offense and District
def _clearance_summary(incidents: pd.DataFrame, group_column: str) -> pd.DataFrame:
    """Summarise incident totals and clearance rate per value of ``group_column``.

    Args:
        incidents: Incident-level frame containing 'Incident Number',
            'Clearance Status' and ``group_column``.
        group_column: Column whose distinct values define the groups.

    Returns:
        One row per group with ``group_column``, 'Total_Incidents' (rows in the
        group), 'Cleared_Incidents' (rows whose 'Clearance Status' equals 'C')
        and 'Clearance Rate' (cleared / total, expressed as a percentage).
    """
    summary = incidents.groupby(group_column).agg(
        Total_Incidents=('Incident Number', 'size'),
        # NOTE(review): only status 'C' is counted as cleared; confirm whether
        # other status codes in the dataset should count as well.
        Cleared_Incidents=('Clearance Status', lambda status: (status == 'C').sum()),
    ).reset_index()
    summary['Clearance Rate'] = (
        summary['Cleared_Incidents'].div(summary['Total_Incidents']).mul(100)
    )
    return summary


# Clearance rates by offense
clearance_by_offense = _clearance_summary(crime_data, 'Highest Offense Description')

# Clearance rates by district
clearance_by_district = _clearance_summary(crime_data, 'Council District')

# Narrative 3: Incident Concentrations by District
incident_counts_by_district = (
    crime_data
    .groupby('Council District')
    .size()
    .reset_index(name='Incident Count')
)

# Step 3: Visualizations

# Narrative 1: Crime Types by Location (Altair Bar Chart)
# One bar per location type, coloured by offense description; locations are
# ordered by descending bar height.
crime_chart = (
    alt.Chart(
        crime_by_location,
        title='Crime Types by Location',
        width=800,
        height=400,
    )
    .mark_bar()
    .encode(
        alt.X('Location Type:N', sort='-y', title='Location Type'),
        alt.Y('Count:Q', title='Crime Count'),
        alt.Color('Highest Offense Description:N'),
        tooltip=['Location Type', 'Highest Offense Description', 'Count'],
    )
)

# Narrative 2: Clearance Rates by Offense (Altair Bar Chart)
# Restrict the chart to the ten offenses with the most incidents.
top_offenses = clearance_by_offense.nlargest(n=10, columns='Total_Incidents')
clearance_chart = (
    alt.Chart(
        top_offenses,
        title='Clearance Rates by Offense Type (Top 10 Offenses)',
        width=800,
        height=400,
    )
    .mark_bar()
    .encode(
        alt.X('Highest Offense Description:N', sort='-y', title='Offense Type'),
        alt.Y('Clearance Rate:Q', title='Clearance Rate (%)'),
        tooltip=[
            alt.Tooltip('Highest Offense Description:N', title='Offense Type'),
            # Rate is shown with two decimal places in the tooltip.
            alt.Tooltip('Clearance Rate:Q', title='Clearance Rate (%)', format='.2f'),
            alt.Tooltip('Total_Incidents:Q', title='Total Incidents'),
        ],
    )
)

# Narrative 3: Incident Concentrations by District (Altair Bar Chart)
# One bar per council district, ordered by descending incident count.
district_chart = (
    alt.Chart(
        incident_counts_by_district,
        title='Incident Concentrations by Council District',
        width=800,
        height=400,
    )
    .mark_bar()
    .encode(
        alt.X('Council District:N', sort='-y', title='Council District'),
        alt.Y('Incident Count:Q', title='Incident Count'),
        tooltip=['Council District', 'Incident Count'],
    )
)

# Step 4: Display or Save Visualizations
# Write each chart to its own HTML file, in the order the charts were built.
for chart, html_path in (
    (crime_chart, 'crime_types_by_location.html'),
    (clearance_chart, 'clearance_rates_by_offense.html'),
    (district_chart, 'incident_concentrations_by_district.html'),
):
    chart.save(html_path)

print("Visualizations saved as HTML files. Open them in your browser to view.")


Visualizations saved as HTML files. Open them in your browser to view.


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [13]:
# Step 1: Load the cleaned dataset
cleaned_data_file = "Crime_Reports_20250109.csv"  # Replace with your actual file path
crime_data = pd.read_csv(filepath_or_buffer=cleaned_data_file)

# Step 2: Prepare the data
# Incident counts for every (location type, offense) pair
crime_by_location = (
    crime_data
    .groupby(['Location Type', 'Highest Offense Description'])
    .size()
    .reset_index(name='Count')
)

# Step 3: Create a dropdown selection for incident type
# Distinct offense descriptions, in the order they first appear in the grouped
# frame; the same list feeds both the dropdown options and its default value.
offense_options = list(crime_by_location['Highest Offense Description'].unique())
incident_dropdown = alt.binding_select(options=offense_options, name='Incident Type: ')
incident_selection = alt.selection_point(
    fields=['Highest Offense Description'],
    bind=incident_dropdown,
    value=offense_options[0],  # Default value must match the dropdown options
)

# Step 4: Filtered chart based on the selection
# The dropdown parameter is attached with add_params (add_selection is
# deprecated as of Altair 5), and the same parameter drives transform_filter so
# only rows for the chosen incident type are drawn.
dropdown_chart = alt.Chart(crime_by_location).mark_bar().encode(
    x=alt.X('Location Type:N', sort='-y', title='Location Type'),
    y=alt.Y('Count:Q', title='Crime Count'),
    tooltip=['Location Type', 'Count', 'Highest Offense Description'],
    # Colour distinguishes bars only; the legend would repeat the x-axis labels.
    color=alt.Color('Location Type:N', legend=None)
).add_params(
    incident_selection
).transform_filter(
    incident_selection
).properties(
    title='Crime Count by Location Type for Selected Incident Type',
    width=800,
    height=400
)

# Step 5: Save or display the chart
dropdown_chart.save('crime_count_by_incident_dropdown.html')
print("Visualization saved as 'crime_count_by_incident_dropdown.html'. Open this file in your browser to view.")


Visualization saved as 'crime_count_by_incident_dropdown.html'. Open this file in your browser to view.


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
