## Notebook to Generate all our Plots

Importing the police data:

In [1]:
import numpy as np
import pandas as pd

data = pd.read_csv("../PoliceData.csv") 
data['Date']= pd.to_datetime(data['Date']) # Make the date column date datatype
data['Time']= pd.to_datetime(data['Time']) # Make the time column date datatype

data['Category'] = data['Category'].replace('DRIVING UNDER THE INFLUENCE', 'DUI')

  data['Time']= pd.to_datetime(data['Time']) # Make the time column date datatype


Filtering for the drug/narcotic crimes and seeing all the different descriptions:

In [2]:
drugCrimes = data[data['Category'] == 'DRUG/NARCOTIC']
print(drugCrimes['Descript'].unique())

drugCrimeTypes = drugCrimes['Descript'].unique()
print(len(drugCrimeTypes))

['POSSESSION OF HEROIN' 'POSSESSION OF METH-AMPHETAMINE'
 'POSSESSION OF NARCOTICS PARAPHERNALIA' 'PLANTING/CULTIVATING MARIJUANA'
 'CONTROLLED SUBSTANCE VIOLATION, LOITERING FOR' 'POSSESSION OF MARIJUANA'
 'POSSESSION OF HEROIN FOR SALES'
 'POSSESSION OF BASE/ROCK COCAINE FOR SALE' 'SALE OF CONTROLLED SUBSTANCE'
 'POSSESSION OF BASE/ROCK COCAINE' 'POSSESSION OF COCAINE'
 'SALE OF MARIJUANA' 'SALE OF BASE/ROCK COCAINE'
 'POSSESSION OF MARIJUANA FOR SALES'
 'POSSESSION OF METH-AMPHETAMINE FOR SALE'
 'POSSESSION OF CONTROLLED SUBSTANCE'
 'LOITERING WHERE NARCOTICS ARE SOLD/USED' 'POSSESSION OF METHADONE'
 'UNDER THE INFLUENCE OF CONTROLLED SUBSTANCES'
 'TRANSPORTATION OF MARIJUANA' 'FORGE OR ALTER PRESCRIPTION'
 'POSSESSION OF OPIATES' 'SALE OF HEROIN' 'POSSESSION OF AMPHETAMINE'
 'POSSESSION OF CONTROLLED SUBSTANCE FOR SALE' 'SALE OF AMPHETAMINE'
 'SALES COCAINE BASE/SCHOOLYARD TRAFFICKING ACT VIO'
 'MAINTAINING PREMISE WHERE NARCOTICS ARE SOLD/USED'
 'POSSESSION OF COCAINE FOR SALES' '

Adding labels based on description for the drug type:

In [19]:
import numpy as np

# Define conditions and corresponding labels
conditions = [
    drugCrimes['Descript'].str.contains('heroin', case=False),
    drugCrimes['Descript'].str.contains('opium|opiates', case=False),
    drugCrimes['Descript'].str.contains('prescription', case=False),
    drugCrimes['Descript'].str.contains('meth-amphetamine', case=False),
    drugCrimes['Descript'].str.contains('amphetamine', case=False),
    drugCrimes['Descript'].str.contains('methadone', case=False),
    drugCrimes['Descript'].str.contains('marijuana', case=False),
    drugCrimes['Descript'].str.contains('base/rock cocaine', case=False),
    drugCrimes['Descript'].str.contains('cocaine', case=False),
    drugCrimes['Descript'].str.contains('barbituates', case=False),
    drugCrimes['Descript'].str.contains('hallucinogenic', case=False),
    drugCrimes['Descript'].str.contains('narcotics|substance', case=False),
]

labels = ['Heroin', 'Opium/Opiates', 'Prescription', 'Methamphetamine', 'Amphetamine', 'Methadone', 'Marijuana', 'Base/rock cocaine', 'Cocaine', 'Barbituates', 'Hallucinogenic', 'Narcotics/Substance']

# Apply conditions and assign labels
drugCrimes.loc[:, 'drugtype'] = np.select(conditions, labels, default='Other')

# Print unique values of the 'drugtype' column for verification
drugCrimes[['drugtype', 'Descript']].head()


Unnamed: 0,drugtype,Descript
18,Heroin,POSSESSION OF HEROIN
31,Methamphetamine,POSSESSION OF METH-AMPHETAMINE
37,Narcotics/Substance,POSSESSION OF NARCOTICS PARAPHERNALIA
80,Narcotics/Substance,POSSESSION OF NARCOTICS PARAPHERNALIA
167,Marijuana,PLANTING/CULTIVATING MARIJUANA


Normalising count for every drug type for every year:

In [20]:
labels.append("Other")

yearGroups = pd.DataFrame()
yearGroups.index.name = 'Year'

drugCrimesFilter = drugCrimes[drugCrimes['Date'].dt.year != 2018]

for i in labels:
    drugCrime = drugCrimesFilter[drugCrimesFilter['drugtype'] == i]
    yearCount = drugCrime.groupby(drugCrime['Date'].dt.year).size()
    yearTotal = yearCount.sum()
    yearGroups[i] = yearCount / yearTotal

yearGroups = yearGroups.fillna(0)
yearGroups.head()

Unnamed: 0_level_0,Heroin,Opium/Opiates,Prescription,Methamphetamine,Amphetamine,Methadone,Marijuana,Base/rock cocaine,Cocaine,Barbituates,Hallucinogenic,Narcotics/Substance,Other
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2003,0.082205,0.061166,0.082734,0.060631,0.18097,0.019048,0.085453,0.101849,0.085769,0.0,0.053846,0.072924,0.142064
2004,0.079349,0.044097,0.064748,0.053113,0.176306,0.022222,0.080934,0.107146,0.085077,0.0,0.041538,0.076648,0.100999
2005,0.063206,0.046942,0.043165,0.056137,0.138993,0.015873,0.057683,0.09535,0.078506,0.0,0.049231,0.068115,0.075472
2006,0.063206,0.031294,0.039568,0.046004,0.117537,0.025397,0.063042,0.100972,0.080062,0.045455,0.061538,0.07744,0.109878
2007,0.084441,0.056899,0.02518,0.047393,0.136194,0.038095,0.087271,0.113061,0.082829,0.090909,0.075385,0.086295,0.077691


## Plot 1: Normalised Count Time Series

Interactive bokeh time series based on the prior table:

In [21]:
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, Legend
from bokeh.io import output_notebook, output_file
from bokeh.palettes import Category20_14

yearGroups.index = yearGroups.index.astype(str)

# Create a Bokeh ColumnDataSource from the DataFrame
source = ColumnDataSource(yearGroups)

output_file("TimeSeries.html")

# Create a Bokeh figure
p = figure(x_range=yearGroups.index.values, height=400, width=800, title="Normalized Yearly Drug Crime Counts",
           toolbar_location=None, tools="", y_axis_label="Normalized Count")

# Create an empty Legend object
legend = Legend()

# Add the legend to the plot on the right side
p.add_layout(legend, 'right')

# Plot vertical bars for each crime category
categories = list(yearGroups.columns)

palette = [color for color in Category20_14]

bar = {}

for idx, i in enumerate(categories):
    bar[i] = p.vbar(x='Year', top=i, source=source, width=0.5, legend_label=i,
           color=palette[idx], muted_color=palette[idx], fill_alpha=0.8, 
           muted_alpha=0.1, muted=True)

# Set attributes for the plot
p.y_range.start = 0
p.xaxis.axis_label = "Year"
p.yaxis.axis_label = "Normalized Count"
p.legend.click_policy = "mute"
p.title.text_font = "Helvetica"
p.title.align = "center"
p.xaxis.axis_label_text_font = "Helvetica"
p.yaxis.axis_label_text_font = "Helvetica"
p.legend.label_text_font = "Helvetica"

# Show the plot
output_notebook()
show(p)


Filtering for a specific month and coloring by drug type:

In [28]:
monthData1 = drugCrimes[drugCrimes['Date'].dt.year == 2014]
monthData = monthData1[monthData1['Date'].dt.month == 4]
locationData = monthData[['X', 'Y', 'drugtype', 'Descript']]

# Provided lists
drug_types = ['Heroin', 'Opium/Opiates', 'Prescription', 'Methamphetamine', 'Amphetamine', 'Methadone', 'Marijuana', 'Base/rock cocaine', 'Cocaine', 'Barbituates', 'Hallucinogenic', 'Narcotics/Substance', 'Other']
hex_colors = ['#1f77b4', '#aec7e8', '#ff7f0e', '#ffbb78', '#2ca02c', '#98df8a', '#d62728', '#ff9896', '#9467bd', '#c5b0d5', '#8c564b', '#c49c94', '#e377c2', '#f7b6d2']
# Convert hexadecimal color codes to RGB values
rgb_colors = [tuple(int(hex_color[i:i+2], 16) for i in (1, 3, 5)) for hex_color in hex_colors]

# Create a dictionary to map drug types to colors
drug_type_color_mapping = dict(zip(drug_types, rgb_colors))

# Function to get the color based on drug type
def get_color(drugtype):
    return drug_type_color_mapping.get(drugtype, [255, 140, 0])  # Default color for unknown types

# Add a new column 'color' to the DataFrame using the 'drugtype' column
locationData['color'] = locationData['drugtype'].map(get_color)

# Define the amount of jitter (adjust as needed)
jitter_amount = 0.00017  # You can adjust this value based on your preference

# Function to add jitter to coordinates
def add_jitter(coord):
    return coord + np.random.uniform(-jitter_amount, jitter_amount)

# Identify rows where both X and Y coordinates match
duplicates_mask = locationData.duplicated(subset=['X', 'Y'], keep=False)

# Apply jitter to X and Y coordinates where both X and Y match
locationData['X'] = np.where(locationData.duplicated(subset=['X'], keep=False), 
                             locationData['X'].apply(add_jitter),
                             locationData['X'])

locationData['Y'] = np.where(locationData.duplicated(subset=['Y'], keep=False), 
                             locationData['Y'].apply(add_jitter),
                             locationData['Y'])

locationData.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  locationData['color'] = locationData['drugtype'].map(get_color)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  locationData['X'] = np.where(locationData.duplicated(subset=['X'], keep=False),
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  locationData['Y'] = np.where(locationData.duplicated(subset=[

Unnamed: 0,X,Y,drugtype,Descript,color
1451095,-122.405829,37.768475,Methamphetamine,POSSESSION OF METH-AMPHETAMINE,"(255, 187, 120)"
1451167,-122.417807,37.785145,Base/rock cocaine,SALE OF BASE/ROCK COCAINE,"(255, 152, 150)"
1451438,-122.391523,37.732432,Heroin,POSSESSION OF HEROIN,"(31, 119, 180)"
1451946,-122.395239,37.73826,Cocaine,POSSESSION OF COCAINE,"(148, 103, 189)"
1451968,-122.407852,37.780033,Heroin,POSSESSION OF HEROIN,"(31, 119, 180)"


## Plot 2: Geo Scatter colored by Drug Type

In [29]:
import pydeck as pdk
import pandas as pd
import math

SCATTERPLOT_LAYER_DATA = "https://raw.githubusercontent.com/visgl/deck.gl-data/master/website/bart-stations.json"
df = pd.read_json(SCATTERPLOT_LAYER_DATA)

# Use pandas to calculate additional data
df["exits_radius"] = df["exits"].apply(lambda exits_count: math.sqrt(exits_count))

# Define a layer to display on a map
layer = pdk.Layer(
    "ScatterplotLayer",
    locationData,
    pickable=True,
    opacity=0.3,
    stroked=False,
    filled=True,
    radius_scale=3,
    radius_min_pixels=8,
    radius_max_pixels=20,
    line_width_min_pixels=1,
    get_position=["X", "Y"],
    get_fill_color="color",
    get_line_color=[0, 0, 0],
    radius_unit='pixels'
)

# Set the viewport location
view_state = pdk.ViewState(latitude=37.7749295, longitude=-122.4194155, zoom=10, bearing=0, pitch=0)

r = pdk.Deck(
    layers=[layer],
    initial_view_state=view_state,
    tooltip={
        "html": "{Descript} <br /> some other shit",  # Change tooltip text here
        "style": {
            "backgroundColor": "rgba(0, 0, 0, 0.7)",
            "color": "white",
            "fontFamily": "Helvetica, sans-serif"
        }
    }
)

# Render
r.to_html("MapPlot.html")

## Plot 3: Normalised Drug Type Count against Police District