# 04 — Gun Homicide vs Drug Offenses

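Log–log scatter plot with a fitted trend line exploring the correlation between country-level drug offense rates and gun homicide rates (both per 100K), followed by a reference choropleth of drug offense rates.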

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from scipy import stats
from pathlib import Path

# Processed-data directory, given relative to the notebook's working directory.
DATA_DIR = Path('../data/processed')
merged_csv = DATA_DIR / 'merged_country_data.csv'
df = pd.read_csv(merged_csv)

# Keep only the rows where BOTH rates are present; the scatter plot and the
# regression below need a value on each axis. `.copy()` detaches the subset
# so that columns added later do not touch `df`.
rate_cols = ['drug_offense_rate', 'gun_homicide_rate']
has_both_rates = df[rate_cols].notna().all(axis=1)
plot_df = df.loc[has_both_rates].copy()
print(f"Countries with both drug offense and gun homicide data: {len(plot_df)}")

Countries with both drug offense and gun homicide data: 121


## Scatter Plot — Gun Homicide Rate vs Drug Offense Rate

In [2]:
# Least-squares fit in log10–log10 space.
# Each rate is floored before taking the log so that values at or below the
# floor map to a finite number: 0.01 for gun homicide, 1.0 for drug offenses.
homicide_floored = plot_df['gun_homicide_rate'].clip(lower=0.01)
drug_floored = plot_df['drug_offense_rate'].clip(lower=1.0)
plot_df['log_homicide'] = np.log10(homicide_floored)
plot_df['log_drug'] = np.log10(drug_floored)

# x = log drug offense rate, y = log gun homicide rate.
fit = stats.linregress(x=plot_df['log_drug'], y=plot_df['log_homicide'])
slope, intercept, r_value, p_value, std_err = (
    fit.slope,
    fit.intercept,
    fit.rvalue,
    fit.pvalue,
    fit.stderr,
)
r_squared = r_value ** 2

# One print call, one line per statistic.
print(
    f"Linear regression (log drug offense vs log gun homicide):",
    f"  R² = {r_squared:.4f}",
    f"  p-value = {p_value:.2e}",
    f"  slope = {slope:.4f}",
    sep="\n",
)

Linear regression (log drug offense vs log gun homicide):
  R² = 0.0281
  p-value = 6.59e-02
  slope = -0.3093


In [3]:
# Plot the same floored values the regression was fitted on (recovered from
# the `log_drug` / `log_homicide` columns). A zero rate cannot be drawn on a
# log axis, so plotting the raw columns would silently leave out countries
# that still contribute to the R² and p-value quoted in the title.
scatter_df = plot_df.assign(
    drug_offense_rate_plot=10 ** plot_df['log_drug'],
    gun_homicide_rate_plot=10 ** plot_df['log_homicide'],
)

axis_labels = {
    'drug': 'Drug Offense Rate per 100K (log scale)',
    'homicide': 'Gun Homicide Rate per 100K (log scale)',
}

fig = px.scatter(
    scatter_df,
    x='drug_offense_rate_plot',
    y='gun_homicide_rate_plot',
    color='region',
    hover_name='country_name',
    # Hover keeps the original (unfloored) rates; the helper plotting columns
    # are hidden so the tooltip content matches the source data.
    hover_data={
        'drug_offense_rate': ':.1f',
        'gun_homicide_rate': ':.2f',
        'region': True,
        'drug_offense_rate_plot': False,
        'gun_homicide_rate_plot': False,
    },
    log_x=True,
    log_y=True,
    title=f'Gun Homicide Rate vs Drug Offense Rate (R²={r_squared:.3f}, p={p_value:.2e})',
    labels={
        'drug_offense_rate': axis_labels['drug'],
        'gun_homicide_rate': axis_labels['homicide'],
        'drug_offense_rate_plot': axis_labels['drug'],
        'gun_homicide_rate_plot': axis_labels['homicide'],
        'region': 'Region',
    },
)

# Add trend line: evaluate the fitted line over the observed log10 x-range,
# then map both coordinates back to the original scale for the log axes.
x_range = np.linspace(plot_df['log_drug'].min(), plot_df['log_drug'].max(), 100)
y_trend = 10 ** (slope * x_range + intercept)
x_trend = 10 ** x_range
fig.add_trace(go.Scatter(
    x=x_trend, y=y_trend,
    mode='lines',
    name=f'Trend (R²={r_squared:.3f})',
    line=dict(color='red', dash='dash', width=2),
))

fig.update_layout(template='plotly_white', height=600)
fig.show()

## Drug Offense Rate Choropleth (Reference)

In [4]:
# Reference map: every row of `df` that has a drug offense rate, whether or
# not a gun homicide rate is available for it.
has_drug_rate = df['drug_offense_rate'].notna()
drug_map = df[has_drug_rate]

# NOTE(review): no `locationmode` is passed, so Plotly's default applies —
# confirm that `country_code` holds codes in that format.
choropleth_options = {
    'locations': 'country_code',
    'color': 'drug_offense_rate',
    'hover_name': 'country_name',
    # Show the rate with one decimal; hide the raw country code in the tooltip.
    'hover_data': {'drug_offense_rate': ':.1f', 'country_code': False},
    'color_continuous_scale': 'YlOrBr',
    'title': 'Drug Offense Rate per 100K by Country',
    'labels': {'drug_offense_rate': 'Rate per 100K'},
}
fig = px.choropleth(drug_map, **choropleth_options)

# Map styling: no frame, grey coastlines.
geo_style = {'showframe': False, 'showcoastlines': True, 'coastlinecolor': '#999'}
fig.update_layout(geo=geo_style, template='plotly_white', height=500)
fig.show()