# 06 — Gun Homicide vs Gun Ownership Rate

This notebook draws a scatter plot with a trend line to explore the correlation between
civilian gun ownership (firearms per 100 persons, Small Arms Survey 2017) and gun homicide
rates (per 100K), followed by a choropleth map of gun ownership by country.

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from scipy import stats
from pathlib import Path

# Relative path to the directory holding the processed datasets.
DATA_DIR = Path('../data/processed')
merged_csv = DATA_DIR / 'merged_country_data.csv'
df = pd.read_csv(merged_csv)

# Keep only the rows where both variables of interest are present. The explicit
# copy lets later cells add columns to `plot_df` without touching `df`.
required_cols = ['guns_per_100', 'gun_homicide_rate']
plot_df = df.dropna(subset=required_cols).copy()
print(f"Countries with both gun ownership and gun homicide data: {len(plot_df)}")

Countries with both gun ownership and gun homicide data: 160


## Scatter Plot — Gun Homicide Rate vs Gun Ownership

In [2]:
# Ordinary least-squares fit of log10(gun homicide rate) against firearms per
# 100 persons. Rates are floored at 0.01 before taking the log so that a zero
# rate maps to a finite value (log10(0.01) = -2) instead of -inf.
floored_rate = plot_df['gun_homicide_rate'].clip(lower=0.01)
plot_df['log_homicide'] = np.log10(floored_rate)

slope, intercept, r_value, p_value, std_err = stats.linregress(
    x=plot_df['guns_per_100'],
    y=plot_df['log_homicide'],
)
# Coefficient of determination: square of the correlation coefficient.
r_squared = r_value ** 2

print("Linear regression (guns per 100 vs log10 gun homicide rate):")
print(f"  R² = {r_squared:.4f}")
print(f"  p-value = {p_value:.2e}")
print(f"  slope = {slope:.4f}")

Linear regression (guns per 100 vs log10 gun homicide rate):
  R² = 0.0064
  p-value = 3.16e-01
  slope = -0.0044


In [3]:
# Plot exactly the values the regression cell fitted: `log_homicide` is log10 of
# the rate after flooring at 0.01, so undoing the log gives the floored rate.
# Plotting the raw column on a log axis would silently omit any zero-rate
# country, even though it still influenced the trend line.
scatter_df = plot_df.assign(rate_for_plot=10 ** plot_df['log_homicide'])

fig = px.scatter(
    scatter_df,
    x='guns_per_100',
    y='rate_for_plot',
    color='region',
    hover_name='country_name',
    hover_data={
        'guns_per_100': ':.1f',
        'gun_homicide_rate': ':.2f',  # hover reports the true, unfloored rate
        'rate_for_plot': False,       # hide the floored plotting helper
        'region': True,
    },
    log_y=True,
    title=f'Gun Homicide Rate vs Gun Ownership (R²={r_squared:.3f}, p={p_value:.2e})',
    labels={
        'guns_per_100': 'Civilian Firearms per 100 Persons',
        'gun_homicide_rate': 'Gun Homicide Rate per 100K',
        'rate_for_plot': 'Gun Homicide Rate per 100K (log scale)',
        'region': 'Region',
    },
)

# Add trend line: the fit is linear in log10 space, so map it back with 10**.
x_range = np.linspace(plot_df['guns_per_100'].min(), plot_df['guns_per_100'].max(), 100)
y_trend = 10 ** (slope * x_range + intercept)
fig.add_trace(go.Scatter(
    x=x_range, y=y_trend,
    mode='lines',
    name=f'Trend (R²={r_squared:.3f})',
    line=dict(color='red', dash='dash', width=2),
))

# Optional labels for notable countries. Disabled by default (empty list);
# fill in country codes to enable,
# e.g. ['USA', 'YEM', 'JPN', 'CHN', 'VEN', 'JAM', 'HND'].
annotate_codes = []
labelled = scatter_df[scatter_df['country_code'].isin(annotate_codes)]
for _, row in labelled.iterrows():
    fig.add_annotation(
        x=row['guns_per_100'],
        # Annotations on a log axis are positioned in log10 units, unlike trace
        # data, so pass the log value rather than the raw rate.
        y=row['log_homicide'],
        text=row['country_name'], showarrow=True, arrowhead=2,
        ax=20, ay=-25, font=dict(size=10),
    )

fig.update_layout(template='plotly_white', height=600)
fig.show()

## Gun Ownership Choropleth Map

In [4]:
# World map of civilian firearm ownership. Rows without an ownership figure are
# excluded from the frame handed to the map.
has_ownership = df['guns_per_100'].notna()
ownership_map = df[has_ownership]

fig = px.choropleth(
    data_frame=ownership_map,
    locations='country_code',
    color='guns_per_100',
    color_continuous_scale='YlOrRd',
    hover_name='country_name',
    # Show the ownership value to one decimal; hide the code used for matching.
    hover_data={'guns_per_100': ':.1f', 'country_code': False},
    labels={'guns_per_100': 'Guns per 100'},
    title='Civilian Firearm Ownership per 100 Persons (Small Arms Survey 2017)',
)

geo_style = {'showframe': False, 'showcoastlines': True, 'coastlinecolor': '#999'}
fig.update_layout(geo=geo_style, template='plotly_white', height=500)
fig.show()