# 02 · Trend Analysis — Baltimore IGS

This notebook loads cleaned outputs from `01_data_cleaning.ipynb` and produces quick insights:
- Top / bottom movers by primary score (latest YoY)
- Summary tables (by year)
- Distribution plots for YoY changes
- Simple tract spotlight helper

**Inputs** (expected):
- `../data_clean/baltimore_igs_merged.csv`
- `../data_clean/baltimore_igs_yoy_deltas.csv`

**Outputs**:
- `../visuals/igs_yoy_histogram.png`
- `../visuals/igs_top10_primary_yoy.png`
- `../visuals/igs_bottom10_primary_yoy.png`


In [None]:
# Imports & paths
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Input and output folders, relative to the notebook's working directory
CLEAN_DIR, VIS_DIR = Path('../data_clean'), Path('../visuals')

# Figures are saved into VIS_DIR further down, so create it up front
VIS_DIR.mkdir(exist_ok=True, parents=True)

# Cleaned tables produced by 01_data_cleaning.ipynb
merged_path = CLEAN_DIR.joinpath('baltimore_igs_merged.csv')
deltas_path = CLEAN_DIR.joinpath('baltimore_igs_yoy_deltas.csv')

# Echo the resolved locations as the cell output
(merged_path, deltas_path, VIS_DIR)

In [None]:
# Load data (raises a clear error if a file is not present).
def _load_igs_csv(path):
    """Read one cleaned IGS CSV, keeping `geoid` as zero-padded text.

    GEOIDs are identifiers, not quantities. Left to type inference, pandas
    parses an all-digit column as integers, which breaks the string GEOID
    lookup in the tract spotlight helper and makes matplotlib treat GEOIDs as
    positions on a continuous axis in the bar charts.

    Parameters
    ----------
    path : pathlib.Path
        Location of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        The file contents; when a `geoid` column exists it holds strings
        left-padded with zeros to 11 characters.

    Raises
    ------
    FileNotFoundError
        If `path` does not exist.
    """
    if not path.exists():
        raise FileNotFoundError(
            f'Missing input file: {path}. Run 01_data_cleaning.ipynb first.'
        )
    frame = pd.read_csv(path, dtype={'geoid': str})
    if 'geoid' in frame.columns:
        frame['geoid'] = (
            frame['geoid']
            # Drop a trailing ".0" left behind if GEOIDs were written as floats.
            .str.replace(r'\.0$', '', regex=True)
            # Restore padding in case GEOIDs were written as plain integers.
            .str.zfill(11)
        )
    return frame


merged = _load_igs_csv(merged_path)
deltas = _load_igs_csv(deltas_path)
merged.head(), deltas.head()

## Identify primary score column

In [None]:
# Infer the primary score column.
# Exact, well-known names take priority over columns that merely contain
# "score". A single combined test cannot express that priority, because every
# exact name also contains "score", so the first substring match would win.
_EXACT_SCORE_NAMES = {'score', 'overall_score', 'inclusive_growth_score'}
_exact_matches = [c for c in merged.columns if str(c).lower() in _EXACT_SCORE_NAMES]
_partial_matches = [
    c for c in merged.columns
    if 'score' in str(c).lower() and c not in _exact_matches
]
# Same set of candidates as a plain substring search, but ordered so that
# exact names come first (column order is preserved within each group).
score_candidates = _exact_matches + _partial_matches
primary = score_candidates[0] if score_candidates else None
primary

## Top / Bottom movers in latest year (primary score YoY)

In [None]:
if primary is None:
    raise ValueError('Could not infer a primary score column. Please confirm in 01 notebook.')

# Derive YoY for the primary score directly from the merged table.
# `tmp` is kept as a notebook-level name because the tract spotlight helper
# reads `primary_score_yoy` from it.
tmp = merged.sort_values(['geoid', 'year']).copy()
_by_tract = tmp.groupby('geoid')

# groupby().diff() subtracts the previous *row* of each tract, which is only a
# year-over-year change when that row is exactly one year earlier. Where a
# tract skips a year (or has a duplicated year) the difference is masked to
# NaN instead of being reported as a YoY change.
_year_gap = _by_tract['year'].diff()
tmp['primary_score_yoy'] = _by_tract[primary].diff().where(_year_gap == 1)

# Restrict to the most recent year and drop tracts without a valid YoY value.
last_year = int(tmp['year'].max())
latest = tmp.loc[tmp['year'] == last_year, ['geoid', 'year', 'primary_score_yoy']].dropna()

# Ten largest gains and ten largest declines in the latest year.
top10 = latest.sort_values('primary_score_yoy', ascending=False).head(10)
bottom10 = latest.sort_values('primary_score_yoy', ascending=True).head(10)
top10, bottom10

## Plot: Distribution of YoY changes (primary score)

In [None]:
# Histogram of latest-year YoY changes in the primary score, saved to VIS_DIR
hist_path = VIS_DIR / 'igs_yoy_histogram.png'

hist_fig, hist_ax = plt.subplots()
latest['primary_score_yoy'].hist(bins=30, ax=hist_ax)
hist_ax.set_title('Distribution of YoY change in primary score (latest year)')
hist_ax.set_xlabel('YoY change')
hist_ax.set_ylabel('Count')

hist_fig.savefig(hist_path, bbox_inches='tight')
hist_path

## Plot: Top 10 & Bottom 10 Movers (bar charts)

In [None]:
# Horizontal bar chart of the ten largest latest-year YoY gains.
fig, ax = plt.subplots()

# Ascending order puts the largest gain at the top of a horizontal bar chart.
top10_sorted = top10.sort_values('primary_score_yoy', ascending=True)

# GEOIDs are labels, not quantities. Casting to str makes matplotlib draw one
# evenly spaced categorical bar per tract; integer GEOIDs would instead be used
# as positions on a continuous axis, leaving the bars practically invisible.
ax.barh(top10_sorted['geoid'].astype(str), top10_sorted['primary_score_yoy'])
ax.set_title('Top 10 Tracts by YoY (primary score) — latest year')
ax.set_xlabel('YoY change')
ax.set_ylabel('Tract GEOID')

top_path = VIS_DIR / 'igs_top10_primary_yoy.png'
fig.tight_layout()
fig.savefig(top_path, bbox_inches='tight')
top_path

In [None]:
# Horizontal bar chart of the ten largest latest-year YoY declines.
fig, ax = plt.subplots()

# Ascending order puts the largest decline at the bottom of the chart.
bottom10_sorted = bottom10.sort_values('primary_score_yoy', ascending=True)

# GEOIDs are labels, not quantities. Casting to str makes matplotlib draw one
# evenly spaced categorical bar per tract; integer GEOIDs would instead be used
# as positions on a continuous axis, leaving the bars practically invisible.
ax.barh(bottom10_sorted['geoid'].astype(str), bottom10_sorted['primary_score_yoy'])
ax.set_title('Bottom 10 Tracts by YoY (primary score) — latest year')
ax.set_xlabel('YoY change')
ax.set_ylabel('Tract GEOID')

bot_path = VIS_DIR / 'igs_bottom10_primary_yoy.png'
fig.tight_layout()
fig.savefig(bot_path, bbox_inches='tight')
bot_path

## Yearly summary table

In [None]:
# Descriptive statistics of the primary score, one row per year
_stat_names = ['count', 'mean', 'median', 'std', 'min', 'max']
_score_by_year = merged.groupby('year')[primary]
summary = _score_by_year.agg(_stat_names).reset_index()
summary

## Tract spotlight helper (enter a GEOID)

In [None]:
def tract_spotlight(geoid: str):
    """Display the score history and YoY changes for a single tract.

    Reads the notebook-level `merged` and `tmp` frames and the `primary`
    column name, and shows two tables via `display`: the primary score by
    year, and the primary-score YoY change by year.

    Parameters
    ----------
    geoid : str
        Tract GEOID. It is converted to a string and left-padded with zeros
        to 11 characters, so an integer GEOID is accepted as well.

    Returns
    -------
    None
        The tables are displayed, not returned. Both are empty when no row
        matches the GEOID.
    """
    geoid = str(geoid).zfill(11)

    def _rows_for_tract(frame):
        # The `geoid` column may have been parsed as integers (or floats) when
        # the CSV was read. Comparing those against the padded string would
        # never match, so bring the column to the same textual form first.
        keys = (
            frame['geoid']
            .astype(str)
            .str.replace(r'\.0$', '', regex=True)
            .str.zfill(11)
        )
        return frame[keys == geoid]

    hist = _rows_for_tract(merged).sort_values('year')[['year', primary]]
    yoy = _rows_for_tract(tmp)[['year', 'primary_score_yoy']]
    display(hist)
    display(yoy)

# Example usage (replace with a tract of interest):
# tract_spotlight('24510080100')