## Setup

In [None]:
import re
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd

print("üìä Crime Data Processing Setup")
print("‚úÖ Libraries loaded")

## Load Crime Data

**Note:** This cell assumes you've downloaded the crime data CSV from the Forney PD portal and saved it as `gateway_crime_data.csv` in the analysis directory.

In [None]:
# Locate the crime data export; the path is relative to the notebook's
# working directory.
crime_file = Path('gateway_crime_data.csv')

if not crime_file.exists():
    # Nothing to load: tell the user exactly where the CSV is expected.
    print("‚ùå Crime data file not found!")
    print("\nPlease download crime data from Forney PD portal and save as:")
    print(f"  {crime_file.absolute()}")
    print("\nSee instructions in the markdown cell above.")
else:
    print(f"üìÇ Loading crime data from {crime_file}...")

    # Column names are taken as-is from the export; later cells select the
    # fields they need by name.
    crime_df = pd.read_csv(crime_file)

    incident_total = len(crime_df)
    print(f"\n‚úÖ Loaded {incident_total:,} crime incidents")

    # List every column so the field names used in later cells can be checked.
    print("\nüìã Available columns:")
    for column_name in crime_df.columns:
        print(f"  - {column_name}")

    # Preview the first rows (display is provided by the notebook environment).
    print("\nüìä Sample data:")
    display(crime_df.head())

## Parse and Standardize Street Names

Extract street names from addresses to match with CAD property data.

In [None]:
# Derive a normalized 'street_name' column from each incident address so that
# incidents can be grouped by street and matched with CAD property data.
# Requires crime_file / crime_df from the load cell.

if crime_file.exists():
    # Adjust 'Address' to actual field name from your CSV
    address_field = 'Address'  # UPDATE THIS based on actual column name

    if address_field in crime_df.columns:
        # One word followed by a street-type suffix (basic example - multi-word
        # street names keep only their last word). The trailing \b forces the
        # suffix to be a complete word: without it "100 Stone Way" is read as
        # "100 ST" and "N Drake St" as "N DR".
        street_pattern = r'(\w+\s+(?:DR|DRIVE|CT|COURT|ST|STREET|LN|LANE|WAY)\b)'

        # Spelled-out suffixes are folded into their abbreviations so the same
        # street is not split into e.g. "ARBOR DR" and "ARBOR DRIVE".
        suffix_abbreviations = {
            r'\bDRIVE$': 'DR',
            r'\bCOURT$': 'CT',
            r'\bSTREET$': 'ST',
            r'\bLANE$': 'LN',
        }

        extracted = crime_df[address_field].str.extract(street_pattern, flags=re.IGNORECASE)[0]
        normalized = (
            extracted.str.upper()
            .str.strip()
            .str.replace(r'\s+', ' ', regex=True)  # collapse repeated whitespace
        )
        # Addresses with no recognizable street stay NaN.
        crime_df['street_name'] = normalized.replace(suffix_abbreviations, regex=True)

        print("üìç Unique streets in crime data:")
        print(crime_df['street_name'].value_counts())
    else:
        print(f"‚ö†Ô∏è  Address field '{address_field}' not found in data")
        print(f"Available columns: {list(crime_df.columns)}")

## Filter for Gateway Parks Area

Focus on high-crime streets: Arbor, Pueblo, Everglades

In [None]:
# Keep only incidents on the Gateway Parks target streets and save them to
# gateway_crime_filtered.csv. Requires crime_df with the 'street_name' column
# produced by the street-name parsing cell.
if crime_file.exists() and 'street_name' in crime_df.columns:
    # Gateway Parks target streets
    target_streets = ['ARBOR', 'PUEBLO', 'EVERGLADES']

    # Match each target as a whole word anywhere in the street name. A plain
    # substring match would also select unrelated streets such as "HARBOR DR"
    # (which contains "ARBOR"). The names are used as regex alternatives, so
    # any target containing regex metacharacters would need escaping.
    target_pattern = r'\b(?:' + '|'.join(target_streets) + r')\b'
    on_target_street = crime_df['street_name'].str.contains(target_pattern, na=False)

    # Copy so the filtered frame is independent of crime_df.
    gateway_crimes = crime_df[on_target_street].copy()

    print(f"üéØ Gateway Parks Crime Incidents: {len(gateway_crimes):,}")
    print(f"\nüìä Breakdown by street:")
    print(gateway_crimes['street_name'].value_counts())

    # Save filtered dataset
    gateway_crimes.to_csv('gateway_crime_filtered.csv', index=False)
    print(f"\n‚úÖ Saved filtered data to gateway_crime_filtered.csv")

## Crime Category Analysis

In [None]:
# Summarize incident categories for the Gateway Parks subset: raw counts,
# then the ten most frequent categories with their share of all incidents.
if crime_file.exists():
    # Adjust 'Category' or 'Incident_Type' to actual field name
    category_field = 'Category'  # UPDATE THIS

    if 'gateway_crimes' not in globals():
        # gateway_crimes only exists once the Gateway Parks filter cell has
        # run successfully; without this check the cell fails with NameError.
        print("gateway_crimes is not defined - run the Gateway Parks filter cell first")
    elif category_field in gateway_crimes.columns:
        # Count once and reuse for both the listing and the percentages.
        crime_counts = gateway_crimes[category_field].value_counts()

        print("üö® Crime Types in Gateway Parks:")
        print(crime_counts)

        # Percentages are relative to all Gateway incidents, including rows
        # whose category is missing (value_counts itself skips those rows).
        crime_pct = (crime_counts / len(gateway_crimes) * 100).round(1)

        print(f"\nüìä Crime Distribution:")
        for crime_type, count in crime_counts.head(10).items():
            pct = crime_pct[crime_type]
            print(f"  {crime_type}: {count:,} ({pct}%)")
    else:
        # Mirror the address cell: say which field is missing and what exists.
        print(f"Category field '{category_field}' not found in data")
        print(f"Available columns: {list(gateway_crimes.columns)}")

## Temporal Analysis - Crime Trends

Analyze crime patterns over time to support/refute "recent increase" claim.

In [None]:
# Yearly incident counts and year-over-year percentage change for the full
# crime dataset. Adds parsed date, 'year' and 'month' columns to crime_df.
if crime_file.exists():
    # Adjust date field name
    date_field = 'Incident_Date'  # UPDATE THIS

    if date_field in crime_df.columns:
        # Parse dates. errors='coerce' turns unparseable values into NaT so a
        # single malformed row does not abort the whole cell.
        crime_df[date_field] = pd.to_datetime(crime_df[date_field], errors='coerce')
        undated = int(crime_df[date_field].isna().sum())
        if undated:
            print(f"Note: {undated:,} incidents have a missing or unparseable date and are excluded from the trends")

        crime_df['year'] = crime_df[date_field].dt.year
        crime_df['month'] = crime_df[date_field].dt.to_period('M')

        # Yearly trends. Any NaT makes the 'year' column float, so drop the
        # missing values and cast back to int to print "2023", not "2023.0".
        print("üìÖ Crime Incidents by Year:")
        yearly_counts = crime_df['year'].dropna().astype(int).value_counts().sort_index()
        for year, count in yearly_counts.items():
            print(f"  {year}: {count:,} incidents")

        # Calculate year-over-year change between consecutive years present
        # in the data (years with no incidents do not appear at all).
        if len(yearly_counts) > 1:
            print(f"\nüìà Trends:")
            consecutive_years = zip(yearly_counts.items(), yearly_counts.iloc[1:].items())
            for (prev_year, prev_count), (curr_year, curr_count) in consecutive_years:
                # prev_count is never zero: value_counts only lists years
                # that have at least one incident.
                change = ((curr_count - prev_count) / prev_count * 100)
                direction = "üìà" if change > 0 else "üìâ"
                print(f"  {prev_year} ‚Üí {curr_year}: {change:+.1f}% {direction}")

## Export for Main Analysis

Create a clean dataset for the ownership-crime correlation analysis.

In [None]:
# Build the per-street incident totals for the Gateway Parks subset and write
# them to gateway_crime_by_street.csv (index: street_name, column:
# total_incidents). Requires gateway_crimes from the filter cell.
if (
    crime_file.exists()
    and 'gateway_crimes' in globals()
    and 'street_name' in gateway_crimes.columns
):
    # Create summary by street. size() counts every incident row, so the
    # total does not depend on the date column being present or non-null,
    # nor on the temporal-analysis cell having run.
    street_summary = (
        gateway_crimes.groupby('street_name')
        .size()
        .to_frame('total_incidents')
        .sort_values('total_incidents', ascending=False)
    )

    # NOTE: crime density per unique address is not computed here.

    print("üìä Crime Summary by Street:")
    print(street_summary)

    # Export
    street_summary.to_csv('gateway_crime_by_street.csv')
    print(f"\n‚úÖ Saved street summary to gateway_crime_by_street.csv")
    print(f"\nüî¨ Ready for ownership correlation analysis!")

---

## Next Steps

Once crime data is extracted and processed:

1. ✅ Crime data cleaned and summarized by street
2. ➡️ Proceed to `gateway_parks_crime_ownership_analysis.ipynb`
3. ➡️ Merge with property ownership data from CAD
4. ➡️ Perform correlation analysis
5. ➡️ Generate visualizations and conclusions