In [None]:
# Section 1: Load the ChesapeakeNeighborhoods Dataset
import pandas as pd
import os

# Locate the CSV: start with the copy in the working directory and only
# switch to the data/ subdirectory when that copy is absent.
csv_path = "ChesapeakeNeighborhoods.csv"
if not os.path.exists(csv_path):
    csv_path = "data/ChesapeakeNeighborhoods.csv"

# Read the file into the DataFrame used by every later cell.
df = pd.read_csv(csv_path)

# Preview the first few rows
df.head()

# Chesapeake Neighborhoods Data Analysis

This notebook explores and analyzes the `ChesapeakeNeighborhoods.csv` dataset. We inspect its structure and missing values, summarize and visualize neighborhood areas, aggregate the data by sector, and export a cleaned copy, all to gain insight into Chesapeake's neighborhoods.

In [None]:
# Section 2: Inspect Data Structure and Missing Values
# Gather the three structural views first, then report them in order.
column_names = df.columns.tolist()
column_dtypes = df.dtypes
missing_counts = df.isna().sum()  # number of null entries in each column

print("Columns:", column_names)
print("Data types:\n", column_dtypes)
print("Missing values per column:\n", missing_counts)

In [None]:
# Section 3: Summary Statistics for Area and Length
# Run describe() on both shape columns in one pass; the unpacking order
# follows the column order in the tuple.
area_stats, length_stats = (
    df[column].describe() for column in ('SHAPESTArea', 'SHAPESTLength')
)

print("Area Statistics:\n", area_stats)
print("Length Statistics:\n", length_stats)

In [None]:
# Section 4: Top 10 Largest Neighborhoods by Area
# Order every row from largest to smallest area, then keep the first ten.
areas_descending = df.sort_values(by='SHAPESTArea', ascending=False)
top10_area = areas_descending.head(10)

# Show only the identifying columns alongside the area itself.
display_columns = ['NBRHD_NAME', 'SECTOR', 'SHAPESTArea']
top10_area[display_columns]

In [None]:
# Section 5: Neighborhoods with Missing or Unusual Data
# Flag rows that lack a name, a sector, or an area, or whose area is below 1.
# The explicit isnull() on SHAPESTArea matters: `NaN < 1` evaluates to False,
# so a row with a missing area would otherwise never appear in this report,
# even though a `SHAPESTArea >= 1` filter (as used in Section 8) discards it.
missing_name = df['NBRHD_NAME'].isnull()
missing_sector = df['SECTOR'].isnull()
missing_area = df['SHAPESTArea'].isnull()
tiny_area = df['SHAPESTArea'] < 1

missing_or_unusual = df[missing_name | missing_sector | missing_area | tiny_area]
missing_or_unusual

In [None]:
# Section 6: Distribution Plot of Neighborhood Areas
import matplotlib.pyplot as plt
import seaborn as sns

# Build the figure through the explicit Axes interface so every label is
# attached to a known Axes object rather than the implicit "current" one.
fig, ax = plt.subplots(figsize=(10, 6))

# Histogram of neighborhood areas in 30 bins with a KDE curve overlaid.
sns.histplot(data=df, x='SHAPESTArea', bins=30, kde=True, ax=ax)

ax.set_title('Distribution of Neighborhood Areas')
ax.set_xlabel('Area')
ax.set_ylabel('Count')
plt.show()

In [None]:
# Section 7: Group and Aggregate by SECTOR
# Bucket the rows by sector, then compute one summary row per sector:
#   count      - non-null neighborhood names in the sector
#   mean_area  - average SHAPESTArea in the sector
#   total_area - summed SHAPESTArea in the sector
by_sector = df.groupby('SECTOR')
grouped = by_sector.agg(
    count=pd.NamedAgg(column='NBRHD_NAME', aggfunc='count'),
    mean_area=pd.NamedAgg(column='SHAPESTArea', aggfunc='mean'),
    total_area=pd.NamedAgg(column='SHAPESTArea', aggfunc='sum'),
)
grouped

In [None]:
# Section 8: Export Cleaned Data
# Build the cleaned frame in one chain without modifying `df`:
#   1. drop rows lacking a neighborhood name or a sector,
#   2. keep only rows whose area is at least 1.
cleaned_df = (
    df
    .dropna(subset=['NBRHD_NAME', 'SECTOR'])
    .loc[lambda frame: frame['SHAPESTArea'] >= 1]
)

# Write the result next to the notebook, omitting the index column.
cleaned_df.to_csv('ChesapeakeNeighborhoods_cleaned.csv', index=False)
print('Exported cleaned data to ChesapeakeNeighborhoods_cleaned.csv')