# <center> CSCI 395 - NYC Subway Project<center>

### <center> José Miguel Ortiz
### <center> Hunter College <center>
### <center> CSCI 395: Introduction to Data Science <center>
### <center> Prof. Adrián Soto Cambres
### <center> Date: 04/08/2025

In [None]:
'''
Name: Jose Miguel Ortiz
Email: jose.ortiz60@lagcc.cuny.edu
Date: 04/08/2025
'''

Test 2

# NYC Subway Navigator: An Interactive Transit Analysis Tool

## Exploratory Analysis & Visualization

In [7]:
# libraries for data manipulation
import pandas as pd
import seaborn as sns
from mpl_toolkits.basemap import Basemap

# libraries for data visualization
from matplotlib import pyplot as plt

# jupyter extension to render charts inline
%matplotlib inline

# Load sample data
df = pd.read_csv('nyc_subway_stations.csv')

# Clean data
df = df[['Station Name', 'Entrance Latitude', 'Entrance Longitude',
         'ADA', 'Route1', 'Entrance Type']].dropna()

ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject

### 1. Station Density Heatmap

In [8]:
if __name__ == "__main__":
    plt.figure(figsize=(12,8))
    m = Basemap(projection='lcc', resolution='h',
                lat_0=40.7, lon_0=-74,
                width=1e6, height=1.2e6)
    m.shadedrelief()
    m.drawcoastlines(color='gray')
    m.drawcountries(color='gray')
    m.drawstates(color='gray')

    # Plot stations
    x, y = m(df['Entrance Longitude'].values, df['Entrance Latitude'].values)
    m.scatter(x, y, s=10, c='red', alpha=0.5,
              edgecolor='k', linewidth=0.5)
    plt.title('NYC Subway Station Density')
    plt.show()

NameError: name 'plt' is not defined

### 2. Route Connectivity Network

In [9]:
from collections import defaultdict

# Generate route connections
if __name__ == "__main__":
    route_graph = defaultdict(list)
    for _, row in df.iterrows():
        routes = [r for r in row[['Route1'] + [f'Route{i}' for i in range(2,12)] if pd.notna(r)]
        for i in range(len(routes)):
            for j in range(i+1, len(routes)):
                route_graph[routes[i]].append(routes[j])

    # Visualize
    plt.figure(figsize=(14,8))
    sns.heatmap(pd.DataFrame(route_graph).T.notna(), cmap='Blues', cbar=False)
    plt.title('Route Connection Matrix')
    plt.xlabel('Connected Routes')
    plt.ylabel('Primary Route')
    plt.xticks(rotation=45)
    plt.show()

SyntaxError: expected 'else' after 'if' expression (3059259041.py, line 7)

### 3. Accessibility Analysis

In [None]:
fig, ax = plt.subplots(1,2, figsize=(16,6))

# ADA Compliance
ada_counts = df['ADA'].value_counts()
ax[0].pie(ada_counts, labels=ada_counts.index,
          autopct='%1.1f%%', startangle=90,
          colors=['#4CAF50', '#F44336'])
ax[0].set_title('ADA Compliance Distribution')

# Entrance Types
entrance_counts = df['Entrance Type'].value_counts()
sns.barplot(x=entrance_counts.values, y=entrance_counts.index,
            ax=ax[1], palette='viridis')
ax[1].set_title('Entrance Type Distribution')
plt.tight_layout()
plt.show()

### 4. Interactive Route Explorer

In [None]:
import folium

# Create base map
nyc_coords = [40.7128, -74.0060]
subway_map = folium.Map(location=nyc_coords, zoom_start=11)

# Add route markers
for _, row in df.sample(100).iterrows():
    popup_text = f"""
    <b>{row['Station Name']}</b><br>
    Routes: {row.filter(like='Route').dropna().tolist()}<br>
    ADA: {row['ADA']}
    """
    folium.Marker(
        location=[row['Entrance Latitude'], row['Entrance Longitude']],
        popup=folium.Popup(popup_text, max_width=250),
        icon=folium.Icon(color='blue' if row['ADA']=='YES' else 'gray')
    ).add_to(subway_map)

subway_map

### 5. Station Distance Analysis

In [4]:
# Calculate inter-station distances
from scipy.spatial.distance import pdist, squareform

coords = df[['Entrance Latitude', 'Entrance Longitude']].values
distances = squareform(pdist(coords, lambda u,v: great_circle(u,v).km))

plt.figure(figsize=(10,6))
sns.histplot(distances.flatten(), bins=50, kde=True,
             color='purple', edgecolor='black')
plt.title('Inter-Station Distance Distribution')
plt.xlabel('Distance (km)')
plt.ylabel('Frequency')
plt.xlim(0, 10)
plt.show()

NameError: name 'df' is not defined