# Chicago SMB Market Radar — Data Exploration

This notebook provides the foundation for analyzing Chicago small business data from Google Sheets.

## Objectives
- Connect to Google Sheets data warehouse
- Load and explore business licenses, permits, and CTA data
- Perform initial data quality assessment
- Set up reusable analysis functions


## Setup and Imports


In [None]:
# Analysis and plotting stack used throughout the notebook.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
# Suppresses every warning for the rest of the session; this keeps cell output
# tidy but also hides deprecation notices from the libraries imported above.
warnings.filterwarnings('ignore')

# Set up plotting style
# NOTE(review): 'seaborn-v0_8' is the style-sheet name used by newer matplotlib
# releases — confirm it exists in the matplotlib version pinned for this project.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Import our custom modules
import sys
# The relative path is resolved against the current working directory, so the
# append must happen before the two project imports below.
# NOTE(review): presumably the notebook is run from its own folder — confirm.
sys.path.append('../src')
from sheets import open_sheet
from config import load_settings

print("✅ Imports successful")


## Google Sheets Connection

Connect to our data warehouse and list the worksheets it contains.


In [None]:
# Load configuration
settings = load_settings()
print(f"📊 Sheet ID: {settings.sheet_id}")
print(f"📁 Credentials: {settings.google_creds_path}")

# Bind both names before attempting the connection. If the attempt fails,
# later cells then see an explicit "no connection" value (None / empty list)
# instead of raising NameError or silently reusing a handle left over from an
# earlier run of this cell.
sh = None
worksheets = []

# Connect to Google Sheets
try:
    sh = open_sheet(settings.sheet_id, settings.google_creds_path)
    print("✅ Successfully connected to Google Sheets")

    # List available worksheets
    worksheets = [ws.title for ws in sh.worksheets()]
    print(f"📋 Available worksheets: {worksheets}")

except Exception as e:
    # Deliberately broad: open_sheet is a project helper whose failure modes
    # are not visible here, and the notebook should report the problem and
    # keep running rather than abort the cell with a traceback.
    print(f"❌ Error connecting to Google Sheets: {e}")
    print("Please check your .env file and Google credentials")


## Data Loading Functions

Create reusable functions to load data from Google Sheets and to summarize each loaded dataset.


In [None]:
def load_sheet_data(sheet, worksheet_name, parse_dates=None):
    """
    Load data from a Google Sheets worksheet into a pandas DataFrame.

    Args:
        sheet: Google Sheets object; must provide ``worksheet(name)``, whose
            result must provide ``get_all_records()``.
        worksheet_name: Name of the worksheet
        parse_dates: A single column name, or a list of column names, to
            parse as dates. Names that are not columns of the loaded data
            are skipped; values that cannot be parsed become ``NaT``.

    Returns:
        pandas.DataFrame: Loaded data. If anything goes wrong while loading
        or parsing, the error is printed (not raised) and an empty DataFrame
        is returned, so callers should check ``df.empty``.
    """
    try:
        ws = sheet.worksheet(worksheet_name)
        df = pd.DataFrame(ws.get_all_records())
        # NOTE(review): blank cells may arrive as empty strings rather than
        # missing values, which would make null counts look too low —
        # confirm against the sheets client in use.

        # A bare string is itself iterable, so looping over it would test each
        # character as a column name and silently parse nothing. Treat it as
        # a one-element list instead.
        if isinstance(parse_dates, str):
            date_columns = [parse_dates]
        else:
            date_columns = parse_dates or []

        # Parse dates if specified
        for col in date_columns:
            if col in df.columns:
                df[col] = pd.to_datetime(df[col], errors='coerce')

        print(f"✅ Loaded {len(df)} rows from '{worksheet_name}'")
        return df

    except Exception as e:
        # Best-effort by design: one unreadable worksheet should not stop the
        # rest of the notebook from running.
        print(f"❌ Error loading '{worksheet_name}': {e}")
        return pd.DataFrame()

def get_data_summary(df, name):
    """
    Print an overview of a dataset: size, memory footprint, per-column nulls.

    The per-column section is skipped when the frame is empty; the closing
    separator line is always printed. Nothing is returned.

    Args:
        df: pandas.DataFrame
        name: Dataset name for display
    """
    row_total = len(df)

    def describe(column):
        # One report line per column: dtype plus missing-value count and share.
        series = df[column]
        missing = series.isnull().sum()
        share = (missing / row_total) * 100
        return f"   {column}: {series.dtype} ({missing:,} nulls, {share:.1f}%)"

    print(f"\n📊 {name} Dataset Summary")
    print(f"   Rows: {row_total:,}")
    print(f"   Columns: {len(df.columns)}")
    bytes_used = df.memory_usage(deep=True).sum()
    print(f"   Memory usage: {bytes_used / 1024**2:.2f} MB")

    # Guarding on emptiness also keeps the percentage from dividing by zero.
    if not df.empty:
        print("\n📋 Column Information:")
        for column in df.columns:
            print(describe(column))

    separator = "=" * 50
    print("\n" + separator)

# Progress marker: reaching this line means the cell's definitions ran without error.
print("✅ Data loading functions defined")
