# SmartPave Analytics: Data Exploration

## Overview
This notebook explores road network, pavement condition, maintenance, and traffic data for 16,000 miles of roadway infrastructure.

## Objectives
- Load and examine the dataset structure
- Understand data quality and completeness
- Create initial visualizations
- Identify patterns and trends
- Prepare for feature engineering


## Setup and Imports


In [None]:
# Standard imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
# Silence every warning for the rest of the session so cell output stays uncluttered.
# NOTE(review): this also hides deprecation warnings from pandas/seaborn.
warnings.filterwarnings('ignore')

# Plot appearance: seaborn-flavoured matplotlib stylesheet plus the "husl" colour palette
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Pandas display: never truncate columns, cap printed rows at 100
for option_name, option_value in (
    ('display.max_columns', None),
    ('display.max_rows', 100),
):
    pd.set_option(option_name, option_value)

print("Libraries imported successfully!")


## Data Loading


In [None]:
# Pull the four source tables out of Snowflake into pandas DataFrames
print("Loading datasets from Snowflake...")

# Use the Snowpark session that is currently active in this environment
from snowflake.snowpark.context import get_active_session
session = get_active_session()


def _query_to_frame(query, date_column=None):
    """Run *query* on the active session and return the result as a DataFrame.

    When *date_column* is given, that column is converted in place with
    ``pd.to_datetime`` before the frame is returned.
    """
    # NOTE(review): the date_column lookup is case-sensitive; Snowflake reports
    # unquoted identifiers in uppercase - confirm the tables really expose a
    # lowercase 'date' column.
    frame = session.sql(query).to_pandas()
    if date_column is not None:
        frame[date_column] = pd.to_datetime(frame[date_column])
    return frame


# Road network data
roads_df = _query_to_frame("SELECT * FROM road_network")
print(f"Road network: {len(roads_df):,} segments")

# Pavement condition data (survey date parsed to datetime)
condition_df = _query_to_frame("SELECT * FROM pavement_condition", date_column='date')
print(f"Pavement condition: {len(condition_df):,} records")

# Maintenance records (maintenance date parsed to datetime)
maintenance_df = _query_to_frame("SELECT * FROM maintenance_records", date_column='date')
print(f"Maintenance records: {len(maintenance_df):,} records")

# Traffic data
traffic_df = _query_to_frame("SELECT * FROM traffic_data")
print(f"Traffic data: {len(traffic_df):,} records")

print("\n✅ All datasets loaded successfully from Snowflake!")
