# Weather Data Analysis Example

This notebook demonstrates how to use the `WeatherDataReader` utility to load and analyze weather data from S3.


In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from weather_data_reader import WeatherDataReader

# Notebook-wide plot defaults: seaborn "whitegrid" style, then a wide default figure
sns.set_style("whitegrid")
plt.rcParams.update({'figure.figsize': (12, 6)})


## Initialize the Reader

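The AWS credentials are automatically loaded from your `.env` file. The bucket name is read from the `AWS_BUCKET_NAME` environment variable; if it is not set, the placeholder `your-bucket-name` is used, so replace it with the name of your own bucket.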


In [None]:
# Resolve the bucket name from the environment. The literal below is only a
# placeholder fallback, so announce when it is used instead of letting later
# reads fail against a bucket that was never configured.
bucket_name = os.getenv('AWS_BUCKET_NAME')
if not bucket_name:
    bucket_name = 'your-bucket-name'
    print(f"⚠ AWS_BUCKET_NAME is not set; falling back to bucket '{bucket_name}'")

# Initialize the reader with your S3 bucket details
reader = WeatherDataReader(
    bucket=bucket_name,
    bronze_prefix="samples",
    silver_prefix="silver",
    region="us-west-2"
)

print("✓ WeatherDataReader initialized")


## List Available Data


In [None]:
# Ask the reader which dates exist for the "silver" layer
available_dates = reader.list_available_dates(layer="silver")

# Resolve both endpoints up front; an empty listing is reported as 'N/A'
if available_dates:
    first_date, last_date = available_dates[0], available_dates[-1]
else:
    first_date = last_date = 'N/A'

print(f"Data available for {len(available_dates)} dates:")
print(f"First date: {first_date}")
print(f"Last date: {last_date}")


## Load Recent Data


In [None]:
# Get last 24 hours of enriched data
df_24h = reader.get_readings(hours=24, layer="silver")

print(f"Loaded {len(df_24h)} readings from the last 24 hours")
print(f"\nColumns: {list(df_24h.columns)}")

# Only report the time span when it can be computed: an empty result (or one
# without a 'timestamp' column) would otherwise raise KeyError or print NaT
# and stop a "Run All" here.
if len(df_24h) > 0 and 'timestamp' in df_24h.columns:
    print(f"\nFirst reading: {df_24h['timestamp'].min()}")
    print(f"Last reading: {df_24h['timestamp'].max()}")
else:
    print("\nNo timestamped readings found in the last 24 hours")

# Display first few rows
df_24h.head()


## Load Historical Data by Date Range


In [None]:
# Load every "silver" reading for the requested start/end dates
historical_range = {"start_date": "2025-10-01", "end_date": "2025-10-07"}
df_historical = reader.get_readings_by_date_range(layer="silver", **historical_range)

print(f"Loaded {len(df_historical)} readings from date range")

# Preview the first rows as the cell output
df_historical.head()


## Get Latest Reading


In [None]:
# Fetch the newest "silver" reading and print it one field per line
latest = reader.get_latest_reading(layer="silver")

if not latest:
    # Nothing (or an empty result) came back from the reader
    print("No recent readings found")
else:
    print("Latest weather reading:")
    for key, value in latest.items():
        print(f"  {key}: {value}")


## Visualize Temperature Trends


In [None]:
# Line chart of temperature against time for the last-24-hours frame
if 'temperature_celsius' not in df_24h.columns:
    print("Temperature column not found in data")
else:
    # Explicit figure/axes pair instead of the implicit pyplot state machine
    fig, ax = plt.subplots(figsize=(14, 6))
    ax.plot(df_24h['timestamp'], df_24h['temperature_celsius'], linewidth=2)
    ax.set_title('Temperature Over Last 24 Hours', fontsize=16)
    ax.set_xlabel('Time', fontsize=12)
    ax.set_ylabel('Temperature (°C)', fontsize=12)
    # Acts on the current axes, which is `ax`; tilts the tick labels
    plt.xticks(rotation=45)
    fig.tight_layout()
    ax.grid(True, alpha=0.3)
    plt.show()


## Get Daily Aggregates


In [None]:
# Ask the reader for per-day aggregates over the same start/end dates
aggregate_range = {"start_date": "2025-10-01", "end_date": "2025-10-07"}
daily_stats = reader.get_daily_aggregates(layer="silver", **aggregate_range)

print(f"Daily statistics for {len(daily_stats)} days")

# Preview the first rows as the cell output
daily_stats.head()


## Prepare Data for LSTM Model

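Example of preparing time series data for LSTM training: select the feature columns that are present, standardize them, and convert the result to a PyTorch tensor.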


In [None]:
import numpy as np
import torch
from sklearn.preprocessing import StandardScaler

# Get a week of data for training
df_train = reader.get_readings_by_date_range(
    start_date="2025-10-01",
    end_date="2025-10-07",
    layer="silver"
)

if not df_train.empty:
    # Select features for LSTM (adjust based on your data schema)
    feature_cols = ['temperature_celsius', 'humidity_percent', 'pressure_hpa']
    available_features = [col for col in feature_cols if col in df_train.columns]

    if available_features:
        # Keep only complete rows. StandardScaler ignores NaN while fitting but
        # passes it through on transform, so missing readings would otherwise
        # end up as NaN inside the training tensor.
        # NOTE(review): dropping rows breaks even time spacing; interpolate
        # instead if the model relies on a fixed sampling interval.
        features_clean = df_train[available_features].dropna()
        dropped_rows = len(df_train) - len(features_clean)
        if dropped_rows:
            print(f"Dropped {dropped_rows} rows with missing feature values")

        if not features_clean.empty:
            # Extract features as a plain NumPy array
            data = features_clean.to_numpy()

            # Normalize data (zero mean, unit variance per feature)
            scaler = StandardScaler()
            data_normalized = scaler.fit_transform(data)

            # Convert to a float32 PyTorch tensor; torch.tensor with an
            # explicit dtype replaces the legacy torch.FloatTensor constructor
            data_tensor = torch.tensor(data_normalized, dtype=torch.float32)

            print(f"Prepared {len(data_tensor)} samples with {len(available_features)} features")
            print(f"Features: {available_features}")
            print(f"Tensor shape: {data_tensor.shape}")
        else:
            # fit_transform raises on zero samples, so stop here instead
            print("No complete feature rows left after removing missing values")
    else:
        print("No matching features found in data")
else:
    print("No training data available")


## Next Steps

Now you can:
1. Build your LSTM model using PyTorch
2. Train on historical weather data
3. Export the model to ONNX format for deployment

Example LSTM model structure:
```python
import torch.nn as nn

class WeatherLSTM(nn.Module):
    """LSTM followed by a linear head applied to the final time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values produced per input sequence.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        # batch_first=True: inputs are laid out as (batch, seq_len, input_size)
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a (batch, seq_len, input_size) tensor to (batch, output_size)."""
        sequence_out, _ = self.lstm(x)
        # Only the last time step's hidden output feeds the linear head
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)
```
