# Satellite Imagery Analysis - Crop Health Monitoring

## Tier 2 AWS Project: NDVI Calculation with Lambda

This notebook analyzes satellite imagery processed by AWS Lambda to monitor crop health.

**What you'll learn:**
- Load and visualize NDVI metrics
- Analyze crop health trends
- Create interactive maps
- Compare field performance

---

## Setup: Import Libraries

In [None]:
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from datetime import datetime
import warnings

# Suppress every warning for the rest of the session so cell output stays short.
# NOTE(review): this also hides deprecation notices; narrow the filter when debugging.
warnings.filterwarnings('ignore')

# Set visualization style: seaborn dark grid and a wide default figure size
sns.set_style('darkgrid')
plt.rcParams['figure.figsize'] = (12, 6)

print("✓ Libraries imported successfully")

## Step 1: Load Data

Load the NDVI metrics that `query_results.py` downloaded from S3 into `../results/`. If the file is missing, the cell falls back to generated sample data.

In [None]:
# Load the per-field NDVI metrics from ../results/metrics_summary.csv.
# When that file is missing, build a small synthetic dataset instead so the
# remaining cells can still be run end to end.
results_dir = Path('../results')
csv_file = results_dir / 'metrics_summary.csv'

if csv_file.exists():
    # Keep `date` as text (YYYYMMDD) so both code paths produce the same dtype
    # and the '%Y%m%d' parsing done later sees strings, not integers.
    df = pd.read_csv(csv_file, dtype={'date': str})
    print(f"✓ Loaded data: {csv_file}")
    print(f"  Rows: {len(df)}")
    print(f"  Columns: {list(df.columns)}")
else:
    print(f"⚠ File not found: {csv_file}")
    print("   Run: python ../scripts/query_results.py --bucket YOUR_BUCKET")

    # Create sample data for demonstration
    print("\n  Creating sample data for demonstration...")
    np.random.seed(42)  # fixed seed: the sample is identical on every run

    fields = ['field_001', 'field_002', 'field_003']
    dates = pd.date_range('2024-06-01', '2024-06-30', freq='15D')

    data = []
    for field in fields:
        for date in dates:
            avg_ndvi = np.random.uniform(0.4, 0.8)
            # The raw draw ranges overlap the average's range, so clamp them
            # to guarantee min_ndvi <= avg_ndvi <= max_ndvi in every row.
            min_ndvi = min(np.random.uniform(0.2, 0.5), avg_ndvi)
            max_ndvi = max(np.random.uniform(0.7, 0.95), avg_ndvi)

            # Derive the label from the NDVI value (same 0.65 / 0.45 cut-offs
            # as the field recommendations) instead of picking it at random,
            # so the status charts and the NDVI-based ranking tell one story.
            if avg_ndvi > 0.65:
                health_status = 'Healthy'
            elif avg_ndvi > 0.45:
                health_status = 'Moderate'
            else:
                health_status = 'Stressed'

            data.append({
                'field_id': field,
                'date': date.strftime('%Y%m%d'),
                'avg_ndvi': avg_ndvi,
                'min_ndvi': min_ndvi,
                'max_ndvi': max_ndvi,
                'std_ndvi': np.random.uniform(0.05, 0.15),
                'vegetation_coverage': np.random.uniform(0.6, 0.95),
                'health_status': health_status,
            })

    df = pd.DataFrame(data)
    print(f"  ✓ Sample data created with {len(df)} observations")

# Display first few rows
print("\nFirst few rows:")
print(df.head(10))

## Step 2: Data Exploration

In [None]:
# Quick textual overview of the loaded metrics: size, field count, date span,
# NDVI descriptive statistics, label frequencies and per-column null counts.
banner = "=" * 60
print("Data Summary")
print(banner)

n_fields = df['field_id'].nunique()
print(f"\nDataset Shape: {df.shape}")
print(f"\nUnique Fields: {n_fields}")

date_col = df['date']
print(f"Date Range: {date_col.min()} to {date_col.max()}")

# Descriptive statistics for the three NDVI columns, rounded to 4 decimals
ndvi_columns = ['avg_ndvi', 'min_ndvi', 'max_ndvi']
ndvi_summary = df[ndvi_columns].describe().round(4)
print("\nNDVI Statistics:")
print(ndvi_summary)

# How many observations carry each health label
status_counts = df['health_status'].value_counts()
print("\nHealth Status Distribution:")
print(status_counts)

# Number of null entries in every column
null_counts = df.isnull().sum()
print("\nMissing Values:")
print(null_counts)

## Step 3: NDVI Analysis

Analyze vegetation index trends and patterns

In [None]:
# Parse the YYYYMMDD `date` values into timestamps; the new `date_dt` column
# is added to `df` in place so it can be sorted and plotted chronologically.
df['date_dt'] = pd.to_datetime(df['date'], format='%Y%m%d')

# Per-field summary of avg_ndvi across all observations (mean/min/max/std)
field_avg = df.groupby('field_id')['avg_ndvi'].agg(['mean', 'min', 'max', 'std']).round(4)
print("\nAverage NDVI by Field:")
print(field_avg)

# Two panels: mean NDVI per field (left) and its distribution (right)
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Bar chart with dashed reference lines at NDVI 0.5 and 0.65
field_avg['mean'].plot(kind='bar', ax=axes[0], color='green', alpha=0.7)
axes[0].set_title('Average NDVI by Field', fontsize=12, fontweight='bold')
axes[0].set_ylabel('NDVI')
axes[0].set_xlabel('Field ID')
axes[0].grid(True, alpha=0.3)
axes[0].axhline(y=0.5, color='orange', linestyle='--', label='Moderate Health')
axes[0].axhline(y=0.65, color='green', linestyle='--', label='Healthy')
axes[0].legend()

# Box plot
df.boxplot(column='avg_ndvi', by='field_id', ax=axes[1])
# With `by=`, pandas also sets a figure-level "Boxplot grouped by field_id"
# title. It would sit above BOTH panels, so clear it and keep the axes titles.
fig.suptitle('')
axes[1].set_title('NDVI Distribution by Field', fontsize=12, fontweight='bold')
axes[1].set_ylabel('NDVI')
axes[1].set_xlabel('Field ID')

plt.tight_layout()
plt.show()

print("✓ NDVI analysis complete")

## Step 4: Temporal Trends

Analyze how crop health changes over time

In [None]:
# Plot avg_ndvi over time, one line per field, on top of shaded health zones.
# Sort by date for time series (requires the `date_dt` column added earlier)
df_sorted = df.sort_values('date_dt')

fig, ax = plt.subplots(figsize=(12, 6))

# One line per field, in the order the fields first appear in `df`
for field in df['field_id'].unique():
    field_data = df_sorted[df_sorted['field_id'] == field]
    ax.plot(field_data['date_dt'], field_data['avg_ndvi'],
            marker='o', label=field, linewidth=2, markersize=6)

ax.set_title('NDVI Trends Over Time', fontsize=14, fontweight='bold')
ax.set_xlabel('Date')
ax.set_ylabel('NDVI')
ax.grid(True, alpha=0.3)
ax.set_ylim([0, 1])

# Add health zones as translucent horizontal bands
ax.axhspan(0, 0.3, alpha=0.1, color='red', label='Stressed')
ax.axhspan(0.3, 0.65, alpha=0.1, color='yellow', label='Moderate')
ax.axhspan(0.65, 1, alpha=0.1, color='green', label='Healthy')

# Build the legend only after every labelled artist exists; calling it before
# the axhspan() lines would silently leave the three zone labels out.
ax.legend()

plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

print("✓ Temporal trends plotted")

## Step 5: Health Status Analysis

In [None]:
# Count observations per (field, health status) and chart the result.
health_by_field = pd.crosstab(df['field_id'], df['health_status'])
print("\nHealth Status Distribution by Field:")
print(health_by_field)

# One colour per status label, shared by both charts; labels that are not
# listed here fall back to gray.
colors = {'Healthy': '#2ecc71', 'Moderate': '#f39c12', 'Stressed': '#e74c3c'}

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Grouped (side-by-side) bar chart. Colours are looked up per column rather
# than passed as a fixed three-item list, so a missing or extra status in the
# data can no longer shift e.g. "Stressed" onto the green bar.
health_by_field.plot(kind='bar', stacked=False, ax=axes[0],
                     color=[colors.get(label, 'gray') for label in health_by_field.columns])
axes[0].set_title('Health Status Count by Field', fontsize=12, fontweight='bold')
axes[0].set_ylabel('Count')
axes[0].set_xlabel('Field ID')
axes[0].legend(title='Health Status')

# Overall distribution pie chart
health_total = df['health_status'].value_counts()
axes[1].pie(health_total.values, labels=health_total.index, autopct='%1.1f%%',
            colors=[colors.get(label, 'gray') for label in health_total.index])
axes[1].set_title('Overall Health Status Distribution', fontsize=12, fontweight='bold')

plt.tight_layout()
plt.show()

print("✓ Health status analysis complete")

## Step 6: Vegetation Coverage Analysis

In [None]:
# Vegetation coverage statistics and plots; skipped with a notice when the
# dataset has no `vegetation_coverage` column.
if 'vegetation_coverage' in df.columns:
    print("\nVegetation Coverage Statistics:")
    print(df['vegetation_coverage'].describe().round(4))

    # Two panels: NDVI vs coverage (left), coverage distribution per field (right)
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    # Scatter: NDVI vs Vegetation Coverage, points coloured by their NDVI value
    scatter = axes[0].scatter(df['avg_ndvi'], df['vegetation_coverage'],
                              c=df['avg_ndvi'], cmap='RdYlGn', s=100, alpha=0.6)
    axes[0].set_xlabel('Average NDVI')
    axes[0].set_ylabel('Vegetation Coverage')
    axes[0].set_title('NDVI vs Vegetation Coverage', fontsize=12, fontweight='bold')
    plt.colorbar(scatter, ax=axes[0], label='NDVI')

    # Distribution by field
    df.boxplot(column='vegetation_coverage', by='field_id', ax=axes[1])
    # With `by=`, pandas also sets a figure-level "Boxplot grouped by field_id"
    # title that would span both panels; clear it.
    fig.suptitle('')
    axes[1].set_title('Vegetation Coverage by Field', fontsize=12, fontweight='bold')
    # NOTE(review): the generated sample data stores coverage as a 0-1
    # fraction, not a percentage - confirm the unit of the real metrics.
    axes[1].set_ylabel('Coverage (%)')

    plt.tight_layout()
    plt.show()

    print("✓ Vegetation coverage analysis complete")
else:
    print("⚠ Vegetation coverage data not available")

## Step 7: Field Ranking

Rank fields by crop health performance

In [None]:
# Build one summary row per field (NDVI mean/min/max, mean vegetation
# coverage, share of "Healthy" observations), rank the fields by mean NDVI
# and print a recommendation for each.
has_coverage = 'vegetation_coverage' in df.columns

# Named aggregations: output column name -> (source column, aggregation)
ranking_spec = {
    'Avg NDVI': ('avg_ndvi', 'mean'),
    'Min NDVI': ('avg_ndvi', 'min'),
    'Max NDVI': ('avg_ndvi', 'max'),
}
if has_coverage:
    # Only reference the column when it exists; naming a missing column in
    # the aggregation raises KeyError regardless of the function chosen.
    ranking_spec['Veg Coverage'] = ('vegetation_coverage', 'mean')
# % Healthy observations
ranking_spec['Health %'] = ('health_status', lambda x: (x == 'Healthy').sum() / len(x) * 100)

field_ranking = df.groupby('field_id').agg(**ranking_spec).round(3)

if not has_coverage:
    # Keep the five-column layout stable for the later report/export cells
    field_ranking.insert(3, 'Veg Coverage', np.nan)

# Best field (highest mean NDVI) first
field_ranking = field_ranking.sort_values('Avg NDVI', ascending=False)

print("\nField Performance Ranking:")
print("=" * 80)
print(field_ranking)

# Recommendations
print("\n" + "=" * 80)
print("Field Recommendations:")
print("=" * 80)

for idx, (field, row) in enumerate(field_ranking.iterrows(), 1):
    ndvi = row['Avg NDVI']

    # Cut-offs: above 0.65 healthy, above 0.45 moderate, otherwise stressed
    if ndvi > 0.65:
        status = "✓ HEALTHY"
        action = "Continue normal management"
    elif ndvi > 0.45:
        status = "⚠ MODERATE"
        action = "Monitor closely, consider irrigation/fertilization"
    else:
        status = "✗ STRESSED"
        action = "Immediate intervention recommended"

    print(f"\n{idx}. {field} - {status}")
    print(f"   NDVI: {ndvi:.3f}")
    print(f"   Action: {action}")

## Step 8: Create Summary Report

In [None]:
# Generate a plain-text summary report, print it, and save it under ../results.
# Relies on `df` and on the `field_ranking` table (sorted best-first by
# 'Avg NDVI') built in the field-ranking cell.
total_observations = len(df)

# One "<status>: <n> observations (<pct>%)" line per health label
status_lines = []
for status_name in ('Healthy', 'Moderate', 'Stressed'):
    status_count = int((df['health_status'] == status_name).sum())
    # Guard the percentage so an empty dataset reports 0.0% instead of failing
    status_share = status_count / total_observations * 100 if total_observations else 0.0
    status_lines.append(f"{status_name}: {status_count} observations ({status_share:.1f}%)")
status_block = "\n".join(status_lines)

report = f"""
CROP HEALTH MONITORING REPORT
Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
{'=' * 60}

EXECUTIVE SUMMARY
Total Fields Monitored: {df['field_id'].nunique()}
Total Observations: {total_observations}
Date Range: {df['date'].min()} to {df['date'].max()}

NDVI STATISTICS
Average NDVI: {df['avg_ndvi'].mean():.4f}
Median NDVI: {df['avg_ndvi'].median():.4f}
Min NDVI: {df['avg_ndvi'].min():.4f}
Max NDVI: {df['avg_ndvi'].max():.4f}

HEALTH STATUS SUMMARY
{status_block}

TOP PERFORMING FIELDS
"""

# Up to three fields with the highest mean NDVI
for idx, (field, row) in enumerate(field_ranking.head(3).iterrows(), 1):
    report += f"{idx}. {field}: NDVI {row['Avg NDVI']:.3f}\n"

report += "\nFIELDS REQUIRING ATTENTION\n"

# Only fields at or below the healthy cut-off (0.65, the same value the
# recommendations use) need attention. Taking the bottom three rows
# unconditionally would repeat the top performers whenever few fields exist.
healthy_cutoff = 0.65
needs_attention = (
    field_ranking[field_ranking['Avg NDVI'] <= healthy_cutoff]
    .sort_values('Avg NDVI')  # worst field first
    .head(3)
)

if needs_attention.empty:
    report += "None - every field is above the healthy NDVI threshold\n"
else:
    for idx, (field, row) in enumerate(needs_attention.iterrows(), 1):
        report += f"{idx}. {field}: NDVI {row['Avg NDVI']:.3f}\n"

print(report)

# Save report
report_file = '../results/crop_health_report.txt'
Path(report_file).parent.mkdir(parents=True, exist_ok=True)

# Explicit encoding so the file is written identically on every platform
with open(report_file, 'w', encoding='utf-8') as f:
    f.write(report)

print(f"\n✓ Report saved to: {report_file}")

## Step 9: Export Results

In [None]:
# Export the ranking table, the full metrics table and a few headline numbers
# to ../results (created if it does not exist yet).
output_dir = Path('../results')
output_dir.mkdir(parents=True, exist_ok=True)

# Export field ranking (field_id is the index, so it is written as a column)
field_ranking.to_csv(output_dir / 'field_ranking.csv')
print("✓ Field ranking exported to: field_ranking.csv")

# Export detailed metrics: every column currently in `df`, without the index
df.to_csv(output_dir / 'detailed_metrics.csv', index=False)
print("✓ Detailed metrics exported to: detailed_metrics.csv")

# Export summary statistics; values are cast to built-in int/float because
# json cannot serialize NumPy scalar types.
summary_stats = {
    'total_fields': int(df['field_id'].nunique()),
    'total_observations': len(df),
    'avg_ndvi': float(df['avg_ndvi'].mean()),
    'healthy_count': int((df['health_status'] == 'Healthy').sum()),
    'moderate_count': int((df['health_status'] == 'Moderate').sum()),
    'stressed_count': int((df['health_status'] == 'Stressed').sum())
}

# Explicit encoding so the file is written identically on every platform
with open(output_dir / 'summary_stats.json', 'w', encoding='utf-8') as f:
    json.dump(summary_stats, f, indent=2)

print("✓ Summary statistics exported to: summary_stats.json")

print(f"\n✓ All results exported to: {output_dir}")

## Summary

You've completed the Tier 2 Agriculture project!

### What You Learned
- **S3:** Upload and store satellite imagery in AWS
- **Lambda:** Process large datasets with serverless functions
- **Data Analysis:** Analyze NDVI and crop health metrics
- **AWS Integration:** Connect services for end-to-end workflow

### Next Steps
1. **Advanced Analysis:** Add time series forecasting, anomaly detection
2. **Scale Up:** Process 1000+ fields with parallel Lambda execution
3. **Integrate:** Combine with weather data, yield predictions
4. **Tier 3:** Automate with CloudFormation infrastructure as code

### Don't Forget!
**Delete AWS resources** when done to avoid charges:
```bash
python ../scripts/cleanup.py
```

---

**Congratulations on completing this AWS project!** 🎉