# IoT Data Exploration
## Simulated Sensor Data for Outbreak Prediction

**Objective**: Explore simulated IoT time-series data (temperature, humidity, soil moisture, wind, rain) and the outputs of the SEIR (Susceptible–Exposed–Infectious–Recovered) disease model.

In [None]:
import sys
sys.path.insert(0, '../..')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from src.data.iot_simulator import IoTSimulator
from src.features.temporal_features import TemporalFeatureEngineer
from src.features.disease_rules import DISEASE_PROFILES, is_condition_favorable, get_risk_score

# Apply matplotlib's 'seaborn-v0_8' style sheet to every figure drawn below.
plt.style.use('seaborn-v0_8')
# IPython magics (valid only inside a notebook/IPython session, not plain
# Python): render figures inline in the notebook, and ask the inline backend
# for the 'retina' (high-resolution) figure format.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

## 1. Generate IoT Data

In [None]:
# Simulator settings: global run parameters first, then one sub-dict per
# simulated channel, then the SEIR disease-model parameters. How each value is
# interpreted is decided by IoTSimulator, not by this notebook.
config = {
    'num_fields': 5,
    'days': 365,
    'seed': 42,
    'temperature': {
        'base_mean': 25,
        'base_amplitude': 8,
        'diurnal_amplitude': 5,
        'ar1_coeff': 0.7,
        'noise_std': 1.5,
    },
    'humidity': {
        'base_mean': 70,
        'anti_corr_factor': -1.5,
        'noise_std': 5,
    },
    'rain': {
        'zero_inflation': 0.7,
        'gamma_shape': 0.8,
        'gamma_scale': 5.0,
    },
    'soil_moisture': {
        'initial': 0.35,
        'rain_factor': 0.05,
        'decay_rate': 0.03,
    },
    'wind': {
        'log_mean': 1.5,
        'log_std': 0.6,
    },
    'disease_seir': {
        'S0': 0.95,
        'E0': 0.03,
        'I0': 0.02,
        'R0': 0.0,
        'beta_base': 0.3,
        'sigma': 0.1,
        'gamma': 0.05,
        'temp_optimal': 25.0,
        'humidity_optimal': 85.0,
    },
}

sim = IoTSimulator(config, seed=42)
all_data = sim.generate_all()

# Entry 0 of the simulator output is the frame explored in detail below.
df = all_data[0]
print(f'Generated data for {len(all_data)} fields, {len(df)} days each')

# Last expression of the cell, so the notebook displays the first rows.
df.head()

## 2. Environmental Variables Over Time

In [None]:
# 3x2 grid of panels sharing one x-axis: the five sensor channels plus the
# disease prevalence column, all taken from the single-field frame `df`.
fig, axes = plt.subplots(3, 2, figsize=(16, 12), sharex=True)
(temp_ax, humidity_ax), (rain_ax, soil_ax), (wind_ax, disease_ax) = axes
dates = df['date']

temp_ax.plot(dates, df['temperature'], color='red', alpha=0.7)
temp_ax.set(title='Temperature (C)', ylabel='C')

humidity_ax.plot(dates, df['humidity'], color='blue', alpha=0.7)
humidity_ax.set(title='Humidity (%)')

# Rainfall is drawn as bars rather than a line.
rain_ax.bar(dates, df['rain_mm'], color='steelblue', alpha=0.7, width=1)
rain_ax.set(title='Rainfall (mm)')

soil_ax.plot(dates, df['soil_moisture'], color='brown', alpha=0.7)
soil_ax.set(title='Soil Moisture')

wind_ax.plot(dates, df['wind_speed'], color='gray', alpha=0.7)
wind_ax.set(title='Wind Speed')

# Prevalence is shown as a filled area down to zero.
disease_ax.fill_between(dates, df['disease_prevalence'], color='red', alpha=0.5)
disease_ax.set(title='Disease Prevalence (SEIR)')

# Same faint grid on every panel.
for panel in axes.flat:
    panel.grid(True, alpha=0.3)

fig.suptitle('Field 0 - 365 Day Simulation', fontsize=14)
fig.tight_layout()
plt.show()

## 3. Correlation Analysis

In [None]:
# Pairwise correlation (DataFrame.corr with its default settings) between the
# sensor channels and disease prevalence, drawn as an annotated heatmap whose
# diverging colour map is centred on zero.
cols = [
    'temperature',
    'humidity',
    'soil_moisture',
    'wind_speed',
    'rain_mm',
    'disease_prevalence',
]
corr = df[cols].corr()

fig, ax = plt.subplots(figsize=(8, 6))
heatmap_style = {'annot': True, 'fmt': '.2f', 'cmap': 'RdBu_r', 'center': 0}
sns.heatmap(corr, ax=ax, **heatmap_style)
ax.set_title('Variable Correlation Matrix')
fig.tight_layout()
plt.show()

## 4. Temporal Feature Engineering

In [None]:
# Run the project's TemporalFeatureEngineer on the single-field frame `df` and
# report how many columns exist before/after, then list the added ones.
eng = TemporalFeatureEngineer()
df_features = eng.transform(df)

print(f'Original columns: {len(df.columns)}')
print(f'After feature engineering: {len(df_features.columns)}')
# Plain literal: nothing is interpolated here, so no f-prefix is needed.
print('\nNew features:')

# A set difference loses column order, so sort for a stable listing.
new_cols = set(df_features.columns) - set(df.columns)
for col in sorted(new_cols):
    print(f'  {col}')

## 5. Disease Risk Assessment

In [None]:
# For each listed disease, evaluate get_risk_score on every row's
# (temperature, humidity) pair and draw the resulting series over time.
diseases = [
    'Tomato_Early_blight',
    'Tomato_Late_blight',
    'Tomato_Bacterial_spot',
]

fig, ax = plt.subplots(figsize=(14, 5))
temperatures = df['temperature']
humidities = df['humidity']

for disease_name in diseases:
    risks = [
        get_risk_score(disease_name, temp, hum)
        for temp, hum in zip(temperatures, humidities)
    ]
    # Underscores in the disease key become spaces for the legend entry.
    legend_label = disease_name.replace('_', ' ')
    ax.plot(df['date'], risks, label=legend_label, alpha=0.7)

ax.set(
    ylabel='Risk Score (0-1)',
    title='Disease Risk Scores Based on Environmental Conditions',
)
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

## 6. Cross-Field Comparison

In [None]:
# Side-by-side overlay of every simulated field: disease prevalence on the
# left (with a per-field legend), temperature on the right (unlabelled lines).
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
prevalence_ax, temperature_ax = axes

for field_id, field_df in all_data.items():
    field_dates = field_df['date']
    prevalence_ax.plot(
        field_dates,
        field_df['disease_prevalence'],
        alpha=0.6,
        label=f'Field {field_id}',
    )
    temperature_ax.plot(field_dates, field_df['temperature'], alpha=0.4)

prevalence_ax.set(title='Disease Prevalence Across Fields', ylabel='Prevalence')
prevalence_ax.legend(fontsize=8)

temperature_ax.set(title='Temperature Across Fields', ylabel='Temperature (C)')

# Same faint grid on both panels.
for panel in axes:
    panel.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()