In [None]:
# Optional: download the real dataset (requires Kaggle API setup).
# The two shell commands below are left commented out; uncomment them to
# fetch the archive and unpack it into coastal_data/.
# !kaggle datasets download -d atharvasoundankar/shifting-seas-ocean-climate-and-marine-life-dataset
# !unzip shifting-seas-ocean-climate-and-marine-life-dataset.zip -d coastal_data/

# For now, only make sure the placeholder data directory exists.
data_dir = Path('coastal_data')
data_dir.mkdir(exist_ok=True)  # exist_ok=True: no error if the directory is already present
print(f"Data directory: {data_dir.absolute()}")

# Create synthetic coastal sensor data for testing
class CoastalDataProcessor:
    def __init__(self, data_dir):
        self.data_dir = Path(data_dir)
        self.sensor_data = None
        
    def create_synthetic_coastal_data(self, n_samples=1000):
        """Create synthetic coastal environmental data for testing"""
        np.random.seed(42)
        dates = pd.date_range('2020-01-01', periods=n_samples, freq='D')
        
        # Simulate seasonal patterns and erosion indicators
        seasonal_trend = np.sin(2 * np.pi * np.arange(n_samples) / 365)
        
        self.sensor_data = pd.DataFrame({
            'timestamp': dates,
            'pH': 8.1 + 0.3 * seasonal_trend + np.random.normal(0, 0.1, n_samples),
            'salinity': 35.0 + 2.0 * seasonal_trend + np.random.normal(0, 0.5, n_samples),
            'dissolved_oxygen': 7.5 + 1.0 * seasonal_trend + np.random.normal(0, 0.3, n_samples),
            'temperature': 15.0 + 8.0 * seasonal_trend + np.random.normal(0, 1.0, n_samples),
            'wave_height': 1.5 + 0.8 * seasonal_trend + np.random.normal(0, 0.3, n_samples),
            'tide_level': 2.0 * np.sin(4 * np.pi * np.arange(n_samples) / 365) + np.random.normal(0, 0.1, n_samples),
            'erosion_risk': np.random.choice([0, 1, 2], n_samples, p=[0.6, 0.3, 0.1])  # 0=low, 1=medium, 2=high
        })
        
        print("Created synthetic coastal sensor data")
        return self.sensor_data

# Initialize the processor and generate the synthetic dataset using the
# method's default sample count, then print a quick summary of the result.
processor = CoastalDataProcessor('coastal_data')
sensor_data = processor.create_synthetic_coastal_data()
print(f"Sensor data shape: {sensor_data.shape}")  # (rows, columns)
print(f"Columns: {list(sensor_data.columns)}")
