In [2]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

class PerfectWindTurbineSimulator:
    """
    Perfect operation wind turbine simulator for baseline dataset.

    CORRECTED THRESHOLDS:
    --------------------
    Based on Data Documentation v1.3:
    - Rotor Speed: 6-20 RPM (was allowing >20)
    - Hydraulic Pressure: 155-165 bar (was exact 160)
    - Gearbox Oil Pressure: 2.3-2.7 bar when running (was allowing 2.5±0.05)
    - Main Bearing Temp: <55°C during operation
    - Gearbox Bearing Temp: <70°C
    - Generator Stator Temp: <90°C
    - All vibrations within ISO Zone A-B (good condition)

    CHARACTERISTICS:
    ---------------
    - NO component degradation (always healthy)
    - NO random failures
    - NO alarms (alarm_code always 0)
    - NO faults (no Status 400)
    - NO maintenance (no Status 300)
    - Only Status 100 (running) and 200 (idle - weather dependent)
    - All health scores = 1.0 always
    - All parameters within normal thresholds
    - Minimal realistic sensor noise

    DEFAULT CONFIGURATION:
    ---------------------
    - Date Range: January 1, 2024 to January 1, 2026 (2 years)
    - Data Interval: 60 minutes (1 hour)
    - Turbines: 4
    - Records per turbine: 17,544 (24 hours × 731 days; 2024 is a leap year)
    - Total records: 70,176 (4 turbines)

    REPRODUCIBILITY:
    ---------------
    All noise is drawn from NumPy's global random state (np.random.normal),
    so seed it with np.random.seed(...) before generating if repeatable
    output is required.
    """

    def __init__(self, turbine_id, rated_power=2500, cut_in_speed=3,
                 rated_speed=11, cut_out_speed=25, location_offset=0):
        """
        Initialize perfect turbine.

        Args:
            turbine_id: Unique identifier (e.g., "WT001")
            rated_power: Rated power in kW
            cut_in_speed: Cut-in wind speed (m/s)
            rated_speed: Rated wind speed (m/s)
            cut_out_speed: Cut-out wind speed (m/s)
            location_offset: Geographic offset for wake effects (0-3)
        """
        self.turbine_id = turbine_id
        self.rated_power = rated_power
        self.cut_in_speed = cut_in_speed
        self.rated_speed = rated_speed
        self.cut_out_speed = cut_out_speed
        self.location_offset = location_offset

        # Components are ALWAYS healthy - no degradation
        self.bearing_wear = 0.0  # Always 0
        self.gearbox_oil_quality = 1.0  # Always 1.0
        self.generator_winding_temp = 0.0  # Always 0
        self.blade_imbalance = 0.0  # Always 0

        # State is only running or idle (weather dependent)
        self.state = "running"

    @staticmethod
    def _clamp(value, low, high):
        """Return ``value`` limited to the closed interval [low, high]."""
        return max(low, min(high, value))

    def power_curve(self, wind_speed):
        """
        Physics-based power curve - PERFECT efficiency.

        Args:
            wind_speed: Wind speed in m/s

        Returns:
            Power in kW: 0 below cut-in, a cubic ramp up to rated speed,
            rated power up to (but excluding) cut-out, and 0 from cut-out on.
        """
        if wind_speed < self.cut_in_speed:
            return 0
        elif wind_speed < self.rated_speed:
            # Region 2: MPPT - perfect cubic relationship
            normalized = (wind_speed - self.cut_in_speed) / (self.rated_speed - self.cut_in_speed)
            power = self.rated_power * (normalized ** 3)
            return power
        elif wind_speed < self.cut_out_speed:
            # Region 3: Rated power - perfect
            return self.rated_power
        else:
            return 0  # Cut-out for safety

    def check_operational_state(self, wind_speed):
        """
        Determine if turbine should be running based ONLY on wind.
        No faults, no maintenance - only weather dependent.

        Updates ``self.state`` to "running" or "idle" and returns True when
        the turbine runs.
        """
        # Use >= at cut-out so this agrees with power_curve(), which already
        # produces 0 kW at exactly cut_out_speed; otherwise that boundary
        # sample would be labelled "running" while generating no power.
        if wind_speed < self.cut_in_speed or wind_speed >= self.cut_out_speed:
            self.state = "idle"
            return False
        self.state = "running"
        return True

    def generate_data(self, start_date, end_date, interval_minutes=60):
        """
        Generate PERFECT operation data for specified date range.

        Args:
            start_date: Starting datetime
            end_date: Ending datetime
            interval_minutes: Data sampling interval

        Returns:
            pandas.DataFrame with one row per sample; it is empty when the
            range is shorter than one interval.

        Raises:
            ValueError: if interval_minutes is not positive or end_date is
                earlier than start_date.
        """
        if interval_minutes <= 0:
            raise ValueError("interval_minutes must be a positive number of minutes")
        if end_date < start_date:
            raise ValueError("end_date must not be earlier than start_date")

        # Calculate number of samples based on date range
        total_minutes = int((end_date - start_date).total_seconds() / 60)
        n_samples = total_minutes // interval_minutes
        days = total_minutes / (24 * 60)

        print(f"Generating {n_samples:,} PERFECT samples for {self.turbine_id} over {days:.0f} days...")

        timestamps = [start_date + timedelta(minutes=i * interval_minutes)
                      for i in range(n_samples)]

        # The order of these three calls fixes how the global random stream is
        # consumed (wind speed, then direction, then temperature); keep it so
        # seeded runs stay comparable.
        wind_speed = self._generate_annual_wind(n_samples, start_date, interval_minutes)
        wind_dir = self._generate_wind_direction(n_samples)
        ambient_temp = self._generate_annual_temperature(n_samples, start_date, interval_minutes)

        # Key order defines the column order of the resulting DataFrame.
        data = {
            'timestamp': timestamps,
            'turbine_id': [self.turbine_id] * n_samples,
            'wind_speed_ms': wind_speed,
            'power_kw': np.zeros(n_samples),
            'rotor_speed_rpm': np.zeros(n_samples),
            'generator_speed_rpm': np.zeros(n_samples),
            'pitch_angle_deg': np.zeros(n_samples),
            'yaw_angle_deg': np.zeros(n_samples),
            'nacelle_direction_deg': np.zeros(n_samples),
            'ambient_temp_c': ambient_temp,
            'nacelle_temp_c': np.zeros(n_samples),
            'gearbox_bearing_temp_c': np.zeros(n_samples),
            'gearbox_oil_temp_c': np.zeros(n_samples),
            'gearbox_oil_pressure_bar': np.zeros(n_samples),
            'generator_bearing1_temp_c': np.zeros(n_samples),
            'generator_bearing2_temp_c': np.zeros(n_samples),
            'generator_stator_temp_c': np.zeros(n_samples),
            'main_bearing_temp_c': np.zeros(n_samples),
            'hydraulic_pressure_bar': np.zeros(n_samples),
            'grid_voltage_v': np.zeros(n_samples),
            'grid_current_a': np.zeros(n_samples),
            'grid_frequency_hz': np.zeros(n_samples),
            'grid_power_factor': np.zeros(n_samples),
            'reactive_power_kvar': np.zeros(n_samples),
            'wind_direction_deg': wind_dir,
            'status_code': np.zeros(n_samples, dtype=int),
            'alarm_code': np.zeros(n_samples, dtype=int),  # ALWAYS 0

            # CMS Data
            'main_bearing_vibration_rms_mms': np.zeros(n_samples),
            'main_bearing_vibration_peak_mms': np.zeros(n_samples),
            'gearbox_vibration_axial_mms': np.zeros(n_samples),
            'gearbox_vibration_radial_mms': np.zeros(n_samples),
            'generator_vibration_de_mms': np.zeros(n_samples),
            'generator_vibration_nde_mms': np.zeros(n_samples),
            'tower_vibration_fa_mms': np.zeros(n_samples),
            'tower_vibration_ss_mms': np.zeros(n_samples),
            'blade1_vibration_mms': np.zeros(n_samples),
            'blade2_vibration_mms': np.zeros(n_samples),
            'blade3_vibration_mms': np.zeros(n_samples),
            'acoustic_level_db': np.zeros(n_samples),

            # Component health indicators - ALWAYS PERFECT. They are
            # initialised to their constant values and never modified.
            'bearing_wear_index': np.zeros(n_samples),  # Always 0.0
            'oil_quality_index': np.ones(n_samples),    # Always 1.0
            'generator_health_index': np.ones(n_samples),  # Always 1.0
            'overall_health_score': np.ones(n_samples)     # Always 1.0
        }

        # The nacelle starts at 0 degrees and only moves while running.
        current_yaw = 0

        for i in range(n_samples):
            # Operation is ONLY wind dependent
            if self.check_operational_state(wind_speed[i]):
                current_yaw = self._simulate_running_step(
                    data, i, wind_speed[i], wind_dir[i], current_yaw)
            else:
                self._simulate_idle_step(data, i, current_yaw)

        return pd.DataFrame(data)

    def _simulate_running_step(self, data, i, wind, wind_dir, current_yaw):
        """
        Fill row ``i`` of ``data`` with healthy running-state values.

        Args:
            data: Column dictionary built by generate_data (modified in place)
            i: Row index to fill
            wind: Wind speed for this sample (m/s)
            wind_dir: Wind direction for this sample (degrees)
            current_yaw: Nacelle yaw before this step (degrees)

        Returns:
            The nacelle yaw after this step (degrees).
        """
        normal = np.random.normal
        clamp = self._clamp
        ambient = data['ambient_temp_c'][i]

        # PERFECT power output with minimal measurement noise (±0.5%)
        power = self.power_curve(wind)
        power_out = max(0, power + normal(0, power * 0.005))
        data['power_kw'][i] = power_out
        load_factor = power_out / self.rated_power

        # Rotor speed (variable speed turbine) - hard cap at 19.95 RPM so
        # sensor noise can NEVER push it past the 20 RPM limit
        if wind < self.rated_speed:
            target_rpm = 6 + (wind / self.rated_speed) * 14
        else:
            target_rpm = 20
        rotor_rpm = clamp(target_rpm + normal(0, 0.05), 6, 19.95)
        data['rotor_speed_rpm'][i] = rotor_rpm

        # Generator speed (gear ratio 1:97), capped at 1935 (19.95*97)
        gear_ratio = 97
        gen_rpm = clamp(rotor_rpm * gear_ratio + normal(0, 3), 580, 1935)
        data['generator_speed_rpm'][i] = gen_rpm

        # Pitch angle - blades pitch only above rated wind speed
        if wind > self.rated_speed:
            pitch = min(25, (wind - self.rated_speed) * 2) + normal(0, 0.05)
        else:
            pitch = normal(0, 0.1)
        data['pitch_angle_deg'][i] = clamp(pitch, -0.5, 25)

        # Yaw control - close 20% of the shortest-path error each step
        yaw_error = ((wind_dir - current_yaw + 180) % 360) - 180
        current_yaw = (current_yaw + yaw_error * 0.2) % 360
        data['yaw_angle_deg'][i] = current_yaw
        data['nacelle_direction_deg'][i] = current_yaw

        # Gearbox temperatures - proportional to load, clamped below 70°C limit
        base_gb_temp = ambient + 35 * load_factor
        data['gearbox_bearing_temp_c'][i] = clamp(base_gb_temp + normal(0, 0.3), ambient, 65)
        data['gearbox_oil_temp_c'][i] = clamp(base_gb_temp - 5 + normal(0, 0.3), ambient, 60)

        # Gearbox oil pressure (2.3-2.7 bar range)
        data['gearbox_oil_pressure_bar'][i] = clamp(2.5 + normal(0, 0.03), 2.35, 2.65)

        # Generator temperatures - clamped below 90°C/75°C limits
        base_gen_temp = ambient + 50 * load_factor
        data['generator_stator_temp_c'][i] = clamp(base_gen_temp + normal(0, 0.5), ambient, 85)
        data['generator_bearing1_temp_c'][i] = clamp(base_gen_temp - 12 + normal(0, 0.3), ambient, 70)
        data['generator_bearing2_temp_c'][i] = clamp(base_gen_temp - 10 + normal(0, 0.3), ambient, 70)

        # Main bearing temperature - clamped below 55°C limit
        data['main_bearing_temp_c'][i] = clamp(
            ambient + 25 * load_factor + normal(0, 0.3), ambient, 50)

        # Nacelle ambient
        data['nacelle_temp_c'][i] = ambient + 10 * load_factor + normal(0, 0.3)

        # Hydraulic system (155-165 bar range)
        data['hydraulic_pressure_bar'][i] = clamp(160 + normal(0, 0.8), 157, 163)

        # Grid parameters
        voltage = clamp(690 + normal(0, 2), 685, 695)
        data['grid_voltage_v'][i] = voltage
        data['grid_frequency_hz'][i] = clamp(50.0 + normal(0, 0.008), 49.99, 50.01)

        # Three-phase current and reactive power from active power and pf
        pf = clamp(0.95 + normal(0, 0.003), 0.945, 0.955)
        data['grid_power_factor'][i] = pf
        data['grid_current_a'][i] = (power_out * 1000) / (1.732 * voltage * pf)
        data['reactive_power_kvar'][i] = power_out * np.tan(np.arccos(pf))

        # CMS - vibration scales with rotor speed relative to 20 RPM
        rotor_speed_factor = rotor_rpm / 20.0

        # Main bearing vibration - LOW and healthy
        mb_rms = clamp(0.8 * rotor_speed_factor + normal(0, 0.05), 0.5, 2.0)
        data['main_bearing_vibration_rms_mms'][i] = mb_rms
        data['main_bearing_vibration_peak_mms'][i] = mb_rms * 1.7

        # Gearbox vibration - LOW and healthy
        base_gb_vib = 1.5 * rotor_speed_factor
        gb_axial = base_gb_vib + normal(0, 0.08)
        gb_radial = base_gb_vib * 1.05 + normal(0, 0.08)
        data['gearbox_vibration_axial_mms'][i] = clamp(gb_axial, 0.8, 3.5)
        data['gearbox_vibration_radial_mms'][i] = clamp(gb_radial, 0.8, 3.7)

        # Generator vibration - LOW and healthy
        gen_speed_factor = gen_rpm / 1940.0
        gen_de = 1.1 * gen_speed_factor + normal(0, 0.06)
        gen_nde = 1.0 * gen_speed_factor + normal(0, 0.06)
        data['generator_vibration_de_mms'][i] = clamp(gen_de, 0.5, 2.2)
        data['generator_vibration_nde_mms'][i] = clamp(gen_nde, 0.5, 2.0)

        # Tower vibration - LOW and healthy
        tower_fa = 0.25 * rotor_speed_factor + normal(0, 0.02)
        tower_ss = 0.20 * rotor_speed_factor + normal(0, 0.02)
        data['tower_vibration_fa_mms'][i] = clamp(tower_fa, 0.1, 0.6)
        data['tower_vibration_ss_mms'][i] = clamp(tower_ss, 0.1, 0.5)

        # Blade vibration - BALANCED (same base level for all three blades)
        base_blade_vib = 0.4 * rotor_speed_factor
        for column in ('blade1_vibration_mms', 'blade2_vibration_mms', 'blade3_vibration_mms'):
            data[column][i] = clamp(base_blade_vib + normal(0, 0.03), 0.25, 0.9)

        # Acoustic level
        data['acoustic_level_db'][i] = clamp(68 + 10 * load_factor + normal(0, 0.8), 65, 85)

        # Status code - RUNNING (alarm code and health indices keep their
        # initial "perfect" values)
        data['status_code'][i] = 100

        return current_yaw

    def _simulate_idle_step(self, data, i, current_yaw):
        """
        Fill row ``i`` of ``data`` with idle-state values (wind out of range).

        Args:
            data: Column dictionary built by generate_data (modified in place)
            i: Row index to fill
            current_yaw: Nacelle yaw, held constant while idle (degrees)
        """
        normal = np.random.normal
        ambient = data['ambient_temp_c'][i]

        data['power_kw'][i] = 0
        data['rotor_speed_rpm'][i] = 0
        data['generator_speed_rpm'][i] = 0
        data['pitch_angle_deg'][i] = 90  # Feathered
        data['yaw_angle_deg'][i] = current_yaw
        data['nacelle_direction_deg'][i] = current_yaw

        # Idle temperatures (cooling to ambient)
        data['nacelle_temp_c'][i] = ambient + normal(0, 0.15)
        data['gearbox_bearing_temp_c'][i] = ambient + normal(0, 0.2)
        data['gearbox_oil_temp_c'][i] = ambient + normal(0, 0.2)
        data['gearbox_oil_pressure_bar'][i] = 0  # Not running
        data['generator_bearing1_temp_c'][i] = ambient + normal(0, 0.2)
        data['generator_bearing2_temp_c'][i] = ambient + normal(0, 0.2)
        data['generator_stator_temp_c'][i] = ambient + normal(0, 0.2)
        data['main_bearing_temp_c'][i] = ambient + normal(0, 0.2)

        # Hydraulic pressure maintained even when idle (155-165 bar)
        data['hydraulic_pressure_bar'][i] = self._clamp(160 + normal(0, 0.8), 157, 163)

        # No grid connection when idle
        data['grid_voltage_v'][i] = 0
        data['grid_current_a'][i] = 0
        data['grid_frequency_hz'][i] = 0
        data['grid_power_factor'][i] = 0
        data['reactive_power_kvar'][i] = 0

        # Minimal vibration when stopped. A peak amplitude can never be lower
        # than the RMS of the same signal, so the peak draw is floored at RMS.
        mb_rms = max(0, normal(0.12, 0.04))
        data['main_bearing_vibration_rms_mms'][i] = mb_rms
        data['main_bearing_vibration_peak_mms'][i] = max(mb_rms, normal(0.20, 0.06))
        data['gearbox_vibration_axial_mms'][i] = max(0, normal(0.12, 0.04))
        data['gearbox_vibration_radial_mms'][i] = max(0, normal(0.12, 0.04))
        data['generator_vibration_de_mms'][i] = max(0, normal(0.08, 0.03))
        data['generator_vibration_nde_mms'][i] = max(0, normal(0.08, 0.03))
        data['tower_vibration_fa_mms'][i] = max(0, normal(0.04, 0.015))
        data['tower_vibration_ss_mms'][i] = max(0, normal(0.04, 0.015))
        data['blade1_vibration_mms'][i] = max(0, normal(0.06, 0.02))
        data['blade2_vibration_mms'][i] = max(0, normal(0.06, 0.02))
        data['blade3_vibration_mms'][i] = max(0, normal(0.06, 0.02))
        data['acoustic_level_db'][i] = max(40, normal(43, 1.5))

        # Status code - IDLE (alarm code and health indices keep their
        # initial "perfect" values)
        data['status_code'][i] = 200

    @staticmethod
    def _calendar_features(n_samples, start_date, interval_minutes):
        """
        Return (day_of_year, hour_of_day) arrays for each sample time.

        Sample k is taken at start_date + k * interval_minutes.
        """
        time_hours = np.arange(n_samples) * (interval_minutes / 60)
        moments = [start_date + timedelta(hours=h) for h in time_hours]
        day_of_year = np.array([m.timetuple().tm_yday for m in moments])
        hour_of_day = np.array([m.hour for m in moments])
        return day_of_year, hour_of_day

    def _generate_annual_wind(self, n_samples, start_date, interval_minutes):
        """
        Generate realistic annual wind speed with seasonal patterns.

        The mean combines a base speed (reduced by 0.5 m/s per unit of
        location_offset), a yearly cosine and a daily sine; autocorrelated
        noise is added on top and the result is floored at 0.

        Returns:
            numpy array of n_samples wind speeds in m/s.
        """
        day_of_year, hour_of_day = self._calendar_features(
            n_samples, start_date, interval_minutes)

        seasonal = 2 * np.cos(2 * np.pi * (day_of_year - 15) / 365)
        diurnal = 1.5 * np.sin(2 * np.pi * (hour_of_day - 6) / 24)

        base_speed = 8.5 - self.location_offset * 0.5
        mean_wind = base_speed + seasonal + diurnal

        # First-order autoregressive turbulence: each value keeps 98% of the
        # previous one plus noise scaled by the local mean wind.
        turbulence = np.zeros(n_samples)
        turbulence_intensity = 0.15

        for i in range(1, n_samples):
            turbulence[i] = (0.98 * turbulence[i-1] +
                             np.random.normal(0, mean_wind[i] * turbulence_intensity))

        return np.maximum(mean_wind + turbulence, 0)

    def _generate_wind_direction(self, n_samples):
        """
        Generate wind direction with persistence.

        A random walk that is pulled 1% of the way back towards the
        prevailing direction (240° plus 5° per unit of location_offset) at
        every step.

        Returns:
            numpy array of n_samples directions in degrees, wrapped to the
            0-360 range; empty when n_samples is 0.
        """
        direction = np.zeros(n_samples)
        if n_samples == 0:
            # Nothing to seed; indexing direction[0] would raise IndexError.
            return direction

        prevailing = 240 + self.location_offset * 5
        direction[0] = (prevailing + np.random.normal(0, 30)) % 360

        for i in range(1, n_samples):
            stepped = (direction[i-1] + np.random.normal(0, 1.5)) % 360
            error = ((prevailing - stepped + 180) % 360) - 180
            # Wrap again AFTER the restoring correction; otherwise a value
            # just above 0° or just below 360° can leave the valid range.
            direction[i] = (stepped + error * 0.01) % 360

        return direction

    def _generate_annual_temperature(self, n_samples, start_date, interval_minutes):
        """
        Generate realistic annual temperature variation.

        Returns:
            numpy array of n_samples temperatures in °C: a 12°C base plus a
            yearly cosine, a daily sine and Gaussian noise (sigma 0.5).
        """
        day_of_year, hour_of_day = self._calendar_features(
            n_samples, start_date, interval_minutes)

        seasonal = 12 * np.cos(2 * np.pi * (day_of_year - 195) / 365)
        diurnal = 5 * np.sin(2 * np.pi * (hour_of_day - 6) / 24)
        base_temp = 12

        return base_temp + seasonal + diurnal + np.random.normal(0, 0.5, n_samples)


def generate_perfect_wind_farm_data(start_date, end_date, n_turbines=4, interval_minutes=60,
                                    rated_power=2500,
                                    output_file='wind_farm_PERFECT_baseline.csv'):
    """
    Generate PERFECT wind farm dataset (NO alarms, NO failures).

    Simulates every turbine with PerfectWindTurbineSimulator, prints
    per-turbine verification figures and a summary, and writes the combined
    data (sorted by timestamp, then turbine id) to a CSV file.

    Args:
        start_date: Starting datetime
        end_date: Ending datetime
        n_turbines: Number of turbines (must be at least 1)
        interval_minutes: Data interval in minutes (default: 60 = 1 hour)
        rated_power: Rated power in kW given to every turbine and used as the
            capacity-factor denominator (default: 2500)
        output_file: Path of the CSV file to write

    Returns:
        pandas.DataFrame with the combined data of all turbines.

    Raises:
        ValueError: if n_turbines is less than 1.
    """
    if n_turbines < 1:
        # Fail early with a clear message instead of inside pd.concat().
        raise ValueError("n_turbines must be at least 1")

    total_days = (end_date - start_date).days
    hours_per_record = interval_minutes / 60
    banner = "="*70

    print(banner)
    print("PERFECT OPERATION WIND FARM DATA GENERATOR")
    print(banner)
    print(f"Start Date: {start_date.strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"End Date: {end_date.strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"Number of Turbines: {n_turbines}")
    print(f"Data Resolution: {interval_minutes} minutes ({interval_minutes/60:.1f} hours)")
    print(f"Total Duration: {total_days} days ({total_days/365:.1f} years)")
    print("\nCHARACTERISTICS:")
    print("  ✓ NO degradation")
    print("  ✓ NO failures")
    print("  ✓ NO alarms (alarm_code = 0 always)")
    print("  ✓ NO faults (no Status 400)")
    print("  ✓ NO maintenance (no Status 300)")
    print("  ✓ Only Status 100 (running) and 200 (idle)")
    print("  ✓ All health scores = 1.0")
    print("  ✓ All parameters in normal ranges")
    print("\nCORRECTED THRESHOLDS:")
    print("  ✓ Rotor speed: 6-20 RPM (hard cap at 19.95)")
    print("  ✓ Hydraulic pressure: 155-165 bar")
    print("  ✓ Gearbox oil pressure: 2.3-2.7 bar when running")
    print("  ✓ Temperatures well below alarm limits")
    print(banner)

    all_turbines_data = []

    for i in range(n_turbines):
        turbine_id = f"WT{i+1:03d}"
        print(f"\nGenerating PERFECT data for {turbine_id}...")

        # location_offset doubles as the turbine's position in the farm.
        turbine = PerfectWindTurbineSimulator(
            turbine_id=turbine_id,
            rated_power=rated_power,
            location_offset=i
        )

        df = turbine.generate_data(start_date, end_date, interval_minutes=interval_minutes)
        all_turbines_data.append(df)

        print(f"  Total samples: {len(df):,}")
        print(f"  Average power: {df['power_kw'].mean():.1f} kW")
        # Divide by the rating actually simulated, not a hard-coded 2500.
        print(f"  Capacity factor: {(df['power_kw'].mean() / rated_power * 100):.1f}%")
        print(f"  Health score: {df['overall_health_score'].mean()*100:.1f}% (constant)")
        print(f"  Alarms: {(df['alarm_code'] > 0).sum()} (ZERO)")

        # Verification checks
        running_data = df[df['status_code'] == 100]
        if len(running_data) > 0:
            print(f"\n  Verification (running data):")
            print(f"    Max rotor speed: {running_data['rotor_speed_rpm'].max():.2f} RPM (limit: 20)")
            print(f"    Min hydraulic pressure: {running_data['hydraulic_pressure_bar'].min():.2f} bar (limit: >155)")
            print(f"    Max hydraulic pressure: {running_data['hydraulic_pressure_bar'].max():.2f} bar (limit: <165)")
            oil_data = running_data[running_data['gearbox_oil_pressure_bar'] > 0]
            if len(oil_data) > 0:
                print(f"    Min gearbox oil pressure: {oil_data['gearbox_oil_pressure_bar'].min():.2f} bar (limit: >2.3)")
                print(f"    Max gearbox oil pressure: {oil_data['gearbox_oil_pressure_bar'].max():.2f} bar (limit: <2.7)")
            print(f"    Max main bearing temp: {running_data['main_bearing_temp_c'].max():.1f}°C (limit: <55)")
            print(f"    Max gearbox temp: {running_data['gearbox_bearing_temp_c'].max():.1f}°C (limit: <70)")
            print(f"    Max generator temp: {running_data['generator_stator_temp_c'].max():.1f}°C (limit: <90)")

    print("\n" + banner)
    print("Combining data from all turbines...")
    combined_df = pd.concat(all_turbines_data, ignore_index=True)
    combined_df = combined_df.sort_values(['timestamp', 'turbine_id']).reset_index(drop=True)

    print(f"Saving to {output_file}...")
    combined_df.to_csv(output_file, index=False)

    print("\n" + banner)
    print("PERFECT OPERATION SUMMARY")
    print(banner)

    # One pass over the frame; sort=False keeps the order of first
    # appearance, matching iteration over turbine_id.unique().
    for turbine_id, turbine_data in combined_df.groupby('turbine_id', sort=False):
        print(f"\n{turbine_id}:")
        print(f"  Total Energy: {turbine_data['power_kw'].sum() * hours_per_record / 1000:.0f} MWh")
        print(f"  Capacity Factor: {(turbine_data['power_kw'].mean() / rated_power * 100):.1f}%")
        print(f"  Availability: {(turbine_data['status_code'] == 100).sum() / len(turbine_data) * 100:.1f}%")
        print(f"  Health Score: 100.0% (constant)")
        print(f"  Alarms: 0")
        print(f"  Faults: 0")
        print(f"  Maintenance: 0")

    print(f"\nTotal dataset size: {len(combined_df):,} records")
    print(f"Total duration: {total_days} days ({total_days/365:.1f} years)")
    print(f"Total alarms: {(combined_df['alarm_code'] > 0).sum()} (ZERO - PERFECT)")
    # Rough estimate only: assumes about 1000 bytes per CSV record.
    print(f"File size: ~{len(combined_df) * 1000 / 1024 / 1024:.1f} MB")
    print(f"Output saved to: {output_file}")

    print("\n" + banner)
    print("✓ PERFECT BASELINE DATASET COMPLETE")
    print(banner)
    print("\nDATASET SPECIFICATIONS:")
    print(f"  Date Range: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")
    print(f"  Duration: {total_days} days ({total_days/365:.1f} years)")
    print(f"  Interval: {interval_minutes} minutes ({interval_minutes/60:.1f} hours)")
    print(f"  Records per turbine: {len(combined_df)//n_turbines:,}")
    print(f"  Total records: {len(combined_df):,}")
    print(f"  Turbines: {n_turbines}")
    print("\nUSE CASES:")
    print("  1. Baseline comparison for anomaly detection")
    print("  2. Verify alarm systems (should trigger ZERO alarms)")
    print("  3. Training on 'normal operation' patterns")
    print("  4. Performance benchmarking")
    print("  5. Control room demonstrations")
    print(banner)

    return combined_df


# Script entry point: build the default two-year, four-turbine baseline
if __name__ == "__main__":
    # Simulation window: 2024-01-01 00:00 up to (not including) 2026-01-01 00:00
    start_date = datetime(2024, 1, 1)
    end_date = datetime(2026, 1, 1)

    # Hourly samples for a farm of four turbines
    generator_options = {
        'start_date': start_date,
        'end_date': end_date,
        'n_turbines': 4,
        'interval_minutes': 60,
    }
    df = generate_perfect_wind_farm_data(**generator_options)

    print("\nData generation complete!")
    print("\nSample data preview:")

    # Show the first ten rows of the first turbine, key columns only
    preview_columns = [
        'timestamp', 'turbine_id',
        'wind_speed_ms', 'power_kw',
        'status_code', 'alarm_code',
        'overall_health_score',
        'rotor_speed_rpm',
        'hydraulic_pressure_bar',
        'gearbox_oil_pressure_bar',
    ]
    first_turbine_rows = df[df['turbine_id'] == 'WT001']
    print(first_turbine_rows.head(10)[preview_columns])



PERFECT OPERATION WIND FARM DATA GENERATOR
Start Date: 2024-01-01 00:00:00
End Date: 2026-01-01 00:00:00
Number of Turbines: 4
Data Resolution: 60 minutes (1.0 hours)
Total Duration: 731 days (2.0 years)

CHARACTERISTICS:
  ✓ NO degradation
  ✓ NO failures
  ✓ NO alarms (alarm_code = 0 always)
  ✓ NO faults (no Status 400)
  ✓ NO maintenance (no Status 300)
  ✓ Only Status 100 (running) and 200 (idle)
  ✓ All health scores = 1.0
  ✓ All parameters in normal ranges

CORRECTED THRESHOLDS:
  ✓ Rotor speed: 6-20 RPM (hard cap at 19.95)
  ✓ Hydraulic pressure: 155-165 bar
  ✓ Gearbox oil pressure: 2.3-2.7 bar when running
  ✓ Temperatures well below alarm limits

Generating PERFECT data for WT001...
Generating 17,544 PERFECT samples for WT001 over 731 days...
  Total samples: 17,544
  Average power: 982.4 kW
  Capacity factor: 39.3%
  Health score: 100.0% (constant)
  Alarms: 0 (ZERO)

  Verification (running data):
    Max rotor speed: 19.95 RPM (limit: 20)
    Min hydraulic pressure: 157.