# Wind Turbine Data EDA
This notebook explores the wind turbine sensor dataset, visualizes key features, and investigates relationships for power prediction.

In [None]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load data
# Read the turbine CSV (relative path, resolved against the current working
# directory) and preview the first five rows as the cell's output.
file_path = '../data/wind_turbine_data.csv'
df = pd.read_csv(filepath_or_buffer=file_path)
df.head(n=5)

## Data Overview
Check the shape, columns, and missing values in the dataset, and review summary statistics for the numeric columns.

In [None]:
# Data overview
# Print the frame's dimensions, column names, and per-column null counts,
# then show summary statistics as the cell's rich output.
print('Shape:', df.shape)
print('Columns:', df.columns.tolist())
# sep='\n' puts the null counts on their own lines. Embedding '\n' in the label
# instead leaves print's default single-space separator in front of the first
# row, which indents it and breaks the column alignment.
print('Missing values:', df.isnull().sum(), sep='\n')
df.describe()

## Feature Distributions
Visualize the distribution of key sensor features and the target variable (Power).

In [None]:
# Plot distributions
# Columns to analyse: the sensor readings plus 'Power'.
features = [
    'Wind_Speed',
    'Ambient_Air_temp',
    'Bearing_Temp',
    'GearTemp',
    'GeneratorTemp',
    'GearBoxSumpTemp',
    'BladePitchAngle',
    'Hub_Speed',
    'Generator_Speed',
    'Power',
]
# One 30-bin histogram per listed column, laid out on a single 16x10 figure.
df.hist(column=features, bins=30, figsize=(16, 10))
plt.tight_layout()
plt.show()

## Correlation Analysis
Explore pairwise correlations among the sensor features and the target variable (Power).

In [None]:
# Correlation heatmap
# Pairwise correlation matrix (pandas' default method) over the columns
# listed in `features`.
corr = df[features].corr()
plt.figure(figsize=(10, 8))
# Pin the colour scale to the full correlation range [-1, 1]. Without explicit
# limits the diverging 'coolwarm' palette is stretched over the observed
# min/max, so its neutral midpoint would not correspond to zero correlation.
sns.heatmap(corr, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
plt.title('Feature Correlation Heatmap')
plt.show()

## Power vs. Wind Speed
Visualize the relationship between wind speed and power output.

In [None]:
# Power vs Wind Speed
# Scatter of Power against Wind_Speed; alpha=0.3 makes markers translucent so
# overlapping points show up as darker areas.
plt.figure(figsize=(8, 5))
sns.scatterplot(data=df, x='Wind_Speed', y='Power', alpha=0.3)
# Title and axis labels applied to the current axes in one call.
plt.gca().set(title='Power vs Wind Speed', xlabel='Wind Speed', ylabel='Power')
plt.show()

## Turbine Performance
Compare power output across different turbines.

In [None]:
# Power by Turbine
# Box plot of Power grouped by turbine; drawn only when the dataset has a
# 'TurbineName' column.
if 'TurbineName' in df.columns:
    plt.figure(figsize=(10, 5))
    sns.boxplot(x='TurbineName', y='Power', data=df)
    plt.title('Power Output by Turbine')
    # Rotate the category labels on the x-axis by 45 degrees.
    plt.xticks(rotation=45)
    plt.show()
else:
    # Say so explicitly; otherwise the cell finishes with no output and the
    # reader cannot tell whether the comparison was skipped or never ran.
    print("Column 'TurbineName' not found; skipping per-turbine comparison.")