# Load Libraries and dataset

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset: read "wind-turbine.csv" (path is relative to the
# working directory) into the DataFrame `data` used by the next cell.
data = pd.read_csv("wind-turbine.csv")


In [None]:
# Parse the year-only 'commissioning_date' values ('%Y') into timestamps
commissioned = pd.to_datetime(data['commissioning_date'], format='%Y')
data['commissioning_date'] = commissioned

# Fixed reference date; only its year takes part in the age calculation
current_date = pd.to_datetime('2023-11-08')
current_year = current_date.year

# Turbine age in whole calendar years: reference year minus commissioning year
data['turbine_age'] = current_year - commissioned.dt.year


# Manufacturer vs. Turbine Age

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the turbine records
data = pd.read_csv("wind-turbine.csv")

# Parse the year-only 'commissioning_date' values ('%Y') into timestamps
commissioned = pd.to_datetime(data['commissioning_date'], format='%Y')
data['commissioning_date'] = commissioned

# Fixed reference date; only its year takes part in the age calculation
current_date = pd.to_datetime('2023-11-08')
current_year = current_date.year

# Turbine age in whole calendar years: reference year minus commissioning year
data['turbine_age'] = current_year - commissioned.dt.year

# Visualization - Turbine Manufacturer vs. Turbine Age.
# Two side-by-side panels over the same columns: a box plot on the left and
# a violin plot on the right.
panels = [
    (sns.boxplot, "Turbine Manufacturer vs. Turbine Age"),
    (sns.violinplot, "Distribution of Turbine Age by Manufacturer"),
]

plt.figure(figsize=(12, 6))
for position, (draw_panel, panel_title) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    draw_panel(x='manufacturer', y='turbine_age', data=data)
    # Manufacturer names go on the x axis; rotate them so they do not overlap
    plt.xticks(rotation=90)
    plt.title(panel_title)

plt.tight_layout()
plt.show()



# Model vs. Turbine Age

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the turbine records
data = pd.read_csv("wind-turbine.csv")

# Parse the year-only 'commissioning_date' values ('%Y') into timestamps
commissioned = pd.to_datetime(data['commissioning_date'], format='%Y')
data['commissioning_date'] = commissioned

# Fixed reference date; only its year takes part in the age calculation
current_date = pd.to_datetime('2023-11-08')
current_year = current_date.year

# Turbine age in whole calendar years: reference year minus commissioning year
data['turbine_age'] = current_year - commissioned.dt.year

# Keep only the 10 most frequent models so the plots stay readable
model_counts = data['model'].value_counts()
top_n_models = model_counts.nlargest(10).index
is_top_model = data['model'].isin(top_n_models)
filtered_data = data[is_top_model]

# Visualization - Turbine Model vs. Turbine Age.
# Two side-by-side panels over the filtered rows: a box plot on the left and
# a violin plot on the right.
panels = [
    (sns.boxplot, "Turbine Model vs. Turbine Age (Top 10 Models)"),
    (sns.violinplot, "Distribution of Turbine Age by Model (Top 10 Models)"),
]

plt.figure(figsize=(12, 6))
for position, (draw_panel, panel_title) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    draw_panel(x='model', y='turbine_age', data=filtered_data)
    # Model names go on the x axis; rotate them so they do not overlap
    plt.xticks(rotation=90)
    plt.title(panel_title)

plt.tight_layout()
plt.show()


# Count of Turbines by Manufacturer

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset
data = pd.read_csv("wind-turbine.csv")

# Visualization - number of turbines (dataset rows) per manufacturer,
# with the count written above each bar
plt.figure(figsize=(12, 6))
ax = sns.countplot(x='manufacturer', data=data)
plt.xticks(rotation=90)
plt.title("Count of Turbines by Manufacturer")

# Add counts on top of each bar. Bar heights come back as floats, so format
# them without decimals to show "12" rather than "12.0". The label is
# centred on the bar and shifted 10 points above its top edge.
for p in ax.patches:
    ax.annotate(f'{p.get_height():.0f}', (p.get_x() + p.get_width() / 2., p.get_height()),
                ha='center', va='center', xytext=(0, 10), textcoords='offset points')

plt.show()





# Turbine Age vs. Commissioning Date

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Read the turbine records
data = pd.read_csv("wind-turbine.csv")

# Parse the year-only 'commissioning_date' values ('%Y') into timestamps
commissioned = pd.to_datetime(data['commissioning_date'], format='%Y')
data['commissioning_date'] = commissioned

# Fixed reference date; only its year takes part in the age calculation
current_date = pd.to_datetime('2023-11-08')
current_year = current_date.year

# Turbine age in whole calendar years: reference year minus commissioning year
data['turbine_age'] = current_year - commissioned.dt.year

# Scatter plot - Turbine Age vs Commissioning Date.
# Points are semi-transparent so overlapping turbines remain visible.
scatter_fig, scatter_ax = plt.subplots(figsize=(12, 6))
scatter_ax.scatter(data['commissioning_date'], data['turbine_age'], alpha=0.5)
scatter_ax.set_title('Turbine Age vs Commissioning Date')
scatter_ax.set_xlabel('Commissioning Date')
scatter_ax.set_ylabel('Turbine Age')
plt.show()


In [None]:
pip install pandas scikit-learn


# Predict Remaining Useful Life (RUL)

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.impute import SimpleImputer
from datetime import datetime

# Load the dataset
data = pd.read_csv("wind-turbine.csv")

# Convert 'commissioning_date' to datetime with only the year
data['commissioning_date'] = pd.to_datetime(data['commissioning_date'], format='%Y')

# Turbine age in whole calendar years, measured against today's date
current_date = datetime.now()
data['turbine_age'] = current_date.year - data['commissioning_date'].dt.year

# Regression target.
# NOTE(review): despite the 'rul' name, this value is the number of days
# elapsed since commissioning (the turbine's age in days), not the remaining
# life. A true remaining-useful-life target would need a design lifetime or
# decommissioning date, which this script does not have - confirm the intent.
data['rul'] = (current_date - data['commissioning_date']).dt.days

# Select features and target variable
features = ['turbine_rated_capacity_k_w', 'rotor_diameter_m', 'hub_height_m']
target = 'rul'

# Only the features are imputed below, so rows whose target is missing
# (no commissioning date) are left out here instead of reaching fit() as NaN.
labelled = data.dropna(subset=[target])

# Split the data into training and testing sets (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    labelled[features], labelled[target], test_size=0.2, random_state=42
)

# Impute missing feature values with the training-set mean; the imputer is
# fitted on the training split only and then applied to the test split
imputer = SimpleImputer(strategy='mean')
X_train = imputer.fit_transform(X_train)
X_test = imputer.transform(X_test)

# Initialize and train the Random Forest Regressor model
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model.
# RMSE is taken as the square root of the MSE: the `squared=False` shortcut
# was deprecated in scikit-learn 1.4 and removed in 1.6, while this form
# works on every version.
mae = mean_absolute_error(y_test, y_pred)
rmse = mean_squared_error(y_test, y_pred) ** 0.5
r2 = r2_score(y_test, y_pred)

print(f'Mean Absolute Error (MAE): {mae}')
print(f'Root Mean Squared Error (RMSE): {rmse}')
print(f'R-squared (R2): {r2}')

