In [None]:
# Imports are repeated here on purpose: this cell uses `pd` and `Path` before the
# notebook's import cell, so it would fail on Restart Kernel -> Run All without them.
from pathlib import Path

import pandas as pd

# Paths
DATA_DIR = Path("stationdata")   # folder containing all station CSVs
STATIONINFO_FILE = "stationinfo.csv"
DATETILL2025_FILE = "datetill2025.csv"

station_records = []

# Step 1: Build stationinfo.csv
# One record per station file: the station name plus its first and last observation date.
for file in sorted(DATA_DIR.glob("*.csv")):   # sorted -> reproducible row order
    try:
        df = pd.read_csv(file, usecols=["station", "date"])
    except (OSError, ValueError) as e:
        # ValueError covers missing columns, empty files and parser errors.
        print(f"Skipped {file.name}: {e}")
        continue

    # Unparseable or missing dates become NaT and are dropped.
    df["date"] = pd.to_datetime(df["date"], errors="coerce")
    df = df.dropna(subset=["date"])
    if df.empty:
        # Previously this case only surfaced as an IndexError from `.iloc[0]`.
        print(f"Skipped {file.name}: no parseable dates")
        continue

    station_name = df["station"].iloc[0]
    start_date = df["date"].min().date()
    end_date = df["date"].max().date()

    station_records.append({
        "station": station_name,
        "startdate": start_date,
        "enddate": end_date
    })

    print(f"{station_name}: {start_date} → {end_date}")

# Explicit columns keep the frame well-formed even when no file could be read.
stationinfo_df = pd.DataFrame(station_records, columns=["station", "startdate", "enddate"])

if stationinfo_df.empty:
    # Nothing to summarise: skip both output files instead of failing on an empty frame.
    print(f"No readable station files found in {DATA_DIR}; nothing was written.")
    all_dates = []
    datetill_df = pd.DataFrame({"date": all_dates})
else:
    # Save stationinfo.csv
    stationinfo_df.to_csv(STATIONINFO_FILE, index=False)
    print(f"Saved station info to {STATIONINFO_FILE}")

    # Step 2: Generate datetill2025.csv
    # Compute date range: from max(startdate) to max(enddate),
    # i.e. from the latest station start to the latest observation of any station.
    max_start = stationinfo_df["startdate"].max()
    max_end = stationinfo_df["enddate"].max()

    all_dates = pd.date_range(start=max_start, end=max_end, freq="D").date

    # Save to datetill2025.csv
    datetill_df = pd.DataFrame({"date": all_dates})
    datetill_df.to_csv(DATETILL2025_FILE, index=False)
    print(f"Saved {len(all_dates)} dates to {DATETILL2025_FILE}")

In [None]:
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt

In [None]:
def file_with_station(file,station):
    """Clean one raw hourly station file and save it as ``stationdata/<station>.csv``.

    The raw file may begin with free-text lines. The data table is taken to start
    at the first line whose first comma-separated cell is ``date``
    (case-insensitive). Rows before the first valid wind speed are dropped.

    Columns added to the table: ``wdsp_ms``, ``station``, ``dateonly``, ``year``,
    ``yearmonth``, ``month``, ``hour``, ``rho`` and ``power``.

    Parameters
    ----------
    file : str or path-like
        Raw input CSV. It must contain ``date``, ``wdsp`` and ``temp`` columns,
        with ``date`` formatted like ``01-Jan-2020 00:00``.
    station : str
        Station name; stored in the ``station`` column and used as the output
        file name.

    Raises
    ------
    ValueError
        If no header row starting with ``date`` is found, or if the file holds
        no numeric ``wdsp`` value at all.
    """
    knots_to_ms = 0.5144444444   # factor used to express `wdsp` in m/s
    R = 287.05       # J/(kg·K)
    p = 101325       # Pa (assumed constant)

    # Find the row number where the data starts, i.e. the row that contains
    # 'date' in its first column. The file is scanned lazily and the scan stops
    # at the header, so the whole file is not held in memory twice.
    header_row = None
    with open(file, "r", encoding="utf-8") as f:
        for i, line in enumerate(f):
            if line.split(",")[0].strip().lower() == "date":
                header_row = i
                break
    if header_row is None:
        # Without this check pandas would read from line 0 and fail later
        # with a confusing KeyError on 'wdsp'.
        raise ValueError(f"{file}: no header row starting with 'date' was found")

    # Read file as csv, skipping the unnecessary rows above the header
    df = pd.read_csv(file, skiprows=header_row, low_memory=False)

    # convert 'wdsp' to numeric (non-numeric cells become NaN)
    df["wdsp"] = pd.to_numeric(df["wdsp"], errors="coerce")

    # convert the measurement of wdsp into m/s
    df["wdsp_ms"] = df["wdsp"] * knots_to_ms

    # find first row where wind speed is not NaN and drop the rows before it
    has_wdsp = df["wdsp"].notna()
    if not has_wdsp.any():
        raise ValueError(f"{file}: no valid 'wdsp' values were found")
    first_valid_value = has_wdsp.idxmax()   # label of the first True
    df = df.loc[first_valid_value:].reset_index(drop=True)

    # modify the format of 'date'
    df['date'] = pd.to_datetime(df['date'], format='%d-%b-%Y %H:%M')

    # Add a column which takes the station name as value
    df["station"] = station

    # Calendar helper columns; 'date' is already datetime, so it is not re-parsed.
    df['dateonly'] = df['date'].dt.date
    df['year'] = df['date'].dt.strftime('%Y')
    df['yearmonth'] = df['date'].dt.strftime('%Y-%m')
    df['month'] = df['date'].dt.strftime('%m')
    df['hour'] = df['date'].dt.strftime('%H:%M')

    # convert 'temp' to numeric
    df["temp"] = pd.to_numeric(df["temp"], errors="coerce")

    # Add column 'rho' - air density from the ideal gas law at constant pressure;
    # the +273.15 treats 'temp' as degrees Celsius.
    df["rho"] = p / (R * (df["temp"] + 273.15))

    # Add column 'power' - wind power density
    df['power'] = 0.5 * df["rho"] * df["wdsp_ms"]**3

    # Save as a new file in folder stationdata, creating the folder on first use.
    # The default index is written too, so the file keeps its unnamed first column.
    out_dir = Path("stationdata")
    out_dir.mkdir(parents=True, exist_ok=True)
    df.to_csv(out_dir / f"{station}.csv")
    print(f"The file {station}.csv is now created.")

In [None]:
# Raw hourly file paired with the station name it belongs to.
# Every pair is cleaned and written to stationdata/<station name>.csv.
HOURLY_STATIONS = [
    ("data/hly275.csv", "MACE HEAD"),
    ("data/hly375.csv", "OAK PARK"),
    ("data/hly518.csv", "SHANNON AIRPORT"),
    ("data/hly532.csv", "DUBLIN AIRPORT"),
    ("data/hly575.csv", "MOORE PARK"),
    ("data/hly675.csv", "BALLYHAISE"),
    ("data/hly775.csv", "SHERKIN ISLAND"),
    ("data/hly875.csv", "MULLINGAR"),
    ("data/hly1075.csv", "ROCHES POINT"),
    ("data/hly1175.csv", "NEWPORT"),
    ("data/hly1375.csv", "DUNSANY"),
    ("data/hly1475.csv", "GURTEEN"),
    ("data/hly1575.csv", "MALIN HEAD"),
    ("data/hly1775.csv", "JOHNSTOWN CASTLE 2"),
    ("data/hly1875.csv", "ATHENRY"),
    ("data/hly1975.csv", "MT DILLON"),
    ("data/hly2075.csv", "FINNER"),
    ("data/hly2175.csv", "CLAREMORRIS"),
    ("data/hly2275.csv", "VALENTIA OBSERVATORY"),
    ("data/hly3904.csv", "CORK AIRPORT"),
    ("data/hly4935.csv", "KNOCK AIRPORT"),
] if False else [
    ("data/hly275.csv", "MACE HEAD"),
    ("data/hly375.csv", "OAK PARK"),
    ("data/hly518.csv", "SHANNON AIRPORT"),
    ("data/hly532.csv", "DUBLIN AIRPORT"),
    ("data/hly575.csv", "MOORE PARK"),
    ("data/hly675.csv", "BALLYHAISE"),
    ("data/hly775.csv", "SHERKIN ISLAND"),
    ("data/hly875.csv", "MULLINGAR"),
    ("data/hly1075.csv", "ROCHES POINT"),
    ("data/hly1175.csv", "NEWPORT"),
    ("data/hly1375.csv", "DUNSANY"),
    ("data/hly1475.csv", "GURTEEN"),
    ("data/hly1575.csv", "MALIN HEAD"),
    ("data/hly1775.csv", "JOHNSTOWN CASTLE 2"),
    ("data/hly1875.csv", "ATHENRY"),
    ("data/hly1975.csv", "MT DILLON"),
    ("data/hly2075.csv", "FINNER"),
    ("data/hly2175.csv", "CLAREMORRIS"),
    ("data/hly2275.csv", "VALENTIA OBSERVATORY"),
    ("data/hly2375.csv", "BELMULLET"),
    ("data/hly3904.csv", "CORK AIRPORT"),
    ("data/hly4935.csv", "KNOCK AIRPORT"),
]

# Same calls, in the same order, as the original one-line-per-station list.
for raw_path, name in HOURLY_STATIONS:
    file_with_station(raw_path, name)

In [None]:
# Run the conversion for Oak Park only; this (re)writes stationdata/OAK PARK.csv.
file_with_station("data/hly375.csv","OAK PARK")

In [None]:
# Load the cleaned Oak Park file written by file_with_station and preview it.
# No parse_dates is given, so the 'date' column is read back as plain text.
oakpark = pd.read_csv("stationdata/OAK PARK.csv")
oakpark.head()

In [None]:
# Variation of wind speed by month
# Mean of the `wdsp_ms` values for each calendar month, pooled over all years.
fig, ax = plt.subplots(figsize=(15, 4))
monthly_wdsp = oakpark.groupby("month")["wdsp_ms"].mean()
monthly_wdsp.plot(ax=ax, title="Monthly Mean Wind speed")
ax.set_xlabel('months of a year')
# The plotted quantity is wind speed in m/s; the axis used to say 'Wind power'.
ax.set_ylabel('Wind speed (m/s)')
ax.grid(axis='y', color='green', linestyle='--', linewidth=0.4)

In [None]:
# Variation of power density by month
# Two estimates are compared:
#   * `power_density` - computed here with a constant air density,
#   * `power`         - read from the station file, where the air density was
#                       derived from the measured temperature.
fig, ax = plt.subplots(figsize=(15, 4))
rho = 1.225  # air density kg/m^3
oakpark["power_density"] = 0.5 * rho * oakpark["wdsp_ms"]**3
monthly_power_density = oakpark.groupby("month")["power_density"].mean()
monthly_power = oakpark.groupby("month")["power"].mean()
# Each curve gets a label so the legend tells the two estimates apart.
monthly_power_density.plot(ax=ax, color='green', label='constant air density (1.225 kg/m^3)')
monthly_power.plot(ax=ax, color='orange', label='temperature-dependent air density')
ax.set_title("Monthly Mean Wind Power Density")
ax.set_xlabel('months of a year')
ax.set_ylabel('Wind power density (W/m^2)')
ax.grid(axis='y', color='green', linestyle='--', linewidth=0.4)
ax.legend()

In [None]:
# variation of power density by hour
# Uses the `power_density` column (constant air density), which must already
# exist on `oakpark`, next to the `power` column from the station file
# (temperature-dependent air density).
fig, ax = plt.subplots(figsize=(15, 4))
hourly_power_density = oakpark.groupby("hour")["power_density"].mean()
hourly_power = oakpark.groupby("hour")["power"].mean()
# Each curve gets a label so the legend tells the two estimates apart.
hourly_power_density.plot(ax=ax, color='green', label='constant air density')
hourly_power.plot(ax=ax, color='orange', label='temperature-dependent air density')
ax.set_title("Hourly Mean Wind Power Density")
ax.set_xlabel('hours of a day')
ax.set_ylabel('Wind power density (W/m^2)')
ax.grid(axis='y', color='green', linestyle='--', linewidth=0.4)
ax.legend()

In [None]:
# Variation of wind speed by hour of the day
# (mean `wdsp_ms` per 'hour' label, pooled over the whole record)
hourly_wdsp = oakpark.groupby("hour")["wdsp_ms"].mean()

fig, ax = plt.subplots(figsize=(15, 4))
hourly_wdsp.plot(ax=ax, title="Hourly Mean Wind speed")
ax.set_xlabel('hours of a day')
ax.set_ylabel('Wind speed')
ax.grid(axis='y', color='green', linestyle='--', linewidth=0.4)

In [None]:
# Study wind speed range
plt.hist(oakpark["wdsp_ms"], bins=20)
plt.xlabel("Wind speed (m/s)")
plt.ylabel("Frequency")
plt.title("Wind speed distribution")
plt.show()

In [None]:
# Define the wind range
cut_in = 7
optimum = 18     
cut_out = 27 

In [None]:
# Study wind range for wind turbines operation
oakpark["operating"] = (oakpark["wdsp"] >= 7) & (oakpark["wdsp"] <= 27)
oakpark["optimal"]   = (oakpark["wdsp"] >= 12) & (oakpark["wdsp"] <= 18)

# number of hours per day that the wind speed is in the operating range
daily_hours = oakpark.groupby("dateonly")["operating"].sum()
daily_avg = daily_hours.mean()

daily_optimal = oakpark.groupby("dateonly")["optimal"].sum().mean()

print(f'{daily_avg} & {daily_optimal}')


In [None]:
# Operating / optimal counts per calendar month, pooled over every year in the
# record, followed by the mean of those per-month totals.
hours_by_month = oakpark.groupby("month")[["operating", "optimal"]].sum()

monthly_hours = hours_by_month["operating"]
monthly_avg = monthly_hours.mean()
monthly_optimal = hours_by_month["optimal"].mean()

print(f'{monthly_avg} & {monthly_optimal}')

In [None]:
# Bar chart of the operating-hour totals, one bar per calendar month
fig, ax = plt.subplots()
ax.bar(monthly_hours.index, monthly_hours.values)
ax.set(
    xlabel="Month",
    ylabel="Operating hours",
    title="Monthly wind turbine operating hours",
)
plt.show()

In [None]:
# Operating / optimal counts summed per year, then averaged across the years.
hours_by_year = oakpark.groupby("year")[["operating", "optimal"]].sum()

yearly_hours = hours_by_year["operating"]
yearly_avg = yearly_hours.mean()
yearly_optimal = hours_by_year["optimal"].mean()

In [None]:
# Bar chart of the operating-hour totals, one bar per year
fig, ax = plt.subplots()
ax.bar(yearly_hours.index, yearly_hours.values)
ax.set(
    xlabel="Year",
    ylabel="Operating hours",
    title="Yearly wind turbine operating hours",
)
plt.show()

## Is there a trend in recorded wind speeds over the last few decades?

In [None]:
from scipy.stats import linregress

DATADIR = Path("stationdata")

# Pool the observations of every station file into one frame.
# Only the needed columns are read. `wdsp_ms` (m/s) drives the analysis so the
# numbers agree with the "m/s" units in the printed slope and on the y-axis;
# the raw `wdsp` column is kept alongside it for reference.
all_df = []

for file in sorted(DATADIR.glob("*.csv")):   # sorted -> reproducible order
    df = pd.read_csv(file, usecols=["date", "wdsp", "wdsp_ms"], parse_dates=["date"])
    df = df.dropna(subset=["wdsp_ms"])
    df["station"] = file.stem
    all_df.append(df[["date", "wdsp", "wdsp_ms", "station"]])

if not all_df:
    # pd.concat([]) would fail with the unhelpful "No objects to concatenate".
    raise FileNotFoundError(f"No station CSV files found in {DATADIR}")

df = pd.concat(all_df)

# Aggregate by year
# NOTE(review): this is one pooled mean over all rows, so stations with more
# observations weigh more, and the set of stations contributing to a year may
# change over time - confirm this is acceptable for a trend estimate.
df["year"] = df["date"].dt.year
annual_mean = df.groupby("year")["wdsp_ms"].mean()

# Fit linear regression of the annual mean (m/s) against the year
years = annual_mean.index.to_numpy()
slope, intercept, r_value, p_value, std_err = linregress(years, annual_mean.to_numpy())

print(f"Slope: {slope:.3f} m/s per year")
print(f"P-value: {p_value:.3f}")
print(f"R-squared: {r_value**2:.3f}")

# Plot the annual means together with the fitted trend line
plt.figure(figsize=(10,5))
plt.plot(years, annual_mean.to_numpy(), marker='o', label="Annual mean wind speed")
plt.plot(years, intercept + slope*years, color='red', linestyle='--', label="Trend")
plt.xlabel("Year")
plt.ylabel("Mean wind speed (m/s)")
plt.title("Annual mean wind speed trend")
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

## Annual mean wind speed at Oak Park with trend line

import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from scipy.stats import linregress

# Location of the cleaned Oak Park station file
file = Path("stationdata/OAK PARK.csv")  # replace with your file

# Load it, keeping only the rows that have a wind speed in m/s
df = pd.read_csv(file, parse_dates=["date"]).dropna(subset=["wdsp_ms"])

# Mean wind speed for every calendar year
df["year"] = df["date"].dt.year
annual_mean = df.groupby("year")["wdsp_ms"].mean()

# Regression inputs: the years and the matching annual means
X = annual_mean.index.to_numpy()
Y = annual_mean.to_numpy()

# Least-squares line through the annual means
fit = linregress(X, Y)
slope, intercept, r_value, p_value, std_err = fit
trend = slope * X + intercept

print(f"Slope: {slope:.3f} m/s per year")
print(f"P-value: {p_value:.4f}, R-squared: {r_value**2:.3f}")

# Draw the annual means with the fitted line on top
fig, ax = plt.subplots(figsize=(10,5))

ax.plot(X, Y, marker='o', linestyle='-', color='tab:blue', label="Annual mean wdsp")
ax.plot(X, trend, color='red', linestyle='--', linewidth=2, label="Trend line")

ax.set(
    xlabel="Year",
    ylabel="Mean wind speed (m/s)",
    title="Annual mean wind speed at Oak Park with trend line",
)
ax.grid(linestyle='--', linewidth=0.5, color='0.25')
ax.legend(title="Legend", fontsize='small', loc="upper left")

fig.tight_layout()
plt.show()