# PFDA Project - Winter 2024
*****

## Introduction

This notebook contains an analysis of historical windspeed data recorded by Met Éireann at the Johnstown Castle weather station in County Wexford between 12 August 2003 and 01 December 2024.

In [14]:
# import modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [15]:
# load data
df=pd.read_csv("hly1775.csv", skiprows=17, low_memory=False)

In [16]:
# sanity check
df

Unnamed: 0,date,ind,rain,ind.1,temp,ind.2,wetb,dewpt,vappr,rhum,msl,ind.3,wdsp,ind.4,wddir
0,12-aug-2003 01:00,-1,,4,,4,,,,,,7,,7,
1,12-aug-2003 02:00,-1,,4,,4,,,,,,7,,7,
2,12-aug-2003 03:00,-1,,4,,4,,,,,,7,,7,
3,12-aug-2003 04:00,-1,,4,,4,,,,,,7,,7,
4,12-aug-2003 05:00,-1,,4,,4,,,,,,7,,7,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
186763,30-nov-2024 20:00,0,0.0,0,13.1,0,12.7,12.5,14.5,96,1010.7,2,17,2,190
186764,30-nov-2024 21:00,0,0.0,0,13.2,0,12.7,12.3,14.3,94,1010.3,2,18,2,180
186765,30-nov-2024 22:00,0,0.0,0,13.3,0,12.8,12.3,14.3,93,1010.0,2,18,2,190
186766,30-nov-2024 23:00,0,3.0,0,13.0,0,12.8,12.6,14.6,97,1009.8,2,15,2,190


In [17]:
# convert date column to datetime 
df["date"]=df['date'].astype('datetime64[ns]')
# set date column as index column
df.set_index('date', inplace=True)
df

Unnamed: 0_level_0,ind,rain,ind.1,temp,ind.2,wetb,dewpt,vappr,rhum,msl,ind.3,wdsp,ind.4,wddir
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2003-08-12 01:00:00,-1,,4,,4,,,,,,7,,7,
2003-08-12 02:00:00,-1,,4,,4,,,,,,7,,7,
2003-08-12 03:00:00,-1,,4,,4,,,,,,7,,7,
2003-08-12 04:00:00,-1,,4,,4,,,,,,7,,7,
2003-08-12 05:00:00,-1,,4,,4,,,,,,7,,7,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-11-30 20:00:00,0,0.0,0,13.1,0,12.7,12.5,14.5,96,1010.7,2,17,2,190
2024-11-30 21:00:00,0,0.0,0,13.2,0,12.7,12.3,14.3,94,1010.3,2,18,2,180
2024-11-30 22:00:00,0,0.0,0,13.3,0,12.8,12.3,14.3,93,1010.0,2,18,2,190
2024-11-30 23:00:00,0,3.0,0,13.0,0,12.8,12.6,14.6,97,1009.8,2,15,2,190


In [18]:
# drop missing values from windspeed data
df["wdsp"]= df.loc[:,("wdsp")].replace(' ', np.nan)
df.dropna(inplace=True)
df["wdsp"]

date
2003-08-13 11:00:00     4
2003-08-13 12:00:00     4
2003-08-13 13:00:00     4
2003-08-13 14:00:00     4
2003-08-13 15:00:00     3
                       ..
2024-11-30 20:00:00    17
2024-11-30 21:00:00    18
2024-11-30 22:00:00    18
2024-11-30 23:00:00    15
2024-12-01 00:00:00    13
Name: wdsp, Length: 185772, dtype: object