## Knock airport Weather
Data was taken from this link.
https://cli.fusio.net/cli/climate_data/webdata/hly4935.csv


Plot I: The temperature

The mean temperature each day

The mean temperature for each month

In [47]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
import requests

print("Libraries imported successfully!")

Libraries imported successfully!


In [None]:
# URL of the dataset
url = "https://cli.fusio.net/cli/climate_data/webdata/hly4935.csv"

# Examine the first 20 lines of the file to understand the structure
try:
    response = requests.get(url) # Fetch the file content 
    lines = response.text.split('\n')[:20]  # First 20 lines

# Print detailed examination of the file structure
    print("Detailed examination of file structure:")

# Print a separator line for clarity
    print("=" * 50)

# Iterate through the first 20 lines and print details
    for i, line in enumerate(lines):
        if line.strip():  # Only print non-empty lines
            print(f"Line {i}:")
            print(f"  Length: {len(line)} characters")
            print(f"  Content: {repr(line)}")  # 'repr' will give us any special characters
        # Show how it splits by comma and semicolon
            print(f"  Split by comma: {line.split(',')}") 
            print(f"  Split by semicolon: {line.split(';')}")
        # Print a separator line between lines for clarity
            print("-" * 30)

# Handle exceptions if any
except Exception as e:
    print(f"Error examining file: {e}")


Detailed examination of file structure:
Line 0:
  Length: 27 characters
  Content: 'Station Name: KNOCK AIRPORT'
  Split by comma: ['Station Name: KNOCK AIRPORT']
  Split by semicolon: ['Station Name: KNOCK AIRPORT']
------------------------------
Line 1:
  Length: 22 characters
  Content: 'Station Height: 201 M '
  Split by comma: ['Station Height: 201 M ']
  Split by semicolon: ['Station Height: 201 M ']
------------------------------
Line 2:
  Length: 35 characters
  Content: 'Latitude:53.906  ,Longitude: -8.817'
  Split by comma: ['Latitude:53.906  ', 'Longitude: -8.817']
  Split by semicolon: ['Latitude:53.906  ,Longitude: -8.817']
------------------------------
Line 5:
  Length: 29 characters
  Content: 'date:  -  Date and Time (utc)'
  Split by comma: ['date:  -  Date and Time (utc)']
  Split by semicolon: ['date:  -  Date and Time (utc)']
------------------------------
Line 6:
  Length: 38 characters
  Content: 'rain:  -  Precipitation Amount (mm)\t  '
  Split by comma: ['rain:

In [None]:
# From your output, I can see the pattern. Let's load it correctly:
try:
    # Skip the header lines - the actual data starts after the variable , until we find the line with actual column names
    data = pd.read_csv(url, skiprows=24)  # Adjust this number based on what we find
    
    print("Data loaded successfully!")
    print(f"Shape: {data.shape}")
    print(f"Column names: {data.columns.tolist()}")
    print("\nFirst 5 rows:")
    print(data.head())
    print("\nData types:")
    print(data.dtypes)
    
except Exception as e:
    print(f"Error: {e}")

Data loaded successfully!
Shape: (256433, 21)
Column names: ['10-apr-1996 14:00', '0', '0.0', '0.1', '11.5', '0.2', '8.1', '3.9', '0.0.1', '0.3', '1016.7', '0.4', '0.5', '0.6', '0.7', '25', '81', '0.0.2', '35000', '32', '5']

First 5 rows:
   10-apr-1996 14:00  0  0.0  0.1  11.5  0.2   8.1   3.9  0.0.1  0.3  ... 0.4  \
0  31-jul-1996 08:00  0  0.0    0  11.5    0  11.1  10.7    0.0    0  ...   0   
1  31-jul-1996 09:00  0  0.0    0  11.6    0  10.7   9.8    0.0    0  ...   0   
2  31-jul-1996 10:00  0  0.0    0  12.9    0  11.3   9.8    0.0    0  ...   0   
3  31-jul-1996 11:00  0  0.0    0  14.5    0  10.8   7.0    0.0    0  ...   0   
4  31-jul-1996 12:00  0  0.0    0  14.4    0  10.9   7.3    0.0    0  ...   0   

   0.5 0.6  0.7  25  81 0.0.2  35000  32  5  
0    0   0    0  25  82   0.0  40000  45  5  
1    0   0    0  80  81   0.0   8000  32  7  
2    0   0    0  25  82   0.0  28000  35  6  
3    0   0    0   2  11   0.0  40000  40  6  
4    0   0    0   1  82   0.0  40000  43  6

  data = pd.read_csv(url, skiprows=24)  # Adjust this number based on what we find


In [None]:
# Let's see what columns we have and find the temperature data
print("All columns in our data:")
for i, col in enumerate(data.columns):
    print(f"{i}: {col}")

# Usually temperature columns have names like 'temp', 'temperature', or might be indicated in the header
# Let's look for numeric columns that could be temperature
print("\nLooking for temperature data...")

# Check which columns are numeric
numeric_cols = data.select_dtypes(include=[np.number]).columns
print(f"Numeric columns: {numeric_cols.tolist()}")

# Let's assume one of the first numeric columns is temperature
if len(numeric_cols) > 0:
    temp_col = numeric_cols[0]  # Use first numeric column
    print(f"We'll use '{temp_col}' for temperature")
else:
    # If no numeric columns, use the second column (assuming first is date)
    temp_col = data.columns[1]
    print(f"No numeric columns found, using '{temp_col}'")

# Check if we have a date column
date_col = None
for col in data.columns:
    if 'date' in col.lower() or 'time' in col.lower():
        date_col = col
        break

if date_col:
    print(f"Found date column: '{date_col}'")
    # Convert to datetime
    data[date_col] = pd.to_datetime(data[date_col])
    data.set_index(date_col, inplace=True)
    print("Date column set as index")
else:
    print("No clear date column found")
    # Create a date index based on row count (assuming hourly data)
    dates = pd.date_range('2023-01-01', periods=len(data), freq='H')
    data.index = dates
    print("Created hourly datetime index")

# Source: DeepSeek (Prompt: "Reading file .csv from url not working, help me to open so I can perform analysis on Python",
# and "Explain each step you are taking in detail, so you can tutor me on why do I need each step")

All columns in our data:
0: 10-apr-1996 14:00
1: 0
2: 0.0
3: 0.1
4: 11.5
5: 0.2
6: 8.1
7: 3.9
8: 0.0.1
9: 0.3
10: 1016.7
11: 0.4
12: 0.5
13: 0.6
14: 0.7
15: 25
16: 81
17: 0.0.2
18: 35000
19: 32
20: 5

Looking for temperature data...
Numeric columns: ['0', '0.1', '11.5', '0.2', '8.1', '3.9', '0.0.1', '0.3', '0.4', '0.6', '0.0.2']
We'll use '0' for temperature
No clear date column found
Created hourly datetime index


  dates = pd.date_range('2023-01-01', periods=len(data), freq='H')


Plot II: The Windspeed

The rolling windspeed (say over 24 hours)

The max windspeed for each day

The monthly mean of the daily max windspeeds (yer I am being nasty here)