# **Importing Necessary Libraries**

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# from google.colab import files
# import io
from scipy import stats
from scipy.stats import kendalltau
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.graphics.tsaplots import plot_acf
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.metrics import mean_squared_error
from math import sqrt

In [2]:
import folium
from folium.plugins import FastMarkerCluster


# **Loading the datasets**

In [3]:
# Load the two input tables from the working directory.
# `data`  : yearly records read from CSV; its `station_id` column is renamed
#           further down so it can be joined with `data2`.
# `data2` : Excel sheet that is merged onto `data` on 'Station ID'
#           (presumably the station metadata — TODO confirm against the file).
data = pd.read_csv('all_yearly.csv')
data2 = pd.read_excel('Weather (1).xlsx')

**Ensure consistent column names for merging**

In [4]:
# Align the key column name with `data2` so both tables can be merged on it.
# Reassigning the result (rather than mutating with inplace=True) follows the
# recommended pandas idiom and keeps the step chain-friendly.
data = data.rename(columns={'station_id': 'Station ID'})

**Merge the datasets on 'Station ID'**

In [5]:
# Inner join: keep only the yearly rows whose 'Station ID' also appears in data2
merged_df = data.merge(data2, how='inner', on='Station ID')
merged_df

Unnamed: 0,Station ID,year,n_rain,R5mm,R10mm,R20mm,SDII,PRCPTOT,CDD,CWD,...,TN10p,TN90p,TX10p,TX90p,WSDI,CSDI,GSL,Latitude,Longitude,Direction
0,ATHENRY,2012,275,,,,,,,,...,,,,,,,328.0,53.3,-8.8,north-west
1,ATHENRY,2013,365,78.0,33.0,9.0,6.703,1112.7,29.1,87.3,...,,,,,,,321.0,53.3,-8.8,north-west
2,ATHENRY,2014,365,96.0,35.0,5.0,6.468,1235.3,31.9,58.1,...,,,,,,,337.0,53.3,-8.8,north-west
3,ATHENRY,2015,365,116.0,50.0,10.0,7.527,1543.1,34.5,85.3,...,,,,,,,353.0,53.3,-8.8,north-west
4,ATHENRY,2016,366,76.0,23.0,3.0,5.623,1045.8,30.0,59.2,...,,,,,,,304.0,53.3,-8.8,north-west
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1684,VALENTIA OBSERVATORY,2019,365,117.0,59.0,11.0,8.039,1664.0,37.0,84.6,...,5.479,24.110,4.384,9.041,8.0,0.0,359.0,51.9,-10.2,south-west
1685,VALENTIA OBSERVATORY,2020,366,124.0,64.0,20.0,9.283,1912.3,60.6,108.9,...,4.918,14.208,2.732,12.568,12.0,0.0,360.0,51.9,-10.2,south-west
1686,VALENTIA OBSERVATORY,2021,365,101.0,47.0,17.0,7.700,1524.6,61.4,95.8,...,5.753,20.822,6.027,13.973,6.0,0.0,349.0,51.9,-10.2,south-west
1687,VALENTIA OBSERVATORY,2022,365,109.0,60.0,19.0,8.786,1651.7,37.3,108.4,...,4.670,24.725,4.121,18.956,22.0,10.0,340.0,51.9,-10.2,south-west


**Create a copy of the merged dataset**

In [6]:
# Work on an independent (deep) copy so the merged frame itself stays untouched
merged_df_copy = merged_df.copy(deep=True)

# **Outlier handling**

In [7]:
# Columns screened for outliers with the 1.5 * IQR rule
columns_to_check = [
    'year', 'n_rain', 'R5mm', 'R10mm', 'R20mm', 'SDII', 'PRCPTOT',
    'CDD', 'CWD', 'Rx1day', 'Rx5day', 'R95pTOT', 'R99pTOT', 'n_maxdy',
    'n_mindy', 'FD', 'TR', 'SU', 'ID', 'TNn', 'TNx', 'TXn', 'TXx',
    'DTR', 'TN10p', 'TN90p', 'TX10p', 'TX90p', 'WSDI', 'CSDI', 'GSL'
]

# column name -> number of values lying outside the fences
outlier_counts = {}

for column in columns_to_check:
    values = merged_df_copy[column]

    # Quartiles and interquartile range of this column
    q1, q3 = values.quantile(0.25), values.quantile(0.75)
    spread = q3 - q1

    # Fences sit 1.5 * IQR below Q1 and above Q3
    low_fence, high_fence = q1 - 1.5 * spread, q3 + 1.5 * spread

    # Missing values compare False on both sides, so they are not counted
    outlier_counts[column] = (values.lt(low_fence) | values.gt(high_fence)).sum()

# Tabulate the counts for display
outlier_counts_df = pd.DataFrame({
    'Column': list(outlier_counts.keys()),
    'Outlier Count': list(outlier_counts.values()),
})

print(outlier_counts_df)


     Column  Outlier Count
0      year              0
1    n_rain            615
2      R5mm             32
3     R10mm             38
4     R20mm             76
5      SDII             20
6   PRCPTOT             44
7       CDD             77
8       CWD             53
9    Rx1day             25
10   Rx5day             48
11  R95pTOT             41
12  R99pTOT             64
13  n_maxdy            497
14  n_mindy            505
15       FD             12
16       TR              1
17       SU            121
18       ID            241
19      TNn             27
20      TNx              6
21      TXn             21
22      TXx             12
23      DTR              0
24    TN10p             14
25    TN90p             10
26    TX10p             28
27    TX90p             10
28     WSDI             24
29     CSDI            238
30      GSL             45


In [8]:
# Quartiles and IQR of annual total precipitation
Q1, Q3 = merged_df_copy['PRCPTOT'].quantile([0.25, 0.75])
IQR = Q3 - Q1

# Fences sit 1.5 * IQR outside the quartiles
lower_bound, upper_bound = Q1 - 1.5 * IQR, Q3 + 1.5 * IQR

# Keep rows whose PRCPTOT lies within the fences (both ends inclusive).
# Rows with a missing PRCPTOT fail the test and are therefore dropped as well.
cleaned_df = merged_df_copy.loc[
    merged_df_copy['PRCPTOT'].between(lower_bound, upper_bound)
]

# Show the filtered frame
print(cleaned_df)

                Station ID  year  n_rain   R5mm  R10mm  R20mm   SDII  PRCPTOT  \
1                  ATHENRY  2013     365   78.0   33.0    9.0  6.703   1112.7   
2                  ATHENRY  2014     365   96.0   35.0    5.0  6.468   1235.3   
3                  ATHENRY  2015     365  116.0   50.0   10.0  7.527   1543.1   
4                  ATHENRY  2016     366   76.0   23.0    3.0  5.623   1045.8   
5                  ATHENRY  2017     365   80.0   33.0    6.0  5.958   1167.8   
...                    ...   ...     ...    ...    ...    ...    ...      ...   
1678  VALENTIA OBSERVATORY  2013     365  104.0   52.0   18.0  7.900   1619.4   
1681  VALENTIA OBSERVATORY  2016     366   93.0   47.0   13.0  7.973   1586.7   
1682  VALENTIA OBSERVATORY  2017     365   97.0   46.0   13.0  7.313   1557.6   
1686  VALENTIA OBSERVATORY  2021     365  101.0   47.0   17.0  7.700   1524.6   
1687  VALENTIA OBSERVATORY  2022     365  109.0   60.0   19.0  8.786   1651.7   

        CDD    CWD  ...  TN

# **Unique list of values**

In [9]:
# First row encountered for every (station, direction) pair
df_unq_loc = cleaned_df.drop_duplicates(
    subset=['Station ID', 'Direction'],
    keep='first',
)
df_unq_loc

Unnamed: 0,Station ID,year,n_rain,R5mm,R10mm,R20mm,SDII,PRCPTOT,CDD,CWD,...,TN10p,TN90p,TX10p,TX90p,WSDI,CSDI,GSL,Latitude,Longitude,Direction
1,ATHENRY,2013,365,78.0,33.0,9.0,6.703,1112.7,29.1,87.3,...,,,,,,,321.0,53.3,-8.8,north-west
13,BALLYHAISE,1994,356,80.0,28.0,2.0,5.811,1075.1,33.0,49.1,...,,,,,,,,54.1,-7.3,north-east
44,BELMULLET,1957,365,81.0,26.0,6.0,5.688,1086.5,34.6,56.5,...,5.753,17.534,5.753,16.986,31.0,0.0,359.0,54.2,-10.0,north-west
112,BIRR,1955,365,57.0,22.0,1.0,5.825,739.8,21.0,49.6,...,18.356,12.329,12.603,17.534,6.0,14.0,327.0,53.1,-7.9,south-east
167,CASEMENT,1954,365,47.0,19.0,2.0,5.394,809.1,47.4,80.7,...,,,,,,,,53.3,-6.4,north-east
238,CLAREMORRIS,1944,366,72.0,33.0,8.0,6.147,1063.5,32.0,56.7,...,,,,,,,,53.7,-9.0,north-west
318,CLONES,1950,365,65.0,21.0,4.0,5.499,1006.4,62.0,88.9,...,,,,,,,,54.2,-7.2,north-east
381,CORK AIRPORT,1962,365,61.0,31.0,11.0,7.647,1009.4,39.2,79.2,...,16.614,3.816,15.665,5.568,0.0,0.0,335.0,51.8,-8.5,south-west
443,DUBLIN AIRPORT,1941,365,47.0,16.0,2.0,5.28,702.3,36.9,61.1,...,,,,,,,,53.4,-6.2,north-east
526,DUNSANY,1941,361,52.0,21.0,3.0,6.032,784.1,44.5,54.4,...,,,,,,,,53.5,-6.7,north-east


**Add the Precipitation to the graph to understand differences**

In [10]:
# Min / mean / max of annual total precipitation for each station
df_s = (
    cleaned_df
    .groupby(by=['Station ID'])['PRCPTOT']
    .agg(['min', 'mean', 'max'])
)
df_s

Unnamed: 0_level_0,min,mean,max
Station ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ATHENRY,1045.8,1254.136364,1552.7
BALLYHAISE,723.0,993.152174,1226.6
BELMULLET,895.4,1167.171642,1490.8
BIRR,552.1,798.690741,1084.3
CASEMENT,541.0,718.733333,992.4
CLAREMORRIS,826.0,1145.350685,1606.4
CLONES,617.9,908.522414,1156.8
CORK AIRPORT,871.2,1195.754839,1544.4
DUBLIN AIRPORT,524.4,721.912195,1073.0
DUNSANY,564.1,818.206667,1024.4


**Merge the unique locations and precipitation**

In [11]:
# Attach each station's precipitation summary to its location record.
# `df_s` is indexed by 'Station ID'; the index is turned into a column inline
# instead of rebinding `df_s`, so re-running this cell does not keep stacking
# extra `index` columns onto the summary table.
df_unq_temp = pd.merge(
    df_unq_loc.loc[:, ['Station ID', 'Direction', 'Latitude', 'Longitude']],
    df_s.reset_index(),
    how='left',
    on=['Station ID'],
)
df_unq_temp

Unnamed: 0,Station ID,Direction,Latitude,Longitude,min,mean,max
0,ATHENRY,north-west,53.3,-8.8,1045.8,1254.136364,1552.7
1,BALLYHAISE,north-east,54.1,-7.3,723.0,993.152174,1226.6
2,BELMULLET,north-west,54.2,-10.0,895.4,1167.171642,1490.8
3,BIRR,south-east,53.1,-7.9,552.1,798.690741,1084.3
4,CASEMENT,north-east,53.3,-6.4,541.0,718.733333,992.4
5,CLAREMORRIS,north-west,53.7,-9.0,826.0,1145.350685,1606.4
6,CLONES,north-east,54.2,-7.2,617.9,908.522414,1156.8
7,CORK AIRPORT,south-west,51.8,-8.5,871.2,1195.754839,1544.4
8,DUBLIN AIRPORT,north-east,53.4,-6.2,524.4,721.912195,1073.0
9,DUNSANY,north-east,53.5,-6.7,564.1,818.206667,1024.4


In [12]:
# Min / mean / max of annual total precipitation for each direction group
df_s = (
    cleaned_df
    .groupby(by=['Direction'])['PRCPTOT']
    .agg(['min', 'mean', 'max'])
)
df_s

Unnamed: 0_level_0,min,mean,max
Direction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
north-central,693.1,947.856818,1309.1
north-east,524.4,843.108092,1453.3
north-west,826.0,1220.932372,1647.9
south-central,700.7,894.784615,1038.7
south-east,552.1,874.216606,1387.7
south-west,658.3,1081.386471,1651.7


# **Map of precipitation according to regions**

In [13]:


# Marker colour lookup for direction labels
def direction_to_color(direction):
    """Return the marker colour name assigned to a direction label.

    Labels that are not in the palette fall back to "gray".
    """
    palette = {
        "north-west": "darkblue",
        "north-east": "blue",
        "south-east": "orange",
        "south-west": "red",
        "south-central": "purple",
        "north-central": "green",
    }
    if direction in palette:
        return palette[direction]
    return "gray"  # default when the direction is not recognised

# Per-direction PRCPTOT summary shown in the popups. It is computed from the
# cleaned data instead of being typed in by hand, so it cannot go stale when
# the data or the outlier filter changes. Rounded to 6 decimals for display.
direction_stats = (
    cleaned_df
    .groupby('Direction')['PRCPTOT']
    .agg(['min', 'mean', 'max'])
    .round(6)
    .to_dict('index')  # {direction: {'min': ..., 'mean': ..., 'max': ...}}
)

# Initial centre of the map
Lat = 53.390862
Long = -6.158100

# One (latitude, longitude) pair per row of df_unq_temp
locations = list(zip(df_unq_temp.Latitude, df_unq_temp.Longitude))

map2 = folium.Map(location=[Lat, Long], zoom_start=7)

# Placeholder used when a direction has no summary row
missing_stats = {"min": "N/A", "mean": "N/A", "max": "N/A"}

# One marker per location, coloured by direction, with that direction's
# precipitation summary in the popup.
for location, direction in zip(locations, df_unq_temp['Direction']):
    # Named `summary` rather than `stats` so the `scipy.stats` module imported
    # at the top of the notebook is not overwritten.
    summary = direction_stats.get(direction, missing_stats)
    popup_text = (f"Direction: {direction}<br>"
                  f"Mean Precipitation: {summary['mean']}<br>"
                  f"Min Precipitation: {summary['min']}<br>"
                  f"Max Precipitation: {summary['max']}")
    folium.Marker(location,
                  popup=popup_text,
                  icon=folium.Icon(color=direction_to_color(direction))
                 ).add_to(map2)

# Display the map
map2


       "north-west": "darkblue",
        "north-east": "blue",
        "south-east": "orange",
        "south-west": "red",
        "south-central": "purple",
        "north-central": "green"

In [14]:
# Min / mean / max of TXx for each direction group
df_s = (
    cleaned_df
    .groupby(by=['Direction'])['TXx']
    .agg(['min', 'mean', 'max'])
)
df_s

Unnamed: 0_level_0,min,mean,max
Direction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
north-central,23.93,28.228182,31.41
north-east,20.2,25.168403,33.05
north-west,20.4,25.387328,30.5
south-central,23.06,26.688462,31.0
south-east,19.6,25.136546,32.2
south-west,18.9,25.063583,32.0


1. Minimum Values (Min):

    Highest Minimum: The highest minimum value is observed in the north-central direction (23.93).
    Lowest Minimum: The lowest minimum value is observed in the south-west direction (18.90).
    Range: The minimum values across directions range from 18.90 to 23.93, indicating variability in the lower limit across directions.

2. Mean Values:

    Highest Mean: The north-central direction has the highest mean value (28.228182), suggesting it generally experiences higher values compared to other directions.
    Lowest Mean: The south-west direction has the lowest mean value (25.063583), indicating relatively lower average values.
    Consistency: The mean values are relatively close, ranging from 25.063583 to 28.228182, suggesting moderate variation across directions.

3. Maximum Values (Max):

    Highest Maximum: The highest maximum value is observed in the north-east direction (33.05).
    Lowest Maximum: The lowest maximum value is observed in the north-west direction (30.50).
    Range: The maximum values range from 30.50 to 33.05, showing a moderate spread in the highest observed values across different directions.

4. Comparative Analysis:

    North-Central: Exhibits the highest minimum and mean values, indicating a tendency towards consistently higher readings.
    North-East: Has the highest maximum value, suggesting extreme conditions may occasionally occur in this direction.
    South-West: Shows the lowest minimum and mean values, indicating generally lower readings in this direction.

5. General Trends:

    The north-central and south-central directions tend to have higher minimum and mean values, which might suggest more stable conditions in these directions.
    The north-east and south-east directions show more variability with higher maximum values, indicating possible fluctuations in conditions.
    The south-west direction has the lowest minimum and mean values (although not the lowest maximum), suggesting it generally records the lowest annual maximum temperatures (TXx).

6. Conclusion:

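    The data suggests that the north-central direction is the most stable, with consistently higher values and the narrowest range (23.93 to 31.41).
    The north-east direction reaches the highest maximum (33.05) and has a broad range between its minimum and maximum values (12.85), although the south-west range is marginally wider (18.90 to 32.00, i.e. 13.10).
    The south-west direction appears to be the least intense on average, with the lowest minimum and mean values, even though its maximum (32.00) is not the lowest.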

# **Map of max temperature according to region**

In [15]:
# Marker colour lookup for direction labels
def direction_to_color(direction):
    """Return the marker colour name assigned to a direction label.

    Labels that are not in the palette fall back to "gray".
    """
    palette = {
        "north-west": "darkblue",
        "north-east": "blue",
        "south-east": "orange",
        "south-west": "red",
        "south-central": "purple",
        "north-central": "green",
    }
    if direction in palette:
        return palette[direction]
    return "gray"


# Per-direction TXx summary shown in the popups. It is computed from the
# cleaned data instead of being typed in by hand, so it cannot go stale when
# the data or the outlier filter changes. Rounded to 6 decimals for display.
direction_stats = (
    cleaned_df
    .groupby('Direction')['TXx']
    .agg(['min', 'mean', 'max'])
    .round(6)
    .to_dict('index')  # {direction: {'min': ..., 'mean': ..., 'max': ...}}
)

# Initial centre of the map
Lat = 53.390862
Long = -6.158100

# One (latitude, longitude) pair per row of df_unq_temp
locations = list(zip(df_unq_temp.Latitude, df_unq_temp.Longitude))

map2 = folium.Map(location=[Lat, Long], zoom_start=7)

# Placeholder used when a direction has no summary row
missing_stats = {"min": "N/A", "mean": "N/A", "max": "N/A"}

# One marker per location, coloured by direction, with that direction's
# TXx summary in the popup.
for location, direction in zip(locations, df_unq_temp['Direction']):
    # Named `summary` rather than `stats` so the `scipy.stats` module imported
    # at the top of the notebook is not overwritten.
    summary = direction_stats.get(direction, missing_stats)
    # The figures are TXx statistics, so the labels say TXx (they previously
    # read "Precipitation", which mislabelled the values).
    popup_text = (f"Direction: {direction}<br>"
                  f"Mean TXx: {summary['mean']}<br>"
                  f"Min TXx: {summary['min']}<br>"
                  f"Max TXx: {summary['max']}")
    folium.Marker(location,
                  popup=popup_text,
                  icon=folium.Icon(color=direction_to_color(direction))
                 ).add_to(map2)

# Display the map
map2


In [18]:
# Min / mean / max of TXx for each station
df_t = (
    cleaned_df
    .groupby(by=['Station ID'])['TXx']
    .agg(['min', 'mean', 'max'])
)
df_t

Unnamed: 0_level_0,min,mean,max
Station ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ATHENRY,22.83,27.349091,30.47
BALLYHAISE,23.98,26.066842,30.77
BELMULLET,20.4,24.478657,29.9
BIRR,22.6,26.05,31.2
CASEMENT,22.8,25.851667,31.9
CLAREMORRIS,21.5,25.429077,30.5
CLONES,22.6,25.261404,30.5
CORK AIRPORT,21.1,24.108065,28.7
DUBLIN AIRPORT,21.3,24.540741,29.1
DUNSANY,22.1,25.905854,30.45


In [19]:
# Attach each station's TXx summary to its location record.
# `df_t` is indexed by 'Station ID', so the index is turned into a column
# inline for an explicit column-on-column join. The former
# `df_s = df_s.reset_index()` line was dropped: it acted on the unrelated
# per-direction table, played no part in this merge, and changed `df_s`
# again on every re-run.
df_unq_temp = pd.merge(
    df_unq_loc.loc[:, ['Station ID', 'Direction', 'Latitude', 'Longitude']],
    df_t.reset_index(),
    how='left',
    on=['Station ID'],
)
df_unq_temp

Unnamed: 0,Station ID,Direction,Latitude,Longitude,min,mean,max
0,ATHENRY,north-west,53.3,-8.8,22.83,27.349091,30.47
1,BALLYHAISE,north-east,54.1,-7.3,23.98,26.066842,30.77
2,BELMULLET,north-west,54.2,-10.0,20.4,24.478657,29.9
3,BIRR,south-east,53.1,-7.9,22.6,26.05,31.2
4,CASEMENT,north-east,53.3,-6.4,22.8,25.851667,31.9
5,CLAREMORRIS,north-west,53.7,-9.0,21.5,25.429077,30.5
6,CLONES,north-east,54.2,-7.2,22.6,25.261404,30.5
7,CORK AIRPORT,south-west,51.8,-8.5,21.1,24.108065,28.7
8,DUBLIN AIRPORT,north-east,53.4,-6.2,21.3,24.540741,29.1
9,DUNSANY,north-east,53.5,-6.7,22.1,25.905854,30.45


In [16]:
# Marker colour lookup for direction labels
def direction_to_color(direction):
    """Return the marker colour name assigned to a direction label.

    Labels that are not in the palette fall back to "gray".
    """
    palette = {
        "north-west": "darkblue",
        "north-east": "blue",
        "south-east": "orange",
        "south-west": "red",
        "south-central": "purple",
        "north-central": "green",
    }
    if direction in palette:
        return palette[direction]
    return "gray"


# Per-direction TXx summary shown in the popups. It is computed from the
# cleaned data instead of being typed in by hand, so it cannot go stale when
# the data or the outlier filter changes. Rounded to 6 decimals for display.
direction_stats = (
    cleaned_df
    .groupby('Direction')['TXx']
    .agg(['min', 'mean', 'max'])
    .round(6)
    .to_dict('index')  # {direction: {'min': ..., 'mean': ..., 'max': ...}}
)

# Initial centre of the map
Lat = 53.390862
Long = -6.158100

# One (latitude, longitude) pair per row of df_unq_temp
locations = list(zip(df_unq_temp.Latitude, df_unq_temp.Longitude))

map2 = folium.Map(location=[Lat, Long], zoom_start=7)

# Placeholder used when a direction has no summary row
missing_stats = {"min": "N/A", "mean": "N/A", "max": "N/A"}

# One marker per location, coloured by direction, with that direction's
# TXx summary in the popup.
for location, direction in zip(locations, df_unq_temp['Direction']):
    # Named `summary` rather than `stats` so the `scipy.stats` module imported
    # at the top of the notebook is not overwritten.
    summary = direction_stats.get(direction, missing_stats)
    popup_text = (f"Direction: {direction}<br>"
                  f"Mean TXx: {summary['mean']}<br>"
                  f"Min TXx: {summary['min']}<br>"
                  f"Max TXx: {summary['max']}")
    folium.Marker(location,
                  popup=popup_text,
                  icon=folium.Icon(color=direction_to_color(direction))
                 ).add_to(map2)

# Display the map
map2


        "north-west": "darkblue",
        "north-east": "blue",
        "south-east": "orange",
        "south-west": "red",
        "south-central": "purple",
        "north-central": "green"