In [None]:
import warnings

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
from windrose import WindroseAxes

# Silence every warning for the rest of the session
# (note: this also hides deprecation notices raised by pandas / seaborn / matplotlib)
warnings.filterwarnings('ignore')
# Use seaborn's 'whitegrid' style for all plots created below
sns.set_style('whitegrid')

In [None]:
def select_period(df, year, month_num):
    '''
    Selects the rows that fall into one month of one year.
    df: Dataframe with datetime index
    year: Integer of year
    month_num: Integer of month number
    '''

    in_year = df.index.year == year
    in_month = df.index.month == month_num

    return df.loc[in_year & in_month]

In [None]:
def exclude_outliers(df):
    '''
    Excludes outlier rows using the 1.5 * IQR rule.
    df: Dataframe from which outliers are to be excluded

    A row is removed when any of its columns lies outside
    [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR] of that column. Rows that contain
    missing values are removed as well.

    The remaining rows keep their original index labels, so the result can
    be aligned with (or assigned back into) the dataframe it was taken from.
    '''

    # 25th and 75th percentiles (Q1 and Q3) and the interquartile range, per column
    q1 = df.quantile(0.25)
    q3 = df.quantile(0.75)
    iqr = q3 - q1

    # Observations outside (Q1 - 1.5 * IQR) to (Q3 + 1.5 * IQR) count as outliers
    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr
    outliers = (df < lower_bound) | (df > upper_bound)

    # Keep only rows without any outlier. The index is deliberately NOT reset:
    # assigning the result back with .loc aligns on index labels, and a fresh
    # RangeIndex would match none of the original rows.
    return df[~outliers.any(axis=1)].dropna()

In [None]:
def double_regplot(df, month):
    '''
    Creates double y-axis regplots for different months.
    df: Dataframe with 'Month', 'Sunshine_h' and 'Temp_Avg_C' columns;
        'Date' is expected to be available after reset_index()
    month: String of the month name

    Sunshine is drawn against the left y-axis and average temperature against
    the right y-axis, each with its own linear regression line.
    '''

    # Rows of the requested month, with the index moved into a column for plotting
    month_df = df.loc[df['Month'] == month].reset_index()

    # Initiate plot
    _, ax1 = plt.subplots()
    ax2 = ax1.twinx()

    # Create first y-axis regplot.
    # ax1 must not be re-fetched with plt.gca() at this point: twinx() makes the
    # twin the current axes, which would put both regressions on the right axis.
    sns.regplot(data=month_df, x='Date', y='Sunshine_h', color='orange', ci=None, ax=ax1)
    ax1.set_title(f'Relationship between Sunshine and Average Temperature for {month}\n from 1988 to 2021 in Straznice, South Moravia, Czechia')
    # Labels are set after regplot because regplot overwrites the axis labels
    ax1.set_ylabel('Sunshine [h]', color='orange')
    ax1.tick_params(axis='y', colors='orange')

    # Create second y-axis regplot
    sns.regplot(data=month_df, x='Date', y='Temp_Avg_C', color='red', ci=None, ax=ax2)
    ax2.set_ylabel('Average Temperature [C]', color='red')
    ax2.tick_params(axis='y', colors='red')
    ax2.grid(False)

    plt.show()

In [None]:
def plot_bar_point(df, month):
    '''
    Creates double y-axis bar and point plots for different months.
    df: Dataframe with 'Month', 'Rain_mm' and 'Humidity_Avg_Percent' columns;
        'Date' is expected to be available after reset_index()
    month: String of the month name

    Rain is drawn as bars against the left y-axis and humidity as points
    against the right y-axis.
    '''

    # Rows of the requested month, with the index moved into a column for plotting
    month_df = df.loc[df['Month'] == month].reset_index()

    # Initiate the first plot
    _, ax1 = plt.subplots()
    ax2 = ax1.twinx()

    sns.barplot(x='Date', y='Rain_mm', data=month_df, color='blue', ci=None, ax=ax1)
    ax1.set_title(f'Rain Fall and Humidity for {month}\n from 1988 to 2021 in Straznice, South Moravia, Czechia')
    ax1.set_ylabel('Rain [mm]', color='blue')
    ax1.tick_params(axis='y', colors='blue')
    # Rotate the x tick labels through tick_params so the rotation does not
    # depend on which ticks exist at the time of the call
    ax1.tick_params(axis='x', labelrotation=90)

    # Initiate the second plot
    sns.pointplot(x='Date', y='Humidity_Avg_Percent', data=month_df, color='magenta', markers='x', ax=ax2)
    ax2.set_ylabel('Humidity [%]', color='magenta')
    ax2.tick_params(axis='y', colors='magenta')
    ax2.grid(False)

    plt.show()

In [None]:
def plot_bar_line(df, month):
    '''
    Draws stacked snow-height bars and a minimum-temperature line on twin y-axes for one month.
    df: Dataframe with 'Month', 'Snow_Height_cm', 'New_Snow_Height_cm' and 'Temp_Min_C' columns
    month: String of the month name
    '''

    # Rows belonging to the requested month
    month_rows = df.loc[df['Month'] == month]

    # Initiate plot
    _, ax1 = plt.subplots()

    # Stacked bars on the left axis
    snow_heights = month_rows[['Snow_Height_cm', 'New_Snow_Height_cm']]
    snow_heights.plot(kind='bar', stacked=True, color=['steelblue','cyan'], ax=ax1)

    ax1.set_title(f'Snow Height, New Snow and Min Temperature for {month}\n from 1988 to 2021 in Straznice, South Moravia, Czechia')
    ax1.set_xlabel('Year')
    ax1.set_ylabel('Snow Height [cm]', color='steelblue')
    ax1.tick_params(axis='y', colors='steelblue')

    # Replace the column names in the legend with readable labels
    bar_handles, _ = ax1.get_legend_handles_labels()
    ax1.legend(bar_handles, ['Snow Height', 'New Snow Height'], fontsize='xx-small', bbox_to_anchor=(0.5, 1))

    # Line on the right axis; plotting bare values places the points at
    # positions 0..n-1, the same positions the bars occupy
    ax2 = ax1.twinx()
    min_temperatures = month_rows[['Temp_Min_C']].values
    ax2.plot(min_temperatures, color='blue', marker='x')
    ax2.set_ylabel('Min Temperature [C]', color='blue')
    ax2.tick_params(axis='y', colors='blue')
    ax2.grid(False)

    plt.show()

In [None]:
def plot_windrose_period(df, month, period, start, stop):
    '''
    Creates a windrose plot for a period of time in a given time interval.
    df: Dataframe with datetime index and 'Month', 'Wind_Max_Direction_Dg'
        and 'Wind_Speed_Max_ms' columns
    month: String of month name
    period: Integer of period length in years
    start: Integer of start year interval
    stop: Integer of end year interval

    One figure is drawn per period; periods start at
    start, start + period, ... (every value below stop).
    '''

    is_month = df['Month'] == month
    index_years = df.index.year

    # Plotting a windrose for each period
    for first_year in range(start, stop, period):
        last_year = first_year + (period - 1)

        # Select every year of the period, not only its first and last year
        is_period = (index_years >= first_year) & (index_years <= last_year)
        selected = is_month & is_period

        ax = WindroseAxes.from_ax()

        ax.bar(
            df.loc[selected, 'Wind_Max_Direction_Dg'],
            df.loc[selected, 'Wind_Speed_Max_ms'],
            normed=True,
            opening=0.8,
            edgecolor='white',
            # Speed bin edges 7, 8, 9, 10, 11
            bins=np.arange(7, 12, 1)
            )
        ax.set_title(f'Direction of Max Wind Speed in {month} \nbetween {first_year} and {last_year} in Straznice, South Moravia, Czechia')
        ax.set_legend()

        plt.show()

In [None]:
# Load the data with the 'Date' column parsed into a DatetimeIndex: the cells
# below filter rows through .index.year / .index.month, which a default
# RangeIndex does not provide.
weather_all = pd.read_csv('weather_all.csv', index_col='Date', parse_dates=True)

#### Removing outliers and imputing missing values with each month's mean (per year)

In [None]:
# Calendar years 1988 through 2021
years = [*range(1988, 2022)]
# Month numbers processed below: December, January, February
months = [12, 1, 2]
# Work on a copy so weather_all keeps the values as loaded
moravia_weather = weather_all.copy()

In [None]:
# For every listed month of every year: drop the outlier rows, then fill the
# missing values of that period with the period's column means.
for y in years:
    for m in months:
        in_period = (moravia_weather.index.year == y) & (moravia_weather.index.month == m)

        # Snapshot of the period before it is modified; its column means are the fill values
        selected_period = select_period(moravia_weather, y, m)
        period_mean = selected_period.mean()

        # NOTE(review): this assignment aligns on index labels, so it relies on
        # exclude_outliers returning rows labelled with their original dates.
        moravia_weather.loc[in_period] = exclude_outliers(selected_period)
        moravia_weather.loc[in_period] = select_period(moravia_weather, y, m).where(pd.notna, period_mean, axis=1)

# Re-instating original max wind direction data
in_years = moravia_weather.index.year.isin(years)
moravia_weather = moravia_weather.loc[in_years].drop('Wind_Max_Direction_Dg', axis=1)
moravia_weather['Wind_Max_Direction_Dg'] = weather_all.loc[weather_all.index.year.isin(years), 'Wind_Max_Direction_Dg']

#### Examining winter months

In [None]:
# Month number -> month name for the winter months
month_names = {12: 'December', 1: 'January', 2: 'February'}

# Filtering for winter months and creating a month identity column.
# The names are built directly from the month numbers instead of writing
# strings into a NaN (float) column, which relies on silent dtype upcasting.
winter_months = (
    moravia_weather.loc[moravia_weather.index.month.isin([12,1,2])]
    .assign(Month=lambda frame: [month_names[month_num] for month_num in frame.index.month])
    )

##### Temperature Heatmap

In [None]:
# Mean of every column per calendar year and month name, indexed by year ('Date')
winter_heatmap = (
    winter_months
    .groupby([winter_months.index.year, 'Month'])
    .mean()
    .reset_index()
    .set_index('Date')
)

In [None]:
# Initiate plot: one panel per winter month, side by side
fig, (ax1, ax2, ax3) = plt.subplots(1,3, figsize=(18, 6))
fig.suptitle('Heatmap of Mean Temperature in Winter Months from 1988 to 2021 in Straznice, South Moravia, Czechia', fontsize=16)

temperature_cols = ['Temp_Min_C', 'Temp_Avg_C', 'Temp_Max_C']

# Create each month's heatmap (temperature types as rows) and title its panel
for panel, month_name in zip((ax1, ax2, ax3), ('December', 'January', 'February')):
    month_temperatures = winter_heatmap.loc[winter_heatmap['Month'] == month_name, temperature_cols]
    sns.heatmap(month_temperatures.T, cmap='coolwarm', linewidths=.5, ax=panel)
    panel.set_title(month_name)

plt.show()

##### Temperature Regression

In [None]:
# Long format: one row per (Date, Month, temperature type) for the regression plot
winter_reg = pd.melt(
    winter_heatmap.reset_index(),
    id_vars=['Date', 'Month'],
    value_vars=['Temp_Avg_C','Temp_Max_C','Temp_Min_C'],
    var_name='Temperature Type',
    value_name='Temperature [C]',
)

In [None]:
# Create a lmplot: one panel per month, one regression line per temperature type
lm = sns.lmplot(
    data=winter_reg,
    x='Date',
    y='Temperature [C]',
    hue='Temperature Type',
    col='Month',
    col_order=['December', 'January', 'February'],
    ci=None,
    fit_reg=True,
)

# Set main title and leave room for it above the panels
fig = lm.fig
fig.suptitle('Linear Regression of Temperatures in Winter Months from 1988 to 2021 in Straznice, South Moravia, Czechia', fontsize=16)
fig.subplots_adjust(top=0.88)

plt.show()

##### Sunshine and Temperature Regression

In [None]:
double_regplot(df=winter_heatmap, month='December')

In [None]:
double_regplot(df=winter_heatmap, month='January')

In [None]:
double_regplot(df=winter_heatmap, month='February')

##### Wind Speed Line

In [None]:
# Long format: one row per (Date, Month, wind speed type) for the line plots
winter_line = pd.melt(
    winter_heatmap.reset_index(),
    id_vars=['Date', 'Month'],
    value_vars=['Wind_Avg_Speed_ms','Wind_Speed_Max_ms'],
    var_name='Wind Speed Type',
    value_name='Wind Speed [m/s]',
)

In [None]:
# Initiate plot: one panel per winter month, side by side
fig, (ax1, ax2, ax3) = plt.subplots(1,3, figsize=(18, 6))
fig.suptitle('Wind Speed in Winter Months from 1988 to 2021 in Straznice, South Moravia, Czechia', fontsize=16)
fig.subplots_adjust(top=0.88)

panels = (ax1, ax2, ax3)

# Create one titled line plot per month
for panel, month_name in zip(panels, ('December', 'January', 'February')):
    sns.lineplot(
        data=winter_line.loc[winter_line['Month'] == month_name],
        x='Date',
        y='Wind Speed [m/s]',
        hue='Wind Speed Type',
        palette=['blue','red'],
        ax=panel,
    )
    panel.set_title(month_name)

# Rename legend items; every panel reuses the handles of the first panel
h, _ = ax1.get_legend_handles_labels()
for panel in panels:
    panel.legend(h, ['Average Wind Speed [m/s]', 'Max Wind Speed [m/s]'])

plt.show()

##### Rainfall and Humidity Bar and Point

In [None]:
plot_bar_point(df=winter_heatmap, month='December')

In [None]:
plot_bar_point(df=winter_heatmap, month='January')

In [None]:
plot_bar_point(df=winter_heatmap, month='February')

##### (New) Snow Height and Min Temperature Bar and Line

In [None]:
plot_bar_line(df=winter_heatmap, month='December')

In [None]:
plot_bar_line(df=winter_heatmap, month='January')

In [None]:
plot_bar_line(df=winter_heatmap, month='February')

##### Windrose of Max Speed Wind Direction 

In [None]:
plot_windrose_period(df=winter_months, month='December', period=5, start=1999, stop=2021)

In [None]:
plot_windrose_period(df=winter_months, month='January', period=5, start=1999, stop=2021)

In [None]:
plot_windrose_period(df=winter_months, month='February', period=5, start=1999, stop=2021)