### Installing dependencies

In [None]:
# Path of the Python interpreter to use from Julia. The assignment was previously
# written twice; one is enough.
# NOTE(review): PyCall reads ENV["PYTHON"] when the package is built, so a
# `Pkg.build("PyCall")` is presumably needed for a changed path to take effect -- confirm.
ENV["PYTHON"] = "C:\\Users\\lap2r\\AppData\\Local\\Programs\\Python\\Python311\\python.exe"

### Environment configuration

In [None]:
using PyCall

# Put the parent directory of this notebook at the front of Python's `sys.path` so the
# project-local `paths_rel` module can be imported.
# `."path"` is PyCall's attribute-access syntax; the older `["path"]` indexing form is
# deprecated.
pushfirst!(PyVector(pyimport("sys")."path"), joinpath(@__DIR__, ".."))
paths_rel = pyimport("paths_rel")

# REL_WEATHER_DATA_FILE is defined in the Python module; the "../" prefix resolves it
# one directory above the current one.
weather_data_file_path = "../" * paths_rel.REL_WEATHER_DATA_FILE

In [None]:
# Attribute reference:
# https://docs.juliaplots.org/latest/generated/attributes_axis/
# https://docs.juliaplots.org/latest/generated/attributes_plot/
# https://docs.juliaplots.org/latest/generated/attributes_subplot/

# `default` is exported by Plots itself; `using Plots.PlotMeasures` alone only brings
# the measure units (e.g. `mm`) into scope, so this cell used to fail when run before
# the first `using Plots` further down the notebook.
using Plots
using Plots.PlotMeasures
using ColorSchemes

# Notebook-wide plot defaults. The accent colour is shared by fill, line and marker
# stroke, so it is built once instead of three times.
let accent = RGB(250 / 255, 135 / 255, 117 / 255)
    default(;
        legend=true,
        left_margin=5mm,
        right_margin=5mm,
        top_margin=5mm,
        bottom_margin=5mm,
        xrotation=90,
        draw_arrow=true,
        grid=false,
        minorgrid=false,
        dpi=600,
        size=(800, 800),
        color=accent,
        linecolor=accent,
        markerstrokecolor=accent,
        palette=:matter,
    )
end

## Weather EDA

In [None]:
using CSV
using DataFrames

# Parse the weather CSV and materialise it as a DataFrame.
weatherDf = CSV.File(weather_data_file_path) |> DataFrame

# Show the first row as a quick look at the columns.
first(weatherDf, 1)

In [None]:
using Dates

# Derive calendar components from `day_datetime` as separate columns.
weatherDf.year = Dates.year.(weatherDf.day_datetime)
weatherDf.month = Dates.month.(weatherDf.day_datetime)
weatherDf.day = Dates.day.(weatherDf.day_datetime)
weatherDf.week = Dates.week.(weatherDf.day_datetime)
# `Dates.dayofweek` returns 1 (Monday) .. 7 (Sunday). The previous
# `Dates.dayofweekofmonth` instead returned which occurrence of that weekday within
# the month the date is (1..5), which does not match the column name.
weatherDf.dayofweek = Dates.dayofweek.(weatherDf.day_datetime)

# Show the first row, now including the new columns.
first(weatherDf, 1)

In [None]:
using Printf
using Plots

# Histogram of the daily temperature, with y tick labels printed via "%d".
histogram(
    weatherDf.day_temp;
    title="\nDistribution of Daily Temperature",
    xlabel="Temperature (Celsius)",
    ylabel="Frequency",
    bins=100,
    bar_width=0.3,
    xticks=-20:5:40,
    yformatter=(tick -> @sprintf("%d", tick)),
    rotation=0,
)

In [None]:
using StatsPlots

# Box plot of daily temperature, one box per weather condition.
# `@df` resolves the `:column` symbols against `weatherDf`.
@df weatherDf boxplot(
    :day_conditions,
    :day_temp;
    title="\nComparison of Daily Temperature across Conditions",
    xlabel="Conditions",
    ylabel="Temperature (Celsius)",
)

In [None]:
# Convert the numeric month column to month names ("January", "February", ...) so the
# boxes are labelled by name.
norMonth = Dates.monthname.(weatherDf.month)

# Box plot of daily temperature per month.
# The former `boxpoints`, `kind` and `pointpos` keywords were dropped: they are
# Plotly/seaborn-style options, not Plots.jl/StatsPlots attributes.
boxplot(
    norMonth,
    weatherDf.day_temp;
    label="°C",
    xlabel="Month",
    ylabel="Temperature (°C)",
    title="\nTemperature by Month",
    xticks=:all,
    # Row vector of colours: Plots assigns one entry per series.
    color=[:pink :black :orange],
)

In [None]:
# Violin plot of daily temperature grouped by month number, showing how the
# temperature distribution varies across months.
violin(
    weatherDf.month,
    weatherDf.day_temp;
    title="Temperature by Month",
    xlabel="Month",
    ylabel="Temperature (°C)",
    label="",
)

In [None]:
using StatsPlots  # provides `groupedbar`

# Count the days observed for every (month, condition) pair, then pivot to one row per
# month and one column per weather condition.
conditions_by_month = combine(
    groupby(weatherDf, [:month, :day_conditions]), nrow => :count
)
unstacked_conditions = unstack(conditions_by_month, :month, :day_conditions, :count)
sort!(unstacked_conditions, :month)  # draw the bars in calendar order

# Every column after the leading `:month` key column is one weather condition.
condition_names = names(unstacked_conditions)[2:end]
n_conditions = length(condition_names)

# Kept with its original value (number of conditions minus one) in case a later cell
# reads it -- TODO confirm and remove; the plot below no longer uses it.
last_ind = n_conditions - 1

# (month, condition) pairs that never occur come out of `unstack` as `missing`;
# treat them as zero days so the bars can be stacked.
day_counts = coalesce.(Matrix(unstacked_conditions[:, 2:end]), 0)

# Per-series attributes must be row vectors (1 x n) with one entry per condition.
series_labels = permutedims(first.(condition_names, 3))  # 3-character legend labels
series_colors = permutedims([ColorSchemes.rainbow[i] for i in 1:n_conditions])

# `stacked=true` is not a Plots attribute; stacking is done with StatsPlots'
# `groupedbar` and `bar_position=:stack`. The x axis is the month, so the ticks come
# from the month column rather than from the number of conditions.
b = groupedbar(
    unstacked_conditions.month,
    day_counts;
    bar_position=:stack,
    label=series_labels,
    xlabel="Month",
    ylabel="Count",
    title="\nProportion of Days with Different\nWeather Conditions by Month",
    xticks=unstacked_conditions.month,
    color=series_colors,
    linecolor=:black,
    rotation=0,
)
display(b)

In [None]:
# Inspect the legend labels: the first three characters of every condition column
# name, each wrapped in its own one-element vector.
[[condition[1:3]] for condition in names(unstacked_conditions)[2:end]]

In [None]:
using Dates

# Tick range: the first of each month from the fixed start date up to today.
start_date = Date("2022-02-01", "yyyy-mm-dd")
end_date = Date(Dates.now())

# Line plot of daily precipitation over time.
p = plot(
    weatherDf.day_datetime,
    weatherDf.day_precip;
    seriestype=:line,
    label="Precipitation",
)

# Place one tick per month, positioned by the dates' integer values and labelled
# as "yyyy-mm".
let month_starts = collect(start_date:Dates.Month(1):end_date)
    xticks!(p, Dates.value.(month_starts), Dates.format.(month_starts, "yyyy-mm"))
end


In [None]:
# Kernel density estimate of the daily temperature.
density(
    weatherDf.day_temp;
    title="\nDistribution of Temperature",
    xlabel="Temperature (°C)",
    ylabel="Density",
    label="°C",
    rotation=0,
)

In [None]:
using StatsPlots  # provides `groupedbar`

# Count the days observed for every (month, condition) pair, then pivot to one row per
# month and one column per weather condition.
conditions_by_month = combine(
    groupby(weatherDf, [:month, :day_conditions]), nrow => :count
)
unstacked_conditions = unstack(conditions_by_month, :month, :day_conditions, :count)
sort!(unstacked_conditions, :month)  # draw the bars in calendar order

# `stacked=true` is not a Plots attribute; stacking is done with StatsPlots'
# `groupedbar` and `bar_position=:stack`.
groupedbar(
    unstacked_conditions.month,
    # Pairs that never occur are `missing` after `unstack`; count them as zero days.
    coalesce.(Matrix(unstacked_conditions[:, 2:end]), 0);
    bar_position=:stack,
    # Per-series labels must be a row vector (1 x n), one entry per condition column.
    label=permutedims(names(unstacked_conditions)[2:end]),
    xlabel="Month",
    ylabel="Count",
    title="\nProportion of Days with Different\nWeather Conditions by Month",
    palette=:matter,
)