In [12]:
import os
from pathlib import Path
import polars as pl
import matplotlib.pyplot as plt
from lib.helper import save_plot

In [13]:
# Weather-history CSV; fields in this file are delimited by ';' rather than ','.
data_file = "data/rdu-weather-history.csv"
df = pl.read_csv(source=data_file, separator=";")

# Preview the first five rows to check that the columns parsed as expected.
df.head(n=5)


date,tmin,tmax,prcp,snow,snwd,awnd
str,i64,i64,f64,f64,f64,f64
"""2017-01-01""",41,50,0.54,0.0,0.0,6.49
"""2017-01-08""",15,27,0.0,0.0,1.2,4.92
"""2017-01-16""",44,51,0.0,0.0,0.0,4.47
"""2017-01-23""",43,60,0.28,0.0,0.0,9.84
"""2017-02-07""",49,76,0.0,0.0,0.0,12.53


In [14]:
# Summary statistics per column (count, null_count, mean, std, min, quartiles, max).
df.describe()

statistic,date,tmin,tmax,prcp,snow,snwd,awnd
str,str,f64,f64,f64,f64,f64,f64
"""count""","""1977""",1977.0,1977.0,1977.0,1977.0,1976.0,1973.0
"""null_count""","""0""",0.0,0.0,0.0,0.0,1.0,4.0
"""mean""",,51.088012,72.298938,0.135771,0.012838,0.016953,6.074805
"""std""",,15.889527,15.796087,0.370521,0.227433,0.215338,2.954427
"""min""","""2017-01-01""",4.0,27.0,0.0,0.0,0.0,0.0
"""25%""",,37.0,60.0,0.0,0.0,0.0,3.8
"""50%""",,52.0,74.0,0.0,0.0,0.0,5.8
"""75%""",,66.0,86.0,0.06,0.0,0.0,7.8
"""max""","""2022-05-31""",78.0,102.0,4.96,7.0,5.9,20.36


In [15]:
# Convert the string "date" column to a real Date, then derive calendar parts.
# The cast and the derived columns are separate with_columns steps because
# expressions inside a single with_columns call all read the *input* frame,
# in which "date" would still be a string.
date_col = pl.col("date")
df = (
    df.with_columns(date_col.cast(pl.Date))
    .with_columns(
        date_col.dt.month().alias('month'),
        date_col.dt.year().alias('year'),
    )
)

# Per-calendar-month mean of each measurement (all years pooled together),
# ordered January -> December. Maps source column -> output column name.
avg_aliases = {
    "tmin": "tmin_avg",
    "tmax": "tmax_avg",
    "prcp": "prcp_avg",
    "snow": "snow_avg",
}
monthly_avg = (
    df.group_by("month")
    .agg([pl.col(src).mean().alias(dst) for src, dst in avg_aliases.items()])
    .sort("month")
)

In [16]:
# Display the per-month averages (one row per calendar month, sorted by month).
monthly_avg

month,tmin_avg,tmax_avg,prcp_avg,snow_avg
i8,f64,f64,f64,f64
1,32.360215,51.978495,0.147419,0.063333
2,37.87574,59.331361,0.112959,0.014793
3,41.795699,63.543011,0.10871,0.010215
4,49.116667,73.244444,0.152944,0.0
5,59.317204,80.88172,0.14043,0.0
…,…,…,…,…
8,69.554839,89.070968,0.177742,0.0
9,63.86,83.786667,0.1282,0.0
10,53.587097,75.425806,0.145161,0.0
11,38.946667,61.486667,0.117533,0.0


In [22]:
# Plot monthly average min/max temperature (left y-axis) together with average
# precipitation (right y-axis), then save the figure to disk.
#
# The explicit Axes API is used on purpose: `twinx()` makes the newly created
# twin the *current* axes, so pyplot-level calls issued after it (plt.ylabel,
# plt.grid, plt.legend, ...) would be applied to the precipitation axes. That
# silently dropped the temperature y-label and the temperature legend.
fig, ax1 = plt.subplots(figsize=(12, 6))

months = monthly_avg["month"]

# Temperature lines on the primary (left) y-axis
ax1.plot(months, monthly_avg["tmin_avg"], marker='o', label='Avg Min Temperature (°F)', color='blue')
ax1.plot(months, monthly_avg["tmax_avg"], marker='o', label='Avg Max Temperature (°F)', color='red')

# Precipitation bars on a secondary (right) y-axis that shares the x-axis
ax2 = ax1.twinx()
ax2.bar(months, monthly_avg["prcp_avg"], alpha=0.3, color='green', label='Avg Precipitation (inches)')

# Labels and title, each set on the axes it belongs to
ax1.set_title('Monthly Average Temperature and Precipitation')
ax1.set_xlabel('Month')
ax1.set_ylabel('Temperature (°F)')
ax2.set_ylabel('Precipitation (inches)')
ax1.set_xticks(months)
ax1.grid()  # grid lines follow the temperature scale

# One legend per axes so both the lines and the bars are described
ax1.legend(loc='upper left')
ax2.legend(loc='upper right')

# Save while the figure is still open. save_plot is handed the pyplot module,
# so it presumably writes pyplot's current figure (TODO confirm in lib.helper);
# calling it after plt.close() wrote an empty, freshly created figure instead.
save_plot(plt, "plots/temp_precip_plot")

# Release the figure; this also suppresses the inline rendering in the notebook.
plt.close(fig)

Plot saved as plots/temp_precip_plot.png


<Figure size 640x480 with 0 Axes>