# Data Science Open-Ended Lab — Weather Data Analysis

## Was the 2018 winter at Canberra unusually long and/or cold?

### Introduction
This notebook analyzes Canberra weather data to determine whether winter 2018 was unusually cold or long.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import ttest_ind
# Select matplotlib's built-in 'default' style sheet for the figures drawn in this notebook.
plt.style.use('default')

## 1. Data Ingestion & Cleaning

In [None]:
# Read the raw daily minimum and daily maximum temperature records,
# one CSV file per measurement, into separate frames.
min_df, max_df = (
    pd.read_csv(csv_name)
    for csv_name in ('daily-min-temp-CBR.csv', 'daily-max-temp-CBR.csv')
)

In [None]:
def create_date_column(df):
    """Add a ``Date`` column assembled from the ``Year``, ``Month`` and ``Day`` columns.

    The frame passed in is modified in place; the same object is also
    returned so the call can be used on the right-hand side of an assignment.
    """
    date_parts = df[['Year', 'Month', 'Day']]
    df['Date'] = pd.to_datetime(date_parts)
    return df
# Give both raw frames a parsed Date column.
min_df, max_df = create_date_column(min_df), create_date_column(max_df)

In [None]:
# Parse the raw Temperature readings as numbers. errors='coerce' turns any
# value that cannot be parsed into NaN instead of raising.
min_df = min_df.assign(MinTemp=pd.to_numeric(min_df['Temperature'], errors='coerce'))
max_df = max_df.assign(MaxTemp=pd.to_numeric(max_df['Temperature'], errors='coerce'))

In [None]:
# Combine the two measurements into one row per date. The inner join keeps
# only dates present in both frames. Year and Month are derived from Date
# so later cells can filter by them.
weather = (
    min_df[['Date', 'MinTemp']]
    .merge(max_df[['Date', 'MaxTemp']], on='Date', how='inner')
    .assign(
        Year=lambda merged: merged['Date'].dt.year,
        Month=lambda merged: merged['Date'].dt.month,
    )
)

In [None]:
# Discard every row that has a missing value in any column, then save the
# cleaned table (without the index) for reuse outside this notebook.
weather_clean = weather.dropna(axis='index', how='any')
weather_clean.to_csv(path_or_buf='cleaned_canberra_weather.csv', index=False)

## 2. Definitions
- Winter: June–August
- Cold: assessed in two ways — the number of nights with MinTemp < 0°C, and the mean winter (June–August) daily minimum temperature

## 3. Analysis Functions

In [None]:
def count_subzero_nights(year, month):
    """Count the rows of ``weather_clean`` in the given year and month with ``MinTemp`` below 0.

    Parameters
    ----------
    year : int
        Value matched against the ``Year`` column.
    month : int
        Value matched against the ``Month`` column.

    Returns
    -------
    integer
        Number of matching rows whose ``MinTemp`` is strictly less than 0
        (0 when no rows match the year/month).
    """
    in_period = weather_clean['Year'].eq(year) & weather_clean['Month'].eq(month)
    return weather_clean.loc[in_period, 'MinTemp'].lt(0).sum()

In [None]:
def winter_average(year, months=(6, 7, 8)):
    """Return the mean ``MinTemp`` of ``weather_clean`` for one year's winter months.

    Parameters
    ----------
    year : int
        Value matched against the ``Year`` column.
    months : iterable of int, optional
        Month numbers that make up the season. Defaults to June–August.
        The default is an immutable tuple so it cannot be altered between
        calls; lists passed by callers are still accepted.

    Returns
    -------
    float
        Mean of ``MinTemp`` over the matching rows, or NaN when no rows
        match the requested year and months.
    """
    selected = weather_clean['Year'].eq(year) & weather_clean['Month'].isin(months)
    return weather_clean.loc[selected, 'MinTemp'].mean()

In [None]:
def rank_of_winter(year, months=(6, 7, 8)):
    """Rank one year's winter by its mean ``MinTemp`` among all years in ``weather_clean``.

    Each year's seasonal mean is computed from the rows whose ``Month`` is
    in ``months``, and the years are then ordered from lowest to highest mean.
    A year with only some of its winter rows present is ranked on whatever
    rows it has.

    Parameters
    ----------
    year : int
        Year to look up.
    months : iterable of int, optional
        Month numbers that make up the season. Defaults to June–August,
        matching ``winter_average``.

    Returns
    -------
    int
        1-based position of ``year`` in the ascending ordering
        (1 = lowest mean minimum temperature).

    Raises
    ------
    ValueError
        If ``year`` has no rows in the selected months.
    """
    in_season = weather_clean['Month'].isin(months)
    seasonal_means = (
        weather_clean.loc[in_season]
        .groupby('Year')['MinTemp']
        .mean()
        .sort_values()
    )
    ordered_years = list(seasonal_means.index)
    if year not in ordered_years:
        # Same exception type as list.index would raise, but with a message
        # that says what is actually missing.
        raise ValueError(f"no data for year {year!r} in months {tuple(months)!r}")
    return ordered_years.index(year) + 1
# Position of winter 2018 when all winters are ordered by mean minimum
# temperature, lowest first (1 = lowest mean).
rank_of_winter(year=2018)

## 4. Visualization

In [None]:
# Plot every daily minimum with a smoothed trend line, and shade winter 2018
# (1 June – 31 August) so it can be compared with the rest of the record.
fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(weather_clean['Date'], weather_clean['MinTemp'], alpha=0.3, label='Daily minimum')
# rolling(30) averages 30 consecutive rows, not 30 calendar days: wherever rows
# were dropped during cleaning the window spans more than 30 days.
ax.plot(
    weather_clean['Date'],
    weather_clean['MinTemp'].rolling(30).mean(),
    color='black',
    label='30-observation rolling mean',
)
ax.axvspan(
    pd.Timestamp('2018-06-01'),
    pd.Timestamp('2018-08-31'),
    color='red',
    alpha=0.2,
    label='Winter 2018',
)
ax.set_title('Daily Minimum Temperatures – Canberra')
ax.set_xlabel('Date')
ax.set_ylabel('Minimum temperature (°C)')
ax.legend()
plt.show()

## Conclusion
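Based on the analysis above, winter 2018 appears to be among the coldest and longest winters in the Canberra record. **TODO:** the "longest" part of this claim still needs a supporting measure — Section 2 defines only "cold", and none of the cells above quantifies winter length.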