# An analysis of monthly snowfall data from the Denver/Boulder National Weather Service. Source: https://www.weather.gov/bou/SeasonalSnowfall

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the whitespace-delimited snowfall table. `sep=r'\s+'` is the documented
# replacement for `delim_whitespace=True`, which pandas deprecated in 2.2.
df = pd.read_csv('denver_snowfall.csv', sep=r'\s+')

In [3]:
# Columns that are not per-month snowfall readings; everything else in the
# frame is treated as a month column.
non_months = [
    'Year',
    'Total',
    'Departure_Historical_Average',
    'Departure_30_Year_Average',
]

In [4]:
# Month columns, kept in the order they appear in the frame.
months = [
    column
    for column in df.columns
    if column not in non_months
]

In [6]:
# Print each column's dtype and non-null count.
# NOTE(review): the execution counter (In [6]) suggests this ran after the
# numeric-conversion cell (In [5]), which would explain why the month columns
# already show as float64 with fewer than 136 non-null values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 136 entries, 0 to 135
Data columns (total 16 columns):
Year                            136 non-null object
July                            136 non-null float64
Aug                             136 non-null float64
Sep                             118 non-null float64
Oct                             122 non-null float64
Nov                             129 non-null float64
Dec                             133 non-null float64
Jan                             134 non-null float64
Feb                             135 non-null float64
Mar                             134 non-null float64
April                           128 non-null float64
May                             101 non-null float64
June                            133 non-null float64
Total                           136 non-null float64
Departure_Historical_Average    136 non-null float64
Departure_30_Year_Average       136 non-null float64
dtypes: float64(15), object(1)
memory usage: 17.

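## Some months use 'T' to indicate a trace amount of snow. To simplify this analysis, let's convert all month columns to numeric values. Any 'T' values will be converted to NaN, which means pandas skips them (rather than counting them as zero) when computing the means and standard deviations below.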

In [5]:
# Coerce every month column to a numeric dtype in one column-wise pass;
# entries that cannot be parsed as numbers (such as 'T') become NaN.
df[months] = df[months].apply(pd.to_numeric, errors='coerce')

## Let's order the months by the highest average snowfall

In [17]:
# Mean snowfall per month over all rows (NaN entries are skipped), largest first.
df.loc[:, months].mean(axis=0).sort_values(ascending=False)

Mar      11.532090
April     9.435156
Dec       8.326316
Nov       7.802326
Feb       7.697037
Jan       6.695522
Oct       4.537705
May       2.280198
Sep       1.165254
June      0.005263
Aug       0.000000
July      0.000000
dtype: float64

## Has the order changed for the most recent 30 years?

In [19]:
# Same ranking restricted to the first 30 rows of the frame.
# NOTE(review): this is "the most recent 30 years" only if the CSV lists
# seasons newest-first -- confirm against the data file.
df[months].head(30).mean().sort_values(ascending=False)

Mar      10.232143
Dec       8.113793
Jan       7.486207
April     7.400000
Nov       7.148276
Feb       7.075862
Oct       4.585185
May       1.236000
Sep       0.844828
June      0.000000
Aug       0.000000
July      0.000000
dtype: float64

## Now for each season, find the snowiest month

In [9]:
# For each row (season), the name of the month column holding the largest value.
snowiest_months = df.loc[:, months].idxmax(axis='columns')

## What percentage of snow seasons have March as the snowiest month?

In [16]:
# Fraction of seasons in which each month was the snowiest: occurrences of
# each month divided by the number of non-null Year entries.
snowiest_months.value_counts().div(df['Year'].count())

Mar      0.352941
April    0.176471
Dec      0.125000
Feb      0.117647
Jan      0.073529
Nov      0.066176
Oct      0.051471
May      0.022059
Sep      0.014706
dtype: float64

## Which months most frequently have only a trace amount of snow?

In [14]:
# Number of NaN entries per month, largest first. After the numeric coercion
# these include every 'T' (trace) reading.
# NOTE(review): any genuinely missing values would be counted here as well.
df[months].isnull().sum(axis=0).sort_values(ascending=False)

May      35
Sep      18
Oct      14
April     8
Nov       7
June      3
Dec       3
Mar       2
Jan       2
Feb       1
Aug       0
July      0
dtype: int64

## Which months have the highest variability (standard deviation) in snowfall amount?

In [15]:
# Sample standard deviation (ddof=1, the pandas default) of each month's
# snowfall, largest first.
df.loc[:, months].std(ddof=1).sort_values(ascending=False)

Mar      7.672964
April    7.457804
Dec      7.211059
Nov      6.839996
Oct      5.450503
Feb      5.152176
Jan      4.873634
May      3.717527
Sep      3.078749
June     0.043198
Aug      0.000000
July     0.000000
dtype: float64

## For a slightly different perspective, let's order the months by the difference between the maximum and minimum snowfall

In [22]:
# Range (maximum minus minimum) of each month's snowfall, largest first.
df[months].max().sub(df[months].min()).sort_values(ascending=False)

Dec      57.3
Nov      42.6
Mar      34.9
April    33.8
Oct      31.2
Jan      24.2
Feb      22.1
Sep      17.2
May      15.5
June      0.4
Aug       0.0
July      0.0
dtype: float64

## Let's find the percentage of seasons for which each month had at least a trace amount of snow

In [31]:
# A month counts for a season if it has measurable snow (> 0) or is NaN
# (the coerced 'T' trace marker). Sum the hits per month and divide by the
# number of non-null Year entries.
(df[months].isna() | df[months].gt(0)).sum().div(df['Year'].count())

July     0.000000
Aug      0.000000
Sep      0.338235
Oct      0.860294
Nov      0.992647
Dec      1.000000
Jan      1.000000
Feb      1.000000
Mar      1.000000
April    0.970588
May      0.647059
June     0.036765
dtype: float64