# Read CSV File to DataFrame

In [None]:
# imports
import pandas as pd

# data path
# Raw string literal: the Windows path is full of backslashes, and sequences
# such as \m, \D, \S and \W are not valid escapes in a normal string literal.
dataPath = r'D:\mattp\Documents\School\SeniorYear\DataAnalysis\WaterStreetPA_WthrMETAR_SNOW-2018.csv'

# read in data (the first line of the file supplies the column names)
data = pd.read_csv(dataPath, header=0)

# Investigate Each Feature

## Row Number

In [None]:
import numpy

# Preprocessing info to list for this feature: type/processing method,
# null values, null value type, number of null values.
# Summary stats to list for this feature: min, max, mean, median, std dev.

# The row-number column arrives under the label 'Unnamed: 0'; keep both the
# Series and a plain-list copy of it.
row_num_df = data.get('Unnamed: 0')
row_num_list = row_num_df.tolist()

# row num range (smallest and largest row number present)
row_min, row_max = min(row_num_list), max(row_num_list)
print('Row Range: {}-{}'.format(row_min, row_max))

## Datetime

In [None]:
import datetime as dt
import matplotlib.pyplot as plt

# Timestamp strings from the 'datetime' column, plus a plain-list copy.
datetime_df = data.get('datetime')
datetime_list = datetime_df.tolist()

# Parse every timestamp; strptime raises ValueError on any string that does
# not match the 'YYYY-MM-DD HH:MM:SS' layout.
datetime_objects = [dt.datetime.strptime(x, '%Y-%m-%d %H:%M:%S') for x in datetime_list]
dt_obj = datetime_objects[0]  # first parsed timestamp (retained from the original cell)

# datetime range
# min/max run on the raw strings; for this zero-padded, year-first layout the
# string ordering is the same as the chronological ordering.
datetime_min = min(datetime_list)
datetime_max = max(datetime_list)
print('Datetime Range: {} to {}'.format(datetime_min, datetime_max))

# Time delta plots
# Seconds elapsed between each reading and the next one. Pairing the list
# with itself shifted by one keeps the loop bounds tied to the timestamps
# themselves rather than to the values of the row-number column.
time_deltas = [
    (later - earlier).total_seconds()
    for earlier, later in zip(datetime_objects, datetime_objects[1:])
]

delta_min = min(time_deltas)
delta_max = max(time_deltas)
print('Delta Range: {}-{}'.format(delta_min, delta_max))

# One histogram per (bin count, value window in seconds), from the shortest
# gaps up to the longest ones.
for bins, window in (
    (75, (0, 75)),
    (100, (.01, 50)),
    (20, (75, 85)),
    (100, (85, 1000)),
    (100, (1000, 150000)),
):
    plt.hist(time_deltas, bins=bins, range=window)
    plt.show()

## PM 2.5

In [None]:
# 'PM25' column as a pandas Series.
pm_df = data.get('PM25')

# Check for null values
is_null_values = pm_df.isnull().values.any()
print('Null Values: {}\n'.format(is_null_values))

# PM 2.5 summary stats
df_min = pm_df.min()
df_max = pm_df.max()
df_mean = pm_df.mean()
df_median = pm_df.median()
df_stddev = pm_df.std()
print('PM 2.5 Summary Stats- \nmin:{}\nmax:{}\nmean:{}\nmedian:{}\nstd dev:{}\n'.format(df_min, df_max, df_mean, df_median, df_stddev))

# Mode and how often it occurs. mode() comes back empty when the column has
# no non-null readings, so report "no mode" instead of inventing a value;
# when several values tie, the first one listed by mode() is used.
pm_modes = pm_df.mode()
if pm_modes.empty:
    df_mode, num_mode = None, 0
else:
    df_mode = pm_modes.iloc[0]
    num_mode = pm_df.value_counts().loc[df_mode]
print('Mode Value: {}'.format(df_mode))
print('Mode Occurences: {}'.format(num_mode))

# PM 2.5 plots: the 0-1400 window first, then three narrower windows
plt.hist(pm_df, bins=140, range=(0,1400))
plt.show()

plt.hist(pm_df, bins=50, range=(0,50))
plt.show()

plt.hist(pm_df, bins=50, range=(50,100))
plt.show()

plt.hist(pm_df, bins=13, range=(100,1400))
plt.show()

# TODO: Plot histogram of the highest PM 2.5 reading of each day

## Temperature

In [None]:
# 'temp' column as a pandas Series, plus a plain-list copy.
temp_df = data.get('temp')
temp_list = temp_df.tolist()

# Check for null values
is_null_values = temp_df.isnull().values.any()
print('Null Values: {}\n'.format(is_null_values))

# Temp summary stats
df_min = temp_df.min()
df_max = temp_df.max()
df_mean = temp_df.mean()
df_median = temp_df.median()
df_stddev = temp_df.std()
print('Temperature Summary Stats- \nmin:{}\nmax:{}\nmean:{}\nmedian:{}\nstd dev:{}\n'.format(df_min, df_max, df_mean, df_median, df_stddev))

# Frequency table, computed once and reused for the mode count and the listing.
temp_counts = temp_df.value_counts()

# Mode and how often it occurs. mode() comes back empty when the column has
# no non-null readings, so report "no mode" instead of inventing a value;
# when several values tie, the first one listed by mode() is used.
temp_modes = temp_df.mode()
if temp_modes.empty:
    df_mode, num_mode = None, 0
else:
    df_mode = temp_modes.iloc[0]
    num_mode = temp_counts.loc[df_mode]
print('Mode Value: {}'.format(df_mode))
print('Mode Occurences: {}\n'.format(num_mode))

print('Value counts:\n{}'.format(temp_counts))

# Temp plots
plt.hist(temp_df, bins=100)
plt.show()

## Dewpoint

In [None]:
# 'dewpoint' column as a pandas Series, plus a plain-list copy.
dewpoint_df = data.get('dewpoint')
dewpoint_list = dewpoint_df.tolist()

# Check for null values
is_null_values = dewpoint_df.isnull().values.any()
print('Null Values: {}\n'.format(is_null_values))

# Dewpoint summary stats
df_min = dewpoint_df.min()
df_max = dewpoint_df.max()
df_mean = dewpoint_df.mean()
df_median = dewpoint_df.median()
df_stddev = dewpoint_df.std()
print('Dewpoint Summary Stats- \nmin:{}\nmax:{}\nmean:{}\nmedian:{}\nstd dev:{}\n'.format(df_min, df_max, df_mean, df_median, df_stddev))

# Frequency table, computed once and reused for the mode count and the listing.
dewpoint_counts = dewpoint_df.value_counts()

# Mode and how often it occurs. mode() comes back empty when the column has
# no non-null readings, so report "no mode" instead of inventing a value;
# when several values tie, the first one listed by mode() is used.
dewpoint_modes = dewpoint_df.mode()
if dewpoint_modes.empty:
    df_mode, num_mode = None, 0
else:
    df_mode = dewpoint_modes.iloc[0]
    num_mode = dewpoint_counts.loc[df_mode]
print('Mode Value: {}'.format(df_mode))
print('Mode Occurences: {}\n'.format(num_mode))

print('Value counts:\n{}'.format(dewpoint_counts))

# Dewpoint plots
plt.hist(dewpoint_df, bins=100)
plt.show()

## Relative Humidity

In [None]:
# 'RH' column as a pandas Series, plus a plain-list copy.
rh_df = data.get('RH')
rh_list = rh_df.tolist()

# Check for null values
is_null_values = rh_df.isnull().values.any()
print('Null Values: {}\n'.format(is_null_values))

# RH summary stats
df_min = rh_df.min()
df_max = rh_df.max()
df_mean = rh_df.mean()
df_median = rh_df.median()
df_stddev = rh_df.std()
print('Relative Humidity Summary Stats- \nmin:{}\nmax:{}\nmean:{}\nmedian:{}\nstd dev:{}\n'.format(df_min, df_max, df_mean, df_median, df_stddev))

# Frequency table, computed once and reused for the mode count and the listing.
rh_counts = rh_df.value_counts()

# Mode and how often it occurs. mode() comes back empty when the column has
# no non-null readings, so report "no mode" instead of inventing a value;
# when several values tie, the first one listed by mode() is used.
rh_modes = rh_df.mode()
if rh_modes.empty:
    df_mode, num_mode = None, 0
else:
    df_mode = rh_modes.iloc[0]
    num_mode = rh_counts.loc[df_mode]
print('Mode Value: {}'.format(df_mode))
print('Mode Occurences: {}\n'.format(num_mode))

print('Value counts:\n{}'.format(rh_counts))

# RH plots
plt.hist(rh_df, bins=100)
plt.show()

## Wind Direction

In [None]:
# 'windDIR' column as a pandas Series, plus a plain-list copy.
wind_direction_df = data.get('windDIR')
wind_direction_list = wind_direction_df.tolist()

# Check for null values
is_null_values = wind_direction_df.isnull().values.any()
print('Null Values: {}\n'.format(is_null_values))

# Wind direction summary stats
df_min = wind_direction_df.min()
df_max = wind_direction_df.max()
df_mean = wind_direction_df.mean()
df_median = wind_direction_df.median()
df_stddev = wind_direction_df.std()
print('Wind Direction Summary Stats- \nmin:{}\nmax:{}\nmean:{}\nmedian:{}\nstd dev:{}\n'.format(df_min, df_max, df_mean, df_median, df_stddev))

# Frequency table, computed once and reused for the mode count and the listing.
wind_direction_counts = wind_direction_df.value_counts()

# Mode and how often it occurs. mode() comes back empty when the column has
# no non-null readings, so report "no mode" instead of inventing a value;
# when several values tie, the first one listed by mode() is used.
wind_direction_modes = wind_direction_df.mode()
if wind_direction_modes.empty:
    df_mode, num_mode = None, 0
else:
    df_mode = wind_direction_modes.iloc[0]
    num_mode = wind_direction_counts.loc[df_mode]
print('Mode Value: {}'.format(df_mode))
print('Mode Occurences: {}\n'.format(num_mode))

print('Value counts:\n{}'.format(wind_direction_counts))

# Wind direction plots
plt.hist(wind_direction_df, bins=100)
plt.show()

### Wind Direction Notes:
It seems like 0 may be a placeholder value for when there is no wind direction reading.

## Wind Speed

In [None]:
# 'windMPH' column as a pandas Series, plus a plain-list copy.
wind_speed_df = data.get('windMPH')
wind_speed_list = wind_speed_df.tolist()

# Check for null values
is_null_values = wind_speed_df.isnull().values.any()
print('Null Values: {}\n'.format(is_null_values))

# Wind speed summary stats
df_min = wind_speed_df.min()
df_max = wind_speed_df.max()
df_mean = wind_speed_df.mean()
df_median = wind_speed_df.median()
df_stddev = wind_speed_df.std()
print('Wind Speed Summary Stats- \nmin:{}\nmax:{}\nmean:{}\nmedian:{}\nstd dev:{}\n'.format(df_min, df_max, df_mean, df_median, df_stddev))

# Frequency table, computed once and reused for the mode count and the listing.
wind_speed_counts = wind_speed_df.value_counts()

# Mode and how often it occurs. mode() comes back empty when the column has
# no non-null readings, so report "no mode" instead of inventing a value;
# when several values tie, the first one listed by mode() is used.
wind_speed_modes = wind_speed_df.mode()
if wind_speed_modes.empty:
    df_mode, num_mode = None, 0
else:
    df_mode = wind_speed_modes.iloc[0]
    num_mode = wind_speed_counts.loc[df_mode]
print('Mode Value: {}'.format(df_mode))
print('Mode Occurences: {}\n'.format(num_mode))

print('Value counts:\n{}'.format(wind_speed_counts))

# Wind speed plots: the 0-20 window, then the 20-35 window
plt.hist(wind_speed_df, bins=40, range=(0,20))
plt.show()

plt.hist(wind_speed_df, bins=30, range=(20,35))
plt.show()

## Precipitation

In [None]:
# 'precip' column as a pandas Series, plus a plain-list copy stored under the
# precipitation name (not under the MSLP list's name).
precipitation_df = data.get('precip')
precipitation_list = precipitation_df.tolist()

# Check for null values
is_null_values = precipitation_df.isnull().values.any()
print('Null Values: {}\n'.format(is_null_values))

# Precipitation summary stats
df_min = precipitation_df.min()
df_max = precipitation_df.max()
df_mean = precipitation_df.mean()
df_median = precipitation_df.median()
df_stddev = precipitation_df.std()
print('Precipitation Summary Stats- \nmin:{}\nmax:{}\nmean:{}\nmedian:{}\nstd dev:{}\n'.format(df_min, df_max, df_mean, df_median, df_stddev))

# Frequency table, computed once and reused for the mode count and the listing.
precipitation_counts = precipitation_df.value_counts()

# Mode and how often it occurs. mode() comes back empty when the column has
# no non-null readings, so report "no mode" instead of inventing a value;
# when several values tie, the first one listed by mode() is used.
precipitation_modes = precipitation_df.mode()
if precipitation_modes.empty:
    df_mode, num_mode = None, 0
else:
    df_mode = precipitation_modes.iloc[0]
    num_mode = precipitation_counts.loc[df_mode]
print('Mode Value: {}'.format(df_mode))
print('Mode Occurences: {}\n'.format(num_mode))

# Only the 20 most frequent values are listed.
print('Value counts (First 20):\n{}'.format(precipitation_counts.iloc[:20]))

# Precipitation plots: all readings, then the 0-0.1 window
plt.hist(precipitation_df, bins=100)
plt.show()

plt.hist(precipitation_df, bins=100, range=(0, 0.1))
plt.show()

### Precipitation Notes:
Precipitation readings are in increments of 0.01, except for the ~5600 readings at 0.0001. Is this a miscalibrated reading of 0 or an intentional output?

## Mean Sea Level Pressure (MSLP)

In [None]:
# 'mslp' column as a pandas Series, plus a plain-list copy.
mslp_df = data.get('mslp')
mslp_list = mslp_df.tolist()

# Check for null values
is_null_values = mslp_df.isnull().values.any()
print('Null Values: {}\n'.format(is_null_values))

# MSLP summary stats
df_min = mslp_df.min()
df_max = mslp_df.max()
df_mean = mslp_df.mean()
df_median = mslp_df.median()
df_stddev = mslp_df.std()
print('Mean Sea Level Pressure Summary Stats- \nmin:{}\nmax:{}\nmean:{}\nmedian:{}\nstd dev:{}\n'.format(df_min, df_max, df_mean, df_median, df_stddev))

# Frequency table, computed once and reused for the mode count and the listing.
mslp_counts = mslp_df.value_counts()

# Mode and how often it occurs. mode() comes back empty when the column has
# no non-null readings, so report "no mode" instead of inventing a value;
# when several values tie, the first one listed by mode() is used.
mslp_modes = mslp_df.mode()
if mslp_modes.empty:
    df_mode, num_mode = None, 0
else:
    df_mode = mslp_modes.iloc[0]
    num_mode = mslp_counts.loc[df_mode]
print('Mode Value: {}'.format(df_mode))
print('Mode Occurences: {}\n'.format(num_mode))

print('Value counts:\n{}'.format(mslp_counts))

# MSLP plots
plt.hist(mslp_df, bins=100)
plt.show()

## Visibility

In [None]:
# 'visibility' column as a pandas Series, plus a plain-list copy.
visibility_df = data.get('visibility')
visibility_list = visibility_df.tolist()

# Check for null values
is_null_values = visibility_df.isnull().values.any()
print('Null Values: {}\n'.format(is_null_values))

# Visibility summary stats
df_min = visibility_df.min()
df_max = visibility_df.max()
df_mean = visibility_df.mean()
df_median = visibility_df.median()
df_stddev = visibility_df.std()
print('Visibility Summary Stats- \nmin:{}\nmax:{}\nmean:{}\nmedian:{}\nstd dev:{}\n'.format(df_min, df_max, df_mean, df_median, df_stddev))

# Frequency table, computed once and reused for the mode count and the listing.
visibility_counts = visibility_df.value_counts()

# Mode and how often it occurs. mode() comes back empty when the column has
# no non-null readings, so report "no mode" instead of inventing a value;
# when several values tie, the first one listed by mode() is used.
visibility_modes = visibility_df.mode()
if visibility_modes.empty:
    df_mode, num_mode = None, 0
else:
    df_mode = visibility_modes.iloc[0]
    num_mode = visibility_counts.loc[df_mode]
print('Mode Value: {}'.format(df_mode))
print('Mode Occurences: {}\n'.format(num_mode))

print('Value counts:\n{}'.format(visibility_counts))

# Visibility plots: all readings, then the 0-4 window
plt.hist(visibility_df, bins=50)
plt.show()

plt.hist(visibility_df, bins=50, range=(0, 4))
plt.show()

### Visibility Notes:
Values seem to be mostly stratified by whole numbers from 4 to 10 and by steps of 0.5 from 0.5 to 2. There are, however, a small number of seemingly random decimal values.

## Gust

In [None]:
# 'gust' column as a pandas Series, plus a plain-list copy.
gust_df = data.get('gust')
gust_list = gust_df.tolist()

# Check for null values
is_null_values = gust_df.isnull().values.any()
print('Null Values: {}\n'.format(is_null_values))

# Gust summary stats
df_min = gust_df.min()
df_max = gust_df.max()
df_mean = gust_df.mean()
df_median = gust_df.median()
df_stddev = gust_df.std()
print('Gust Summary Stats- \nmin:{}\nmax:{}\nmean:{}\nmedian:{}\nstd dev:{}\n'.format(df_min, df_max, df_mean, df_median, df_stddev))

# Frequency table, computed once and reused for the mode count and the listing.
gust_counts = gust_df.value_counts()

# Mode and how often it occurs. mode() comes back empty when the column has
# no non-null readings, so report "no mode" instead of inventing a value;
# when several values tie, the first one listed by mode() is used.
gust_modes = gust_df.mode()
if gust_modes.empty:
    df_mode, num_mode = None, 0
else:
    df_mode = gust_modes.iloc[0]
    num_mode = gust_counts.loc[df_mode]
print('Mode Value: {}'.format(df_mode))
print('Mode Occurences: {}\n'.format(num_mode))

print('Value counts:\n{}'.format(gust_counts))

# Gust plots: all readings, then the 30-50 window
plt.hist(gust_df, bins=100)
plt.show()

plt.hist(gust_df, bins=100, range=(30,50))
plt.show()

### Gust Notes:
Gust minimum is higher than expected. Over what duration is this recorded?
It seems like the gust speed recorder capped out at the max speed of just over 48 mph.

## Weather Code (WX Code)
https://graphical.weather.gov/definitions/defineWxNoTable.html
https://www.e-education.psu.edu/files/meteo101/image/Section13/metar_decoding1203.html

In [None]:
# 'wxcodes' column as a pandas Series, plus a plain-list copy.
wx_code_df = data.get('wxcodes')
wx_code_list = wx_code_df.tolist()

# Check for null values
is_null_values = wx_code_df.isnull().values.any()
print('Null Values: {}'.format(is_null_values))

# Display null value: show how the entry at label/position 0 is represented
# in the Series and in the list (presumably a null in this dataset -- confirm).
print('Null value test df: {}'.format(wx_code_df.get(0)))
print('Null value test list: {}'.format(wx_code_list[0]))

# Display num and percentage null values.
# The percentage is taken over the number of entries in this column, so it
# does not depend on how the rows happen to be labelled.
num_null = wx_code_df.isna().sum()
num_rows = len(wx_code_df)
null_percent = round(num_null / num_rows * 100, 3) if num_rows else 0.0
print('Null num: {}, {}%\n'.format(num_null, null_percent))

# WX Code Dictionary
# Get all unique codes (by component) in alphabetical order
value_counts = wx_code_df.value_counts()

# split() with no argument ignores repeated or stray whitespace, so no empty
# components end up in the set.
code_set = set()
for key in value_counts.index:
    code_set.update(key.split())
print('Sorted set of WX Code components: {}\n'.format(sorted(code_set)))

# Plain-English meaning of each code component.
codes = {
    'BR':'Mist',
    'FG':'Fog',
    'FZFG':'Freezing Fog',
    'HZ':'Haze',
    'RA':'Rain',
    '+RA':'Heavy Rain',
    '-RA':'Light Rain',
    'SN':'Snow',
    '+SN':'Heavy Snow',
    '-SN':'Light Snow',
    'SQ':'Squalls',
    'TS':'Thunderstorm',
    'TSRA':'Thunderstorm and Rain',
    '+TSRA':'Thunderstorm and Heavy Rain',
    '-TSRA':'Thunderstorm and Light Rain',
    'UP':'Unknown Precipitation',
    'VCTS':'Thunderstorm in the Vicinity'
}

# One translation per distinct code string, in the same order as value_counts.
# A component missing from the dictionary is shown as its raw code instead of
# aborting the whole cell with a KeyError.
key_translations = [
    ' and '.join(codes.get(component, component) for component in key.split())
    for key in value_counts.index
]

# Turn the counts into a table and put the translation in the first column.
value_counts = value_counts.to_frame()
value_counts.insert(0, 'Translation', key_translations)

print('Value counts:\n{}'.format(value_counts))
print('Num Categories: {}'.format(len(value_counts)))

## Snow Depth

In [None]:
# '(top) Snow Depth (in)' column as a pandas Series, plus a plain-list copy.
top_snow_depth_df = data.get('(top) Snow Depth (in)')
top_snow_depth_list = top_snow_depth_df.tolist()

# Check for null values
is_null_values = top_snow_depth_df.isnull().values.any()
print('Null Values: {}\n'.format(is_null_values))

# Top Snow Depth summary stats
df_min = top_snow_depth_df.min()
df_max = top_snow_depth_df.max()
df_mean = top_snow_depth_df.mean()
df_median = top_snow_depth_df.median()
df_stddev = top_snow_depth_df.std()
print('Top Snow Depth Summary Stats- \nmin:{}\nmax:{}\nmean:{}\nmedian:{}\nstd dev:{}\n'.format(df_min, df_max, df_mean, df_median, df_stddev))

# Frequency table, computed once and reused for the mode count and the listing.
top_snow_depth_counts = top_snow_depth_df.value_counts()

# Mode and how often it occurs. mode() comes back empty when the column has
# no non-null readings, so report "no mode" instead of inventing a value;
# when several values tie, the first one listed by mode() is used.
top_snow_depth_modes = top_snow_depth_df.mode()
if top_snow_depth_modes.empty:
    df_mode, num_mode = None, 0
else:
    df_mode = top_snow_depth_modes.iloc[0]
    num_mode = top_snow_depth_counts.loc[df_mode]
print('Mode Value: {}'.format(df_mode))
print('Mode Occurences: {}\n'.format(num_mode))

print('Value counts:\n{}'.format(top_snow_depth_counts))

# Top Snow Depth plots: all readings, then a window starting just above
# zero, then the 2-3 window
plt.hist(top_snow_depth_df, bins=100)
plt.show()

plt.hist(top_snow_depth_df, bins=100, range=(.0001, 3))
plt.show()

plt.hist(top_snow_depth_df, bins=100, range=(2, 3))
plt.show()

In [None]:
# '(middle) Snow Depth (in)' column as a pandas Series, plus a plain-list copy.
middle_snow_depth_df = data.get('(middle) Snow Depth (in)')
middle_snow_depth_list = middle_snow_depth_df.tolist()

# Check for null values
is_null_values = middle_snow_depth_df.isnull().values.any()
print('Null Values: {}\n'.format(is_null_values))

# Middle Snow Depth summary stats
df_min = middle_snow_depth_df.min()
df_max = middle_snow_depth_df.max()
df_mean = middle_snow_depth_df.mean()
df_median = middle_snow_depth_df.median()
df_stddev = middle_snow_depth_df.std()
print('Middle Snow Depth Summary Stats- \nmin:{}\nmax:{}\nmean:{}\nmedian:{}\nstd dev:{}\n'.format(df_min, df_max, df_mean, df_median, df_stddev))

# Frequency table, computed once and reused for the mode count and the listing.
middle_snow_depth_counts = middle_snow_depth_df.value_counts()

# Mode and how often it occurs. mode() comes back empty when the column has
# no non-null readings, so report "no mode" instead of inventing a value;
# when several values tie, the first one listed by mode() is used.
middle_snow_depth_modes = middle_snow_depth_df.mode()
if middle_snow_depth_modes.empty:
    df_mode, num_mode = None, 0
else:
    df_mode = middle_snow_depth_modes.iloc[0]
    num_mode = middle_snow_depth_counts.loc[df_mode]
print('Mode Value: {}'.format(df_mode))
print('Mode Occurences: {}\n'.format(num_mode))

print('Value counts:\n{}'.format(middle_snow_depth_counts))

# Middle Snow Depth plots: all readings, then a window starting just above zero
plt.hist(middle_snow_depth_df, bins=100)
plt.show()

plt.hist(middle_snow_depth_df, bins=100, range=(.0001, 6.5))
plt.show()

In [None]:
# '(bottom) Snow Depth (in)' column as a pandas Series, plus a plain-list
# copy taken from this same bottom-level column.
bottom_snow_depth_df = data.get('(bottom) Snow Depth (in)')
bottom_snow_depth_list = bottom_snow_depth_df.tolist()

# Check for null values
is_null_values = bottom_snow_depth_df.isnull().values.any()
print('Null Values: {}\n'.format(is_null_values))

# Bottom Snow Depth summary stats
df_min = bottom_snow_depth_df.min()
df_max = bottom_snow_depth_df.max()
df_mean = bottom_snow_depth_df.mean()
df_median = bottom_snow_depth_df.median()
df_stddev = bottom_snow_depth_df.std()
print('Bottom Snow Depth Summary Stats- \nmin:{}\nmax:{}\nmean:{}\nmedian:{}\nstd dev:{}\n'.format(df_min, df_max, df_mean, df_median, df_stddev))

# Frequency table, computed once and reused for the mode count and the listing.
bottom_snow_depth_counts = bottom_snow_depth_df.value_counts()

# Mode and how often it occurs. mode() comes back empty when the column has
# no non-null readings, so report "no mode" instead of inventing a value;
# when several values tie, the first one listed by mode() is used.
bottom_snow_depth_modes = bottom_snow_depth_df.mode()
if bottom_snow_depth_modes.empty:
    df_mode, num_mode = None, 0
else:
    df_mode = bottom_snow_depth_modes.iloc[0]
    num_mode = bottom_snow_depth_counts.loc[df_mode]
print('Mode Value: {}'.format(df_mode))
print('Mode Occurences: {}\n'.format(num_mode))

print('Value counts:\n{}'.format(bottom_snow_depth_counts))

# Bottom Snow Depth plots: all readings, then a window starting just above zero
plt.hist(bottom_snow_depth_df, bins=100)
plt.show()

plt.hist(bottom_snow_depth_df, bins=100, range=(0.0001, 3.5))
plt.show()

### Snow Depth Notes:
You would think that the bottom snow depth would have the most non-zero data since it is nearest to the ground, but this is the opposite of what we observe here. Each level has spikes in data repetition. Why is this? There is a particularly large spike at 1.97 in the top snow depth column. Is this because it is the highest point that the instrument could accurately measure? However, there are hundreds of data points above that level. Perhaps some ice or snow got stuck to the measuring instrument.

## Snow Temperature

In [None]:
# '(top) Snow Temp. (deg. F)' column as a pandas Series, plus a plain-list
# copy taken from this temperature column (not from the snow-depth column).
top_snow_temp_df = data.get('(top) Snow Temp. (deg. F)')
top_snow_temp_list = top_snow_temp_df.tolist()

# Check for null values
is_null_values = top_snow_temp_df.isnull().values.any()
print('Null Values: {}\n'.format(is_null_values))

# Top Snow Temp summary stats
df_min = top_snow_temp_df.min()
df_max = top_snow_temp_df.max()
df_mean = top_snow_temp_df.mean()
df_median = top_snow_temp_df.median()
df_stddev = top_snow_temp_df.std()
print('Top Snow Temperature Summary Stats- \nmin:{}\nmax:{}\nmean:{}\nmedian:{}\nstd dev:{}\n'.format(df_min, df_max, df_mean, df_median, df_stddev))

# Frequency table, computed once and reused for the mode count and the listing.
top_snow_temp_counts = top_snow_temp_df.value_counts()

# Mode and how often it occurs. mode() comes back empty when the column has
# no non-null readings, so report "no mode" instead of inventing a value;
# when several values tie, the first one listed by mode() is used.
top_snow_temp_modes = top_snow_temp_df.mode()
if top_snow_temp_modes.empty:
    df_mode, num_mode = None, 0
else:
    df_mode = top_snow_temp_modes.iloc[0]
    num_mode = top_snow_temp_counts.loc[df_mode]
print('Mode Value: {}'.format(df_mode))
print('Mode Occurences: {}\n'.format(num_mode))

print('Value counts:\n{}'.format(top_snow_temp_counts))

# Top Snow Temp plots
plt.hist(top_snow_temp_df, bins=100)
plt.show()

In [None]:
# '(middle) Snow Temp. (deg. F)' column as a pandas Series, plus a plain-list
# copy taken from this temperature column (not from the snow-depth column).
middle_snow_temp_df = data.get('(middle) Snow Temp. (deg. F)')
middle_snow_temp_list = middle_snow_temp_df.tolist()

# Check for null values (in the temperature column being summarised here)
is_null_values = middle_snow_temp_df.isnull().values.any()
print('Null Values: {}\n'.format(is_null_values))

# Middle Snow Temp summary stats
df_min = middle_snow_temp_df.min()
df_max = middle_snow_temp_df.max()
df_mean = middle_snow_temp_df.mean()
df_median = middle_snow_temp_df.median()
df_stddev = middle_snow_temp_df.std()
print('Middle Snow Temperature Summary Stats- \nmin:{}\nmax:{}\nmean:{}\nmedian:{}\nstd dev:{}\n'.format(df_min, df_max, df_mean, df_median, df_stddev))

# Frequency table, computed once and reused for the mode count and the listing.
middle_snow_temp_counts = middle_snow_temp_df.value_counts()

# Mode and how often it occurs. mode() comes back empty when the column has
# no non-null readings, so report "no mode" instead of inventing a value;
# when several values tie, the first one listed by mode() is used.
middle_snow_temp_modes = middle_snow_temp_df.mode()
if middle_snow_temp_modes.empty:
    df_mode, num_mode = None, 0
else:
    df_mode = middle_snow_temp_modes.iloc[0]
    num_mode = middle_snow_temp_counts.loc[df_mode]
print('Mode Value: {}'.format(df_mode))
print('Mode Occurences: {}\n'.format(num_mode))

print('Value counts:\n{}'.format(middle_snow_temp_counts))

# Middle Snow Temp plots
plt.hist(middle_snow_temp_df, bins=100)
plt.show()

In [None]:
# '(bottom) Snow Temp. (deg. F)' column as a pandas Series, plus a plain-list copy.
bottom_snow_temp_df = data.get('(bottom) Snow Temp. (deg. F)')
bottom_snow_temp_list = bottom_snow_temp_df.tolist()

# Check for null values (in the temperature column being summarised here,
# not in the snow-depth column)
is_null_values = bottom_snow_temp_df.isnull().values.any()
print('Null Values: {}\n'.format(is_null_values))

# Bottom Snow Temp summary stats
df_min = bottom_snow_temp_df.min()
df_max = bottom_snow_temp_df.max()
df_mean = bottom_snow_temp_df.mean()
df_median = bottom_snow_temp_df.median()
df_stddev = bottom_snow_temp_df.std()
print('Bottom Snow Temperature Summary Stats- \nmin:{}\nmax:{}\nmean:{}\nmedian:{}\nstd dev:{}\n'.format(df_min, df_max, df_mean, df_median, df_stddev))

# Frequency table, computed once and reused for the mode count and the listing.
bottom_snow_temp_counts = bottom_snow_temp_df.value_counts()

# Mode and how often it occurs. mode() comes back empty when the column has
# no non-null readings, so report "no mode" instead of inventing a value;
# when several values tie, the first one listed by mode() is used.
bottom_snow_temp_modes = bottom_snow_temp_df.mode()
if bottom_snow_temp_modes.empty:
    df_mode, num_mode = None, 0
else:
    df_mode = bottom_snow_temp_modes.iloc[0]
    num_mode = bottom_snow_temp_counts.loc[df_mode]
print('Mode Value: {}'.format(df_mode))
print('Mode Occurences: {}\n'.format(num_mode))

print('Value counts:\n{}'.format(bottom_snow_temp_counts))

# Bottom Snow Temp plots
plt.hist(bottom_snow_temp_df, bins=100)
plt.show()

## Snow Density

In [None]:
# '(top) Snow Density (%)' column as a pandas Series, plus a plain-list copy.
top_snow_density_df = data.get('(top) Snow Density (%)')
top_snow_density_list = top_snow_density_df.tolist()

# Check for null values
is_null_values = top_snow_density_df.isnull().values.any()
print('Null Values: {}\n'.format(is_null_values))

# Top Snow Density summary stats
df_min = top_snow_density_df.min()
df_max = top_snow_density_df.max()
df_mean = top_snow_density_df.mean()
df_median = top_snow_density_df.median()
df_stddev = top_snow_density_df.std()
print('Top Snow Density Summary Stats- \nmin:{}\nmax:{}\nmean:{}\nmedian:{}\nstd dev:{}\n'.format(df_min, df_max, df_mean, df_median, df_stddev))

# Frequency table, computed once and reused for the mode count and the listing.
top_snow_density_counts = top_snow_density_df.value_counts()

# Mode and how often it occurs. mode() comes back empty when the column has
# no non-null readings, so report "no mode" instead of inventing a value;
# when several values tie, the first one listed by mode() is used.
top_snow_density_modes = top_snow_density_df.mode()
if top_snow_density_modes.empty:
    df_mode, num_mode = None, 0
else:
    df_mode = top_snow_density_modes.iloc[0]
    num_mode = top_snow_density_counts.loc[df_mode]
print('Mode Value: {}'.format(df_mode))
print('Mode Occurences: {}\n'.format(num_mode))

print('Value counts:\n{}'.format(top_snow_density_counts))

# Top Snow Density plots: all readings, then the 9.5-10.5 window
plt.hist(top_snow_density_df, bins=100)
plt.show()

plt.hist(top_snow_density_df, bins=100, range=(9.5, 10.5))
plt.show()

In [None]:
# '(middle) Snow Density (%)' column as a pandas Series, plus a plain-list copy.
middle_snow_density_df = data.get('(middle) Snow Density (%)')
middle_snow_density_list = middle_snow_density_df.tolist()

# Check for null values
is_null_values = middle_snow_density_df.isnull().values.any()
print('Null Values: {}\n'.format(is_null_values))

# Middle Snow Density summary stats
df_min = middle_snow_density_df.min()
df_max = middle_snow_density_df.max()
df_mean = middle_snow_density_df.mean()
df_median = middle_snow_density_df.median()
df_stddev = middle_snow_density_df.std()
print('Middle Snow Density Summary Stats- \nmin:{}\nmax:{}\nmean:{}\nmedian:{}\nstd dev:{}\n'.format(df_min, df_max, df_mean, df_median, df_stddev))

# Frequency table, computed once and reused for the mode count and the listing.
middle_snow_density_counts = middle_snow_density_df.value_counts()

# Mode and how often it occurs. mode() comes back empty when the column has
# no non-null readings, so report "no mode" instead of inventing a value;
# when several values tie, the first one listed by mode() is used.
middle_snow_density_modes = middle_snow_density_df.mode()
if middle_snow_density_modes.empty:
    df_mode, num_mode = None, 0
else:
    df_mode = middle_snow_density_modes.iloc[0]
    num_mode = middle_snow_density_counts.loc[df_mode]
print('Mode Value: {}'.format(df_mode))
print('Mode Occurences: {}\n'.format(num_mode))

print('Value counts:\n{}'.format(middle_snow_density_counts))

# Middle Snow Density plots
plt.hist(middle_snow_density_df, bins=100)
plt.show()

In [None]:
# '(bottom) Snow Density (%)' column as a pandas Series, plus a plain-list copy.
bottom_snow_density_df = data.get('(bottom) Snow Density (%)')
bottom_snow_density_list = bottom_snow_density_df.tolist()

# Check for null values
is_null_values = bottom_snow_density_df.isnull().values.any()
print('Null Values: {}\n'.format(is_null_values))

# Bottom Snow Density summary stats
df_min = bottom_snow_density_df.min()
df_max = bottom_snow_density_df.max()
df_mean = bottom_snow_density_df.mean()
df_median = bottom_snow_density_df.median()
df_stddev = bottom_snow_density_df.std()
print('Bottom Snow Density Summary Stats- \nmin:{}\nmax:{}\nmean:{}\nmedian:{}\nstd dev:{}\n'.format(df_min, df_max, df_mean, df_median, df_stddev))

# Frequency table, computed once and reused for the mode count and the listing.
bottom_snow_density_counts = bottom_snow_density_df.value_counts()

# Mode and how often it occurs. mode() comes back empty when the column has
# no non-null readings, so report "no mode" instead of inventing a value;
# when several values tie, the first one listed by mode() is used.
bottom_snow_density_modes = bottom_snow_density_df.mode()
if bottom_snow_density_modes.empty:
    df_mode, num_mode = None, 0
else:
    df_mode = bottom_snow_density_modes.iloc[0]
    num_mode = bottom_snow_density_counts.loc[df_mode]
print('Mode Value: {}'.format(df_mode))
print('Mode Occurences: {}\n'.format(num_mode))

print('Value counts:\n{}'.format(bottom_snow_density_counts))

# Bottom Snow Density plots
plt.hist(bottom_snow_density_df, bins=100)
plt.show()

## Forecasted from 0 UTC

## Few Cloud Layer (FEW)
Describes cloud cover when between 0/8th and 2/8th of the sky is obscured by cloud.

## Scattered Cloud Layer (SCT)
Scattered cloud layer 3/8ths to 4/8ths 
http://www.moratech.com/aviation/metaf-abbrev.html

## Broken Cloud Layer (BKN)
Broken cloud layer 5/8ths to 7/8ths

## Overcast Cloud Layer (OVC)
Overcast cloud layer 8/8ths coverage

## Vertical Visibility (VV)
Vertical Visibility, indefinite ceiling

## Cloud Rating
TODO: Determine meaning