# Notebook used to convert file format from CIAC db dump to a format we can use to generate ML labels

In [2]:
import pandas as pd

In [3]:
df = pd.read_csv('./caic_bc_fx_July142019.csv', 
                 parse_dates=['date', 'date_modified', 'date_issued'],
                 dtype = {'rating': 'object', 
                          'aspect_elev_0': 'object', 'aspect_elev_1': 'object', 'aspect_elev_2': 'object', 
                          'size_0': 'object', 'size_1': 'object', 'size_2': 'object',
                          'problem_0': 'object', 'problem_1': 'object', 'problem_2': 'object'})

In [4]:
#1. filter out draft
df = df[df['status']=='published']


In [26]:
df.columns

Index(['bc_avo_fx_id', 'status', 'zone_id', 'date', 'rating', 'summary',
       'problems', 'problem_0', 'aspect_elev_0', 'likelihood_0', 'size_0',
       'problem_1', 'aspect_elev_1', 'likelihood_1', 'size_1', 'problem_2',
       'aspect_elev_2', 'likelihood_2', 'size_2', 'date_modified',
       'date_issued', 'Day1DangerAboveTreeline', 'Day1DangerNearTreeline',
       'Day1DangerBelowTreeline', 'Day2DangerAboveTreeline',
       'Day2DangerNearTreeline', 'Day2DangerBelowTreeline'],
      dtype='object')

In [6]:
df.tail()

Unnamed: 0,bc_avo_fx_id,status,zone_id,date,rating,summary,problems,problem_0,aspect_elev_0,likelihood_0,...,problem_1,aspect_elev_1,likelihood_1,size_1,problem_2,aspect_elev_2,likelihood_2,size_2,date_modified,date_issued
2261,12053,published,5,2019-07-11 09:44:03,0,We issued our last Statewide Avalanche forecas...,0,2,011011011011011011011011,1,...,1,100100100100100100100100,1,11,4,011011011000000000000011,0,110,2019-07-11 09:44:27,2019-07-11 09:44:27
2262,12054,published,6,2019-07-11 09:44:57,0,We issued our last Statewide Avalanche forecas...,0,2,010010010010010010010010,1,...,1,100100100100100100100100,1,1,4,010010010000000000000000,1,10,2019-07-11 09:45:19,2019-07-11 09:45:19
2263,12055,published,7,2019-07-11 09:45:48,0,We issued our last Statewide Avalanche forecas...,0,2,011011011011011011011011,1,...,1,100100100100100100100100,1,11,1,100100100100100100100100,2,11,2019-07-11 09:46:11,2019-07-11 09:46:11
2264,12056,published,8,2019-07-11 09:46:39,0,We issued our last Statewide Avalanche forecas...,0,2,011011011011011011011011,1,...,1,100100100100100100100100,1,11,1,100100100100100100100100,2,11,2019-07-11 09:46:57,2019-07-11 09:46:57
2265,12057,published,9,2019-07-11 09:47:22,0,We issued our last Statewide Avalanche forecas...,0,3,011011011011000000000011,1,...,1,100100100100100100100100,1,11,5,101010111010101010101010,2,110,2019-07-11 09:47:45,2019-07-11 09:47:45


In [14]:
df['zone_id'].value_counts()

1     227
7     224
8     216
4     212
5     209
3     209
2     207
0     196
9     192
6     190
10     70
11     53
12     41
Name: zone_id, dtype: int64

In [7]:
def number_to_danger(num):
    if num == '0':
        return 'no-data'
    elif num == '1':
        return 'Low'
    elif num == '2':
        return 'Moderate'
    elif num == '3':
        return 'Considerable'
    elif num == '4':
        return 'High'
    elif num == '5':
        return 'Extreme'
    else:
        return 'Unknown-Danger'
    
    

In [8]:
df['Day1DangerAboveTreeline'] = df['rating'].str[0].apply(number_to_danger)
df['Day1DangerNearTreeline'] = df['rating'].str[1].apply(number_to_danger)
df['Day1DangerBelowTreeline'] = df['rating'].str[2].apply(number_to_danger)
df['Day2DangerAboveTreeline'] = df['rating'].str[3].apply(number_to_danger)
df['Day2DangerNearTreeline'] = df['rating'].str[4].apply(number_to_danger)
df['Day2DangerBelowTreeline'] = df['rating'].str[5].apply(number_to_danger)

In [15]:
import numpy as np
def num_to_problem(num):
    if num == '0':
        return 'LooseDry'
    elif num == '1':
        return 'LooseWet'
    elif num == '2':
        return 'StormSlabs'
    elif num == '3':
        return 'WindSlab'
    elif num == '4':
        return 'PersistentSlab'
    elif num == '5':
        return 'DeepPersistentSlab'
    elif num == '6':
        return 'WetSlabs'
    elif num == '7':
        return 'Cornices'
    elif num == '8':
        return 'Glide'
    else:
        raise Exception('Unknown Problem Exception with num: ' + str(num))

def num_to_likelihood(num):
    if num == 0:
        return '0-unlikely'
    elif num == 1:
        return '1-possible'
    elif num == 2:
        return '2-likely'
    elif num == 3:
        return '3-very likely'
    elif num == 4:
        return '4-certain'
    else:
        raise Exception('Unknown Likelihood Exception with num: ' + str(num))

def str_to_maximum_size(s):
    if(pd.isna(s)):
        return 'no-data'
    
    if s[0] == '1':
        return '3-historic'
    elif s[1] == '1':
        return '2-very large'
    elif s[2] == '1':
        return '1-large'
    elif s[3] == '1':
        return '0-small'
    else:
        raise Exception('Unknown MaximumSize Exception with size: ' + s)

def str_to_minimum_size(s):
    if(pd.isna(s)):
        return 'no-data'
    
    if s[3] == '1':
        return '0-small'
    elif s[2] == '1':
        return '1-large'
    elif s[1] == '1':
        return '2-very large'
    elif s[0] == '1':
        return '3-historic'
    else:
        raise Exception('Unknown MinimumSize Exception with size: ' + s)

def num_to_region(num):
    if num == 0:
        return 'Steamboat & Flat Tops'
    elif num == 1:
        return 'Front Range'
    elif num == 2:
        return 'Vail & Summit County'
    elif num == 3:
        return 'Sawatch Range'
    elif num == 4:
        return 'Aspen'
    elif num == 5:
        return 'Grand Mesa'
    elif num == 6:
        return 'Gunnison'
    elif num == 7:
        return 'Northern San Juan'
    elif num == 8:
        return 'Southern San Juan'
    elif num == 9:
        return 'Sangre De Cristo'
    elif num == 10: #these three aren't currently used by OAP
        return 'Northern'
    elif num == 11:
        return 'Central'
    elif num == 12:
        return 'Southern'
    else:
        raise Exception('Unknown region_id: ' + str(num))
        
def row_to_avy_problem(row):
    num_problems = row['problems']
    if pd.isna(num_problems):
        return
    
    row['Region'] = num_to_region(row['zone_id'])
    
    for i in range(0, int(num_problems)):
        try:
            problem_type = num_to_problem(row['problem_' + str(i)])
       
            #print('processing problem type: ' + problem_type)
            row[problem_type + '_Likelihood'] = num_to_likelihood(row['likelihood_' + str(i)])
            row[problem_type + '_MaximumSize'] = str_to_maximum_size(row['size_' + str(i)])
            row[problem_type + '_MinimumSize'] = str_to_minimum_size(row['size_' + str(i)])

            if pd.isna(row['aspect_elev_' + str(i)]):
                row[problem_type + '_OctagonBelowTreelineNorth'] = 'no-data'
                row[problem_type + '_OctagonNearTreelineNorth'] = 'no-data'
                row[problem_type + '_OctagonAboveTreelineNorth'] = 'no-data'
                row[problem_type + '_OctagonBelowTreelineNorthEast'] = 'no-data'
                row[problem_type + '_OctagonNearTreelineNorthEast'] = 'no-data'
                row[problem_type + '_OctagonAboveTreelineNorthEast'] = 'no-data'
                row[problem_type + '_OctagonBelowTreelineEast'] = 'no-data'
                row[problem_type + '_OctagonNearTreelineEast'] = 'no-data'
                row[problem_type + '_OctagonAboveTreelineEast'] = 'no-data'
                row[problem_type + '_OctagonBelowTreelineSouthEast'] = 'no-data'
                row[problem_type + '_OctagonNearTreelineSouthEast'] = 'no-data'
                row[problem_type + '_OctagonAboveTreelineSouthEast'] = 'no-data'
                row[problem_type + '_OctagonBelowTreelineSouth'] = 'no-data'
                row[problem_type + '_OctagonNearTreelineSouth'] = 'no-data'
                row[problem_type + '_OctagonAboveTreelineSouth'] = 'no-data'
                row[problem_type + '_OctagonBelowTreelineSouthWest'] = 'no-data'
                row[problem_type + '_OctagonNearTreelineSouthWest'] = 'no-data'
                row[problem_type + '_OctagonAboveTreelineSouthWest'] = 'no-data'
                row[problem_type + '_OctagonBelowTreelineWest'] = 'no-data'
                row[problem_type + '_OctagonNearTreelineWest'] = 'no-data'
                row[problem_type + '_OctagonAboveTreelineWest'] = 'no-data'
                row[problem_type + '_OctagonBelowTreelineNorthWest'] = 'no-data'
                row[problem_type + '_OctagonNearTreelineNorthWest'] = 'no-data'
                row[problem_type + '_OctagonAboveTreelineNorthWest'] = 'no-data'
            else:  
                row[problem_type + '_OctagonBelowTreelineNorth'] = row['aspect_elev_' + str(i)][0]
                row[problem_type + '_OctagonNearTreelineNorth'] = row['aspect_elev_' + str(i)][1]
                row[problem_type + '_OctagonAboveTreelineNorth'] = row['aspect_elev_' + str(i)][2]
                row[problem_type + '_OctagonBelowTreelineNorthEast'] = row['aspect_elev_' + str(i)][3]
                row[problem_type + '_OctagonNearTreelineNorthEast'] = row['aspect_elev_' + str(i)][4]
                row[problem_type + '_OctagonAboveTreelineNorthEast'] = row['aspect_elev_' + str(i)][5]
                row[problem_type + '_OctagonBelowTreelineEast'] = row['aspect_elev_' + str(i)][6]
                row[problem_type + '_OctagonNearTreelineEast'] = row['aspect_elev_' + str(i)][7]
                row[problem_type + '_OctagonAboveTreelineEast'] = row['aspect_elev_' + str(i)][8]
                row[problem_type + '_OctagonBelowTreelineSouthEast'] = row['aspect_elev_' + str(i)][9]
                row[problem_type + '_OctagonNearTreelineSouthEast'] = row['aspect_elev_' + str(i)][10]
                row[problem_type + '_OctagonAboveTreelineSouthEast'] = row['aspect_elev_' + str(i)][11]
                row[problem_type + '_OctagonBelowTreelineSouth'] = row['aspect_elev_' + str(i)][12]
                row[problem_type + '_OctagonNearTreelineSouth'] = row['aspect_elev_' + str(i)][13]
                row[problem_type + '_OctagonAboveTreelineSouth'] = row['aspect_elev_' + str(i)][14]
                row[problem_type + '_OctagonBelowTreelineSouthWest'] = row['aspect_elev_' + str(i)][15]
                row[problem_type + '_OctagonNearTreelineSouthWest'] = row['aspect_elev_' + str(i)][16]
                row[problem_type + '_OctagonAboveTreelineSouthWest'] = row['aspect_elev_' + str(i)][17]
                row[problem_type + '_OctagonBelowTreelineWest'] = row['aspect_elev_' + str(i)][18]
                row[problem_type + '_OctagonNearTreelineWest'] = row['aspect_elev_' + str(i)][19]
                row[problem_type + '_OctagonAboveTreelineWest'] = row['aspect_elev_' + str(i)][20]
                row[problem_type + '_OctagonBelowTreelineNorthWest'] = row['aspect_elev_' + str(i)][21]
                row[problem_type + '_OctagonNearTreelineNorthWest'] = row['aspect_elev_' + str(i)][22]
                row[problem_type + '_OctagonAboveTreelineNorthWest'] = row['aspect_elev_' + str(i)][23]
        except:
            print("format exception on bc_avo_fx_id: " + str(row['bc_avo_fx_id']) + ' skipping')
    return row
        
        
    
        

In [16]:
tmp = df.apply(row_to_avy_problem, axis=1)

In [80]:
#tmp.to_csv("tmp.csv")

In [17]:
tmp.rename(columns={'date_issued': 'PublishedDateTime', 'summary': 'Day1DetailedForecast'}, inplace=True)



In [18]:
tmp['ForecastUrl'] = ''
tmp['BottomLineSummary'] = ''
tmp['Day1Warning'] = ''
tmp['Day1WarningEnd'] = ''
tmp['Day1WarningText'] = ''
tmp['Day2DetailedForecast'] = ''
tmp['Day2Warning'] = ''
tmp['Day2WarningEnd'] = ''
tmp['Day2WarningText'] = ''
tmp['SpecialStatement'] = ''
tmp['UnifiedRegion'] = tmp['Region']

In [19]:

tmp['Day1Date'] = tmp['PublishedDateTime'].dt.strftime('%Y%m%d')

In [20]:
#tmp.to_csv("tmp.csv")

In [21]:
#filter to the latest forecast for the date (to remove)
#currently just taking the last forecast for the day but this could be improved to differentiate between updated and new forecasts
#also end of season forecast will be overwritten by placeholder forecast
tmp['maxdate'] = tmp.groupby(['Region', 'Day1Date'])['PublishedDateTime'].transform('max')

In [22]:
tmp['PublishedDateTime'] = pd.to_datetime(tmp['PublishedDateTime'])
tmp['maxdate'] = pd.to_datetime(tmp['maxdate'])


In [23]:
tmp = tmp[tmp['PublishedDateTime'] == tmp['maxdate']]

In [24]:
tmp['Day1Date'].head()

4     20181107
6     20181112
8     20181112
9     20181113
10    20181113
Name: Day1Date, dtype: object

In [27]:
#dump the critical points
#tmp[['Region', 'lat', 'lon']].groupby(['Region', 'lat', 'lon']).max()

In [28]:
#final column list
cols = [
    'Region',
    'UnifiedRegion',
    'PublishedDateTime',
    'Day1Date',
    'SpecialStatement',
    'BottomLineSummary',
    'ForecastUrl',
    'Day1DangerAboveTreeline',
    'Day1DangerNearTreeline',
    'Day1DangerBelowTreeline',
    'Day1DetailedForecast',
    'Day1Warning',
    'Day1WarningEnd',
    'Day1WarningText',
    'Day2DangerAboveTreeline',
    'Day2DangerNearTreeline',
    'Day2DangerBelowTreeline',
    'Day2DetailedForecast',
    'Day2Warning',
    'Day2WarningEnd',
    'Day2WarningText',
    'Cornices_Likelihood',
    'Cornices_MaximumSize',
    'Cornices_MinimumSize',
    'Cornices_OctagonAboveTreelineEast',
    'Cornices_OctagonAboveTreelineNorth',
    'Cornices_OctagonAboveTreelineNorthEast',
    'Cornices_OctagonAboveTreelineNorthWest',
    'Cornices_OctagonAboveTreelineSouth',
    'Cornices_OctagonAboveTreelineSouthEast',
    'Cornices_OctagonAboveTreelineSouthWest',
    'Cornices_OctagonAboveTreelineWest',
    'Cornices_OctagonNearTreelineEast',
    'Cornices_OctagonNearTreelineNorth',
    'Cornices_OctagonNearTreelineNorthEast',
    'Cornices_OctagonNearTreelineNorthWest',
    'Cornices_OctagonNearTreelineSouth',
    'Cornices_OctagonNearTreelineSouthEast',
    'Cornices_OctagonNearTreelineSouthWest',
    'Cornices_OctagonNearTreelineWest',
    'Cornices_OctagonBelowTreelineEast',
    'Cornices_OctagonBelowTreelineNorth',
    'Cornices_OctagonBelowTreelineNorthEast',
    'Cornices_OctagonBelowTreelineNorthWest',
    'Cornices_OctagonBelowTreelineSouth',
    'Cornices_OctagonBelowTreelineSouthEast',
    'Cornices_OctagonBelowTreelineSouthWest',
    'Cornices_OctagonBelowTreelineWest',
    'Glide_Likelihood',
    'Glide_MaximumSize',
    'Glide_MinimumSize',
    'Glide_OctagonAboveTreelineEast',
    'Glide_OctagonAboveTreelineNorth',
    'Glide_OctagonAboveTreelineNorthEast',
    'Glide_OctagonAboveTreelineNorthWest',
    'Glide_OctagonAboveTreelineSouth',
    'Glide_OctagonAboveTreelineSouthEast',
    'Glide_OctagonAboveTreelineSouthWest',
    'Glide_OctagonAboveTreelineWest',
    'Glide_OctagonNearTreelineEast',
    'Glide_OctagonNearTreelineNorth',
    'Glide_OctagonNearTreelineNorthEast',
    'Glide_OctagonNearTreelineNorthWest',
    'Glide_OctagonNearTreelineSouth',
    'Glide_OctagonNearTreelineSouthEast',
    'Glide_OctagonNearTreelineSouthWest',
    'Glide_OctagonNearTreelineWest',
    'Glide_OctagonBelowTreelineEast',
    'Glide_OctagonBelowTreelineNorth',
    'Glide_OctagonBelowTreelineNorthEast',
    'Glide_OctagonBelowTreelineNorthWest',
    'Glide_OctagonBelowTreelineSouth',
    'Glide_OctagonBelowTreelineSouthEast',
    'Glide_OctagonBelowTreelineSouthWest',
    'Glide_OctagonBelowTreelineWest',
    'LooseDry_Likelihood',
    'LooseDry_MaximumSize',
    'LooseDry_MinimumSize',
    'LooseDry_OctagonAboveTreelineEast',
    'LooseDry_OctagonAboveTreelineNorth',
    'LooseDry_OctagonAboveTreelineNorthEast',
    'LooseDry_OctagonAboveTreelineNorthWest',
    'LooseDry_OctagonAboveTreelineSouth',
    'LooseDry_OctagonAboveTreelineSouthEast',
    'LooseDry_OctagonAboveTreelineSouthWest',
    'LooseDry_OctagonAboveTreelineWest',
    'LooseDry_OctagonNearTreelineEast',
    'LooseDry_OctagonNearTreelineNorth',
    'LooseDry_OctagonNearTreelineNorthEast',
    'LooseDry_OctagonNearTreelineNorthWest',
    'LooseDry_OctagonNearTreelineSouth',
    'LooseDry_OctagonNearTreelineSouthEast',
    'LooseDry_OctagonNearTreelineSouthWest',
    'LooseDry_OctagonNearTreelineWest',
    'LooseDry_OctagonBelowTreelineEast',
    'LooseDry_OctagonBelowTreelineNorth',
    'LooseDry_OctagonBelowTreelineNorthEast',
    'LooseDry_OctagonBelowTreelineNorthWest',
    'LooseDry_OctagonBelowTreelineSouth',
    'LooseDry_OctagonBelowTreelineSouthEast',
    'LooseDry_OctagonBelowTreelineSouthWest',
    'LooseDry_OctagonBelowTreelineWest',
    'LooseWet_Likelihood',
    'LooseWet_MaximumSize',
    'LooseWet_MinimumSize',
    'LooseWet_OctagonAboveTreelineEast',
    'LooseWet_OctagonAboveTreelineNorth',
    'LooseWet_OctagonAboveTreelineNorthEast',
    'LooseWet_OctagonAboveTreelineNorthWest',
    'LooseWet_OctagonAboveTreelineSouth',
    'LooseWet_OctagonAboveTreelineSouthEast',
    'LooseWet_OctagonAboveTreelineSouthWest',
    'LooseWet_OctagonAboveTreelineWest',
    'LooseWet_OctagonNearTreelineEast',
    'LooseWet_OctagonNearTreelineNorth',
    'LooseWet_OctagonNearTreelineNorthEast',
    'LooseWet_OctagonNearTreelineNorthWest',
    'LooseWet_OctagonNearTreelineSouth',
    'LooseWet_OctagonNearTreelineSouthEast',
    'LooseWet_OctagonNearTreelineSouthWest',
    'LooseWet_OctagonNearTreelineWest',
    'LooseWet_OctagonBelowTreelineEast',
    'LooseWet_OctagonBelowTreelineNorth',
    'LooseWet_OctagonBelowTreelineNorthEast',
    'LooseWet_OctagonBelowTreelineNorthWest',
    'LooseWet_OctagonBelowTreelineSouth',
    'LooseWet_OctagonBelowTreelineSouthEast',
    'LooseWet_OctagonBelowTreelineSouthWest',
    'LooseWet_OctagonBelowTreelineWest',
    'PersistentSlab_Likelihood',
    'PersistentSlab_MaximumSize',
    'PersistentSlab_MinimumSize',
    'PersistentSlab_OctagonAboveTreelineEast',
    'PersistentSlab_OctagonAboveTreelineNorth',
    'PersistentSlab_OctagonAboveTreelineNorthEast',
    'PersistentSlab_OctagonAboveTreelineNorthWest',
    'PersistentSlab_OctagonAboveTreelineSouth',
    'PersistentSlab_OctagonAboveTreelineSouthEast',
    'PersistentSlab_OctagonAboveTreelineSouthWest',
    'PersistentSlab_OctagonAboveTreelineWest',
    'PersistentSlab_OctagonNearTreelineEast',
    'PersistentSlab_OctagonNearTreelineNorth',
    'PersistentSlab_OctagonNearTreelineNorthEast',
    'PersistentSlab_OctagonNearTreelineNorthWest',
    'PersistentSlab_OctagonNearTreelineSouth',
    'PersistentSlab_OctagonNearTreelineSouthEast',
    'PersistentSlab_OctagonNearTreelineSouthWest',
    'PersistentSlab_OctagonNearTreelineWest',
    'PersistentSlab_OctagonBelowTreelineEast',
    'PersistentSlab_OctagonBelowTreelineNorth',
    'PersistentSlab_OctagonBelowTreelineNorthEast',
    'PersistentSlab_OctagonBelowTreelineNorthWest',
    'PersistentSlab_OctagonBelowTreelineSouth',
    'PersistentSlab_OctagonBelowTreelineSouthEast',
    'PersistentSlab_OctagonBelowTreelineSouthWest',
    'PersistentSlab_OctagonBelowTreelineWest',
    'DeepPersistentSlab_Likelihood',
    'DeepPersistentSlab_MaximumSize',
    'DeepPersistentSlab_MinimumSize',
    'DeepPersistentSlab_OctagonAboveTreelineEast',
    'DeepPersistentSlab_OctagonAboveTreelineNorth',
    'DeepPersistentSlab_OctagonAboveTreelineNorthEast',
    'DeepPersistentSlab_OctagonAboveTreelineNorthWest',
    'DeepPersistentSlab_OctagonAboveTreelineSouth',
    'DeepPersistentSlab_OctagonAboveTreelineSouthEast',
    'DeepPersistentSlab_OctagonAboveTreelineSouthWest',
    'DeepPersistentSlab_OctagonAboveTreelineWest',
    'DeepPersistentSlab_OctagonNearTreelineEast',
    'DeepPersistentSlab_OctagonNearTreelineNorth',
    'DeepPersistentSlab_OctagonNearTreelineNorthEast',
    'DeepPersistentSlab_OctagonNearTreelineNorthWest',
    'DeepPersistentSlab_OctagonNearTreelineSouth',
    'DeepPersistentSlab_OctagonNearTreelineSouthEast',
    'DeepPersistentSlab_OctagonNearTreelineSouthWest',
    'DeepPersistentSlab_OctagonNearTreelineWest',
    'DeepPersistentSlab_OctagonBelowTreelineEast',
    'DeepPersistentSlab_OctagonBelowTreelineNorth',
    'DeepPersistentSlab_OctagonBelowTreelineNorthEast',
    'DeepPersistentSlab_OctagonBelowTreelineNorthWest',
    'DeepPersistentSlab_OctagonBelowTreelineSouth',
    'DeepPersistentSlab_OctagonBelowTreelineSouthEast',
    'DeepPersistentSlab_OctagonBelowTreelineSouthWest',
    'DeepPersistentSlab_OctagonBelowTreelineWest',
    'StormSlabs_Likelihood',
    'StormSlabs_MaximumSize',
    'StormSlabs_MinimumSize',
    'StormSlabs_OctagonAboveTreelineEast',
    'StormSlabs_OctagonAboveTreelineNorth',
    'StormSlabs_OctagonAboveTreelineNorthEast',
    'StormSlabs_OctagonAboveTreelineNorthWest',
    'StormSlabs_OctagonAboveTreelineSouth',
    'StormSlabs_OctagonAboveTreelineSouthEast',
    'StormSlabs_OctagonAboveTreelineSouthWest',
    'StormSlabs_OctagonAboveTreelineWest',
    'StormSlabs_OctagonNearTreelineEast',
    'StormSlabs_OctagonNearTreelineNorth',
    'StormSlabs_OctagonNearTreelineNorthEast',
    'StormSlabs_OctagonNearTreelineNorthWest',
    'StormSlabs_OctagonNearTreelineSouth',
    'StormSlabs_OctagonNearTreelineSouthEast',
    'StormSlabs_OctagonNearTreelineSouthWest',
    'StormSlabs_OctagonNearTreelineWest',
    'StormSlabs_OctagonBelowTreelineEast',
    'StormSlabs_OctagonBelowTreelineNorth',
    'StormSlabs_OctagonBelowTreelineNorthEast',
    'StormSlabs_OctagonBelowTreelineNorthWest',
    'StormSlabs_OctagonBelowTreelineSouth',
    'StormSlabs_OctagonBelowTreelineSouthEast',
    'StormSlabs_OctagonBelowTreelineSouthWest',
    'StormSlabs_OctagonBelowTreelineWest',
    'WetSlabs_Likelihood',
    'WetSlabs_MaximumSize',
    'WetSlabs_MinimumSize',
    'WetSlabs_OctagonAboveTreelineEast',
    'WetSlabs_OctagonAboveTreelineNorth',
    'WetSlabs_OctagonAboveTreelineNorthEast',
    'WetSlabs_OctagonAboveTreelineNorthWest',
    'WetSlabs_OctagonAboveTreelineSouth',
    'WetSlabs_OctagonAboveTreelineSouthEast',
    'WetSlabs_OctagonAboveTreelineSouthWest',
    'WetSlabs_OctagonAboveTreelineWest',
    'WetSlabs_OctagonNearTreelineEast',
    'WetSlabs_OctagonNearTreelineNorth',
    'WetSlabs_OctagonNearTreelineNorthEast',
    'WetSlabs_OctagonNearTreelineNorthWest',
    'WetSlabs_OctagonNearTreelineSouth',
    'WetSlabs_OctagonNearTreelineSouthEast',
    'WetSlabs_OctagonNearTreelineSouthWest',
    'WetSlabs_OctagonNearTreelineWest',
    'WetSlabs_OctagonBelowTreelineEast',
    'WetSlabs_OctagonBelowTreelineNorth',
    'WetSlabs_OctagonBelowTreelineNorthEast',
    'WetSlabs_OctagonBelowTreelineNorthWest',
    'WetSlabs_OctagonBelowTreelineSouth',
    'WetSlabs_OctagonBelowTreelineSouthEast',
    'WetSlabs_OctagonBelowTreelineSouthWest',
    'WetSlabs_OctagonBelowTreelineWest',
    'WindSlab_Likelihood',
    'WindSlab_MaximumSize',
    'WindSlab_MinimumSize',
    'WindSlab_OctagonAboveTreelineEast',
    'WindSlab_OctagonAboveTreelineNorth',
    'WindSlab_OctagonAboveTreelineNorthEast',
    'WindSlab_OctagonAboveTreelineNorthWest',
    'WindSlab_OctagonAboveTreelineSouth',
    'WindSlab_OctagonAboveTreelineSouthEast',
    'WindSlab_OctagonAboveTreelineSouthWest',
    'WindSlab_OctagonAboveTreelineWest',
    'WindSlab_OctagonNearTreelineEast',
    'WindSlab_OctagonNearTreelineNorth',
    'WindSlab_OctagonNearTreelineNorthEast',
    'WindSlab_OctagonNearTreelineNorthWest',
    'WindSlab_OctagonNearTreelineSouth',
    'WindSlab_OctagonNearTreelineSouthEast',
    'WindSlab_OctagonNearTreelineSouthWest',
    'WindSlab_OctagonNearTreelineWest',
    'WindSlab_OctagonBelowTreelineEast',
    'WindSlab_OctagonBelowTreelineNorth',
    'WindSlab_OctagonBelowTreelineNorthEast',
    'WindSlab_OctagonBelowTreelineNorthWest',
    'WindSlab_OctagonBelowTreelineSouth',
    'WindSlab_OctagonBelowTreelineSouthEast',
    'WindSlab_OctagonBelowTreelineSouthWest',
    'WindSlab_OctagonBelowTreelineWest'
]


In [29]:
len(tmp)

1789

In [30]:
finalDf = tmp[cols]

In [31]:
finalDf.replace(np.nan, 'no-data', inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  method=method,


In [32]:
finalDf[:20]

Unnamed: 0,Region,UnifiedRegion,PublishedDateTime,Day1Date,SpecialStatement,BottomLineSummary,ForecastUrl,Day1DangerAboveTreeline,Day1DangerNearTreeline,Day1DangerBelowTreeline,...,WindSlab_OctagonNearTreelineSouthWest,WindSlab_OctagonNearTreelineWest,WindSlab_OctagonBelowTreelineEast,WindSlab_OctagonBelowTreelineNorth,WindSlab_OctagonBelowTreelineNorthEast,WindSlab_OctagonBelowTreelineNorthWest,WindSlab_OctagonBelowTreelineSouth,WindSlab_OctagonBelowTreelineSouthEast,WindSlab_OctagonBelowTreelineSouthWest,WindSlab_OctagonBelowTreelineWest
4,Sangre De Cristo,Sangre De Cristo,2018-11-07 11:44:47,20181107,,,,no-data,no-data,no-data,...,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data
6,Gunnison,Gunnison,2018-11-12 05:41:24,20181112,,,,no-data,no-data,no-data,...,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data
8,Sangre De Cristo,Sangre De Cristo,2018-11-12 15:05:24,20181112,,,,no-data,no-data,no-data,...,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data
9,Steamboat & Flat Tops,Steamboat & Flat Tops,2018-11-13 10:26:38,20181113,,,,no-data,no-data,no-data,...,0,1,0,0,1,1,0,1,1,0
10,Sangre De Cristo,Sangre De Cristo,2018-11-13 10:27:39,20181113,,,,no-data,no-data,no-data,...,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data
12,Front Range,Front Range,2018-11-15 07:03:13,20181115,,,,Moderate,Moderate,Low,...,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data
13,Sangre De Cristo,Sangre De Cristo,2018-11-15 07:11:40,20181115,,,,Moderate,Moderate,Low,...,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data
14,Aspen,Aspen,2018-11-15 07:36:06,20181115,,,,Moderate,Moderate,Low,...,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data
16,Vail & Summit County,Vail & Summit County,2018-11-15 06:46:47,20181115,,,,Moderate,Moderate,Low,...,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data
18,Sawatch Range,Sawatch Range,2018-11-15 06:46:56,20181115,,,,Moderate,Moderate,Low,...,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data


In [33]:
finalDf.to_csv('CleanedForecastsCAIC2019.V1.csv', index=False, date_format='%Y%m%d %H:00')