# Notebook used to convert file format from CIAC db dump to a format we can use to generate ML labels

In [1]:
import pandas as pd

In [20]:
df = pd.read_csv('./19-21CAIC.csv', 
                 parse_dates=['date', 'date_modified', 'date_issued'],
                 dtype = {'rating': 'object', 
                          'aspect_elev_0': 'object', 'aspect_elev_1': 'object', 'aspect_elev_2': 'object', 
                          'size_0': 'object', 'size_1': 'object', 'size_2': 'object',
                          'problem_0': 'object', 'problem_1': 'object', 'problem_2': 'object'})

In [21]:
#1. filter out draft
df = df[df['status']=='published']


In [22]:
df.columns

Index(['bc_avo_fx_id', 'status', 'zone_id', 'date', 'rating', 'summary',
       'problems', 'problem_0', 'aspect_elev_0', 'likelihood_0', 'size_0',
       'problem_1', 'aspect_elev_1', 'likelihood_1', 'size_1', 'problem_2',
       'aspect_elev_2', 'likelihood_2', 'size_2', 'date_modified',
       'date_issued'],
      dtype='object')

In [23]:
df.head(25)

Unnamed: 0,bc_avo_fx_id,status,zone_id,date,rating,summary,problems,problem_0,aspect_elev_0,likelihood_0,...,problem_1,aspect_elev_1,likelihood_1,size_1,problem_2,aspect_elev_2,likelihood_2,size_2,date_modified,date_issued
0,12087,published,12,2019-11-01 06:35:54,100100,Replaced Description,1,8,000000000000100000000000,0,...,0,000000000000000000000000,0,0,0,000000000000000000000000,0,0,2019-11-01 13:35:32,2019-11-01 13:35:32
1,12088,published,10,2019-11-01 09:00:15,200200,Replaced Description,1,8,000000000000100000000000,0,...,0,000000000000000000000000,0,0,0,000000000000000000000000,0,0,2019-11-01 13:26:52,2019-11-01 13:26:52
2,12089,published,11,2019-11-01 09:02:22,200200,Replaced Description,1,8,000000000000100000000000,0,...,0,000000000000000000000000,0,0,0,000000000000000000000000,0,0,2019-11-01 13:35:06,2019-11-01 13:35:06
3,12090,published,11,2019-11-02 12:35:53,100100,Replaced Description,1,8,000000000000100000000000,0,...,0,000000000000000000000000,0,0,0,000000000000000000000000,0,0,2019-11-02 13:34:25,2019-11-02 13:34:25
4,12091,published,10,2019-11-02 12:35:58,100100,Replaced Description,1,8,000000000000100000000000,0,...,0,000000000000000000000000,0,0,0,000000000000000000000000,0,0,2019-11-02 13:19:04,2019-11-02 13:19:04
5,12092,published,10,2019-11-02 13:20:00,100100,Replaced Description,1,8,000000000000100000000000,0,...,0,000000000000000000000000,0,0,0,000000000000000000000000,0,0,2019-11-02 13:22:26,2019-11-02 13:22:26
6,12093,published,12,2019-11-02 13:23:29,100100,Replaced Description,1,8,000000000000100000000000,0,...,0,000000000000000000000000,0,0,0,000000000000000000000000,0,0,2019-11-02 13:27:29,2019-11-02 13:27:29
7,12095,published,10,2019-11-03 12:47:59,100100,Replaced Description,1,8,000000000000100000000000,0,...,0,000000000000000000000000,0,0,0,000000000000000000000000,0,0,2019-11-03 13:09:46,2019-11-03 13:09:46
8,12096,published,11,2019-11-03 12:50:24,100100,Replaced Description,1,8,000000000000100000000000,0,...,0,000000000000000000000000,0,0,0,000000000000000000000000,0,0,2019-11-03 13:08:59,2019-11-03 13:08:59
9,12097,published,12,2019-11-03 12:54:47,100100,Replaced Description,1,8,000000000000100000000000,0,...,0,000000000000000000000000,0,0,0,000000000000000000000000,0,0,2019-11-03 13:08:00,2019-11-03 13:08:00


In [24]:
df['zone_id'].value_counts()

1     409
7     396
4     394
3     391
8     382
5     375
0     373
2     359
6     339
9     335
12    187
10    174
11    172
Name: zone_id, dtype: int64

In [25]:
def number_to_danger(num):
    if num == '0':
        return 'no-data'
    elif num == '1':
        return 'Low'
    elif num == '2':
        return 'Moderate'
    elif num == '3':
        return 'Considerable'
    elif num == '4':
        return 'High'
    elif num == '5':
        return 'Extreme'
    else:
        return 'Unknown-Danger'
    
    

In [26]:
df['Day1DangerAboveTreeline'] = df['rating'].str[0].apply(number_to_danger)
df['Day1DangerNearTreeline'] = df['rating'].str[1].apply(number_to_danger)
df['Day1DangerBelowTreeline'] = df['rating'].str[2].apply(number_to_danger)
df['Day2DangerAboveTreeline'] = df['rating'].str[3].apply(number_to_danger)
df['Day2DangerNearTreeline'] = df['rating'].str[4].apply(number_to_danger)
df['Day2DangerBelowTreeline'] = df['rating'].str[5].apply(number_to_danger)

In [27]:
import numpy as np
def num_to_problem(num):
    if num == '0':
        return 'LooseDry'
    elif num == '1':
        return 'LooseWet'
    elif num == '2':
        return 'StormSlabs'
    elif num == '3':
        return 'WindSlab'
    elif num == '4':
        return 'PersistentSlab'
    elif num == '5':
        return 'DeepPersistentSlab'
    elif num == '6':
        return 'WetSlabs'
    elif num == '7':
        return 'Cornices'
    elif num == '8':
        return 'Glide'
    else:
        raise Exception('Unknown Problem Exception with num: ' + str(num))

def num_to_likelihood(num):
    if num == 0:
        return '0-unlikely'
    elif num == 1:
        return '1-possible'
    elif num == 2:
        return '2-likely'
    elif num == 3:
        return '3-very likely'
    elif num == 4:
        return '4-certain'
    else:
        raise Exception('Unknown Likelihood Exception with num: ' + str(num))

def str_to_maximum_size(s):
    if(pd.isna(s)):
        return 'no-data'
    
    if s[0] == '1':
        return '3-historic'
    elif s[1] == '1':
        return '2-very large'
    elif s[2] == '1':
        return '1-large'
    elif s[3] == '1':
        return '0-small'
    else:
        raise Exception('Unknown MaximumSize Exception with size: ' + s)

def str_to_minimum_size(s):
    if(pd.isna(s)):
        return 'no-data'
    
    if s[3] == '1':
        return '0-small'
    elif s[2] == '1':
        return '1-large'
    elif s[1] == '1':
        return '2-very large'
    elif s[0] == '1':
        return '3-historic'
    else:
        raise Exception('Unknown MinimumSize Exception with size: ' + s)

def num_to_region(num):
    if num == 0:
        return 'Steamboat & Flat Tops'
    elif num == 1:
        return 'Front Range'
    elif num == 2:
        return 'Vail & Summit County'
    elif num == 3:
        return 'Sawatch Range'
    elif num == 4:
        return 'Aspen'
    elif num == 5:
        return 'Grand Mesa'
    elif num == 6:
        return 'Gunnison'
    elif num == 7:
        return 'Northern San Juan'
    elif num == 8:
        return 'Southern San Juan'
    elif num == 9:
        return 'Sangre De Cristo'
    elif num == 10: #these three aren't currently used by OAP
        return 'Northern'
    elif num == 11:
        return 'Central'
    elif num == 12:
        return 'Southern'
    else:
        raise Exception('Unknown region_id: ' + str(num))
        
def row_to_avy_problem(row):
    num_problems = row['problems']
    if pd.isna(num_problems):
        return
    
    row['Region'] = num_to_region(row['zone_id'])
    
    for i in range(0, int(num_problems)):
        try:
            problem_type = num_to_problem(row['problem_' + str(i)])
       
            #print('processing problem type: ' + problem_type)
            row[problem_type + '_Likelihood'] = num_to_likelihood(row['likelihood_' + str(i)])
            row[problem_type + '_MaximumSize'] = str_to_maximum_size(row['size_' + str(i)])
            row[problem_type + '_MinimumSize'] = str_to_minimum_size(row['size_' + str(i)])

            if pd.isna(row['aspect_elev_' + str(i)]):
                row[problem_type + '_OctagonBelowTreelineNorth'] = 'no-data'
                row[problem_type + '_OctagonNearTreelineNorth'] = 'no-data'
                row[problem_type + '_OctagonAboveTreelineNorth'] = 'no-data'
                row[problem_type + '_OctagonBelowTreelineNorthEast'] = 'no-data'
                row[problem_type + '_OctagonNearTreelineNorthEast'] = 'no-data'
                row[problem_type + '_OctagonAboveTreelineNorthEast'] = 'no-data'
                row[problem_type + '_OctagonBelowTreelineEast'] = 'no-data'
                row[problem_type + '_OctagonNearTreelineEast'] = 'no-data'
                row[problem_type + '_OctagonAboveTreelineEast'] = 'no-data'
                row[problem_type + '_OctagonBelowTreelineSouthEast'] = 'no-data'
                row[problem_type + '_OctagonNearTreelineSouthEast'] = 'no-data'
                row[problem_type + '_OctagonAboveTreelineSouthEast'] = 'no-data'
                row[problem_type + '_OctagonBelowTreelineSouth'] = 'no-data'
                row[problem_type + '_OctagonNearTreelineSouth'] = 'no-data'
                row[problem_type + '_OctagonAboveTreelineSouth'] = 'no-data'
                row[problem_type + '_OctagonBelowTreelineSouthWest'] = 'no-data'
                row[problem_type + '_OctagonNearTreelineSouthWest'] = 'no-data'
                row[problem_type + '_OctagonAboveTreelineSouthWest'] = 'no-data'
                row[problem_type + '_OctagonBelowTreelineWest'] = 'no-data'
                row[problem_type + '_OctagonNearTreelineWest'] = 'no-data'
                row[problem_type + '_OctagonAboveTreelineWest'] = 'no-data'
                row[problem_type + '_OctagonBelowTreelineNorthWest'] = 'no-data'
                row[problem_type + '_OctagonNearTreelineNorthWest'] = 'no-data'
                row[problem_type + '_OctagonAboveTreelineNorthWest'] = 'no-data'
            else:  
                row[problem_type + '_OctagonBelowTreelineNorth'] = row['aspect_elev_' + str(i)][0]
                row[problem_type + '_OctagonNearTreelineNorth'] = row['aspect_elev_' + str(i)][1]
                row[problem_type + '_OctagonAboveTreelineNorth'] = row['aspect_elev_' + str(i)][2]
                row[problem_type + '_OctagonBelowTreelineNorthEast'] = row['aspect_elev_' + str(i)][3]
                row[problem_type + '_OctagonNearTreelineNorthEast'] = row['aspect_elev_' + str(i)][4]
                row[problem_type + '_OctagonAboveTreelineNorthEast'] = row['aspect_elev_' + str(i)][5]
                row[problem_type + '_OctagonBelowTreelineEast'] = row['aspect_elev_' + str(i)][6]
                row[problem_type + '_OctagonNearTreelineEast'] = row['aspect_elev_' + str(i)][7]
                row[problem_type + '_OctagonAboveTreelineEast'] = row['aspect_elev_' + str(i)][8]
                row[problem_type + '_OctagonBelowTreelineSouthEast'] = row['aspect_elev_' + str(i)][9]
                row[problem_type + '_OctagonNearTreelineSouthEast'] = row['aspect_elev_' + str(i)][10]
                row[problem_type + '_OctagonAboveTreelineSouthEast'] = row['aspect_elev_' + str(i)][11]
                row[problem_type + '_OctagonBelowTreelineSouth'] = row['aspect_elev_' + str(i)][12]
                row[problem_type + '_OctagonNearTreelineSouth'] = row['aspect_elev_' + str(i)][13]
                row[problem_type + '_OctagonAboveTreelineSouth'] = row['aspect_elev_' + str(i)][14]
                row[problem_type + '_OctagonBelowTreelineSouthWest'] = row['aspect_elev_' + str(i)][15]
                row[problem_type + '_OctagonNearTreelineSouthWest'] = row['aspect_elev_' + str(i)][16]
                row[problem_type + '_OctagonAboveTreelineSouthWest'] = row['aspect_elev_' + str(i)][17]
                row[problem_type + '_OctagonBelowTreelineWest'] = row['aspect_elev_' + str(i)][18]
                row[problem_type + '_OctagonNearTreelineWest'] = row['aspect_elev_' + str(i)][19]
                row[problem_type + '_OctagonAboveTreelineWest'] = row['aspect_elev_' + str(i)][20]
                row[problem_type + '_OctagonBelowTreelineNorthWest'] = row['aspect_elev_' + str(i)][21]
                row[problem_type + '_OctagonNearTreelineNorthWest'] = row['aspect_elev_' + str(i)][22]
                row[problem_type + '_OctagonAboveTreelineNorthWest'] = row['aspect_elev_' + str(i)][23]
        except:
            print("format exception on bc_avo_fx_id: " + str(row['bc_avo_fx_id']) + ' skipping')
    return row
        
        
    
        

In [28]:
tmp = df.apply(row_to_avy_problem, axis=1)

In [29]:
tmp.rename(columns={'date_issued': 'PublishedDateTime', 'summary': 'Day1DetailedForecast'}, inplace=True)



In [30]:
tmp['ForecastUrl'] = ''
tmp['BottomLineSummary'] = ''
tmp['Day1Warning'] = ''
tmp['Day1WarningEnd'] = ''
tmp['Day1WarningText'] = ''
tmp['Day2DetailedForecast'] = ''
tmp['Day2Warning'] = ''
tmp['Day2WarningEnd'] = ''
tmp['Day2WarningText'] = ''
tmp['SpecialStatement'] = ''
tmp['UnifiedRegion'] = tmp['Region']

In [31]:
tmp['PublishedDateTime'][:25]

0    2019-11-01 13:35:32
1    2019-11-01 13:26:52
2    2019-11-01 13:35:06
3    2019-11-02 13:34:25
4    2019-11-02 13:19:04
5    2019-11-02 13:22:26
6    2019-11-02 13:27:29
7    2019-11-03 13:09:46
8    2019-11-03 13:08:59
9    2019-11-03 13:08:00
10   2019-11-04 13:47:38
11   2019-11-04 14:06:13
12   2019-11-04 14:15:07
13   2019-11-05 13:05:35
14   2019-11-05 13:23:53
15   2019-11-05 13:32:13
16   2019-11-05 13:32:49
17   2019-11-05 13:33:40
18   2019-11-05 17:56:58
19   2019-11-05 19:35:22
20   2019-11-05 19:35:35
21   2019-11-05 19:35:11
22   2019-11-05 19:35:46
23   2019-11-05 19:36:57
24   2019-11-05 19:37:08
Name: PublishedDateTime, dtype: datetime64[ns]

In [32]:

tmp['Day1Date'] = tmp['PublishedDateTime'].dt.strftime('%Y%m%d')

In [20]:
#tmp.to_csv("tmp.csv")

In [33]:
#filter to the latest forecast for the date (to remove)
#currently just taking the last forecast for the day but this could be improved to differentiate between updated and new forecasts
#also end of season forecast will be overwritten by placeholder forecast
tmp['maxdate'] = tmp.groupby(['Region', 'Day1Date'])['PublishedDateTime'].transform('max')

In [34]:
tmp['PublishedDateTime'] = pd.to_datetime(tmp['PublishedDateTime'])
tmp['maxdate'] = pd.to_datetime(tmp['maxdate'])


In [35]:
tmp = tmp[tmp['PublishedDateTime'] == tmp['maxdate']]

In [36]:
tmp['Day1Date'].head()

0    20191101
1    20191101
2    20191101
3    20191102
5    20191102
Name: Day1Date, dtype: object

In [37]:
#dump the critical points
#tmp[['Region', 'lat', 'lon']].groupby(['Region', 'lat', 'lon']).max()

In [38]:
#final column list
cols = [
    'Region',
    'UnifiedRegion',
    'PublishedDateTime',
    'Day1Date',
    'SpecialStatement',
    'BottomLineSummary',
    'ForecastUrl',
    'Day1DangerAboveTreeline',
    'Day1DangerNearTreeline',
    'Day1DangerBelowTreeline',
    'Day1DetailedForecast',
    'Day1Warning',
    'Day1WarningEnd',
    'Day1WarningText',
    'Day2DangerAboveTreeline',
    'Day2DangerNearTreeline',
    'Day2DangerBelowTreeline',
    'Day2DetailedForecast',
    'Day2Warning',
    'Day2WarningEnd',
    'Day2WarningText',
    'Cornices_Likelihood',
    'Cornices_MaximumSize',
    'Cornices_MinimumSize',
    'Cornices_OctagonAboveTreelineEast',
    'Cornices_OctagonAboveTreelineNorth',
    'Cornices_OctagonAboveTreelineNorthEast',
    'Cornices_OctagonAboveTreelineNorthWest',
    'Cornices_OctagonAboveTreelineSouth',
    'Cornices_OctagonAboveTreelineSouthEast',
    'Cornices_OctagonAboveTreelineSouthWest',
    'Cornices_OctagonAboveTreelineWest',
    'Cornices_OctagonNearTreelineEast',
    'Cornices_OctagonNearTreelineNorth',
    'Cornices_OctagonNearTreelineNorthEast',
    'Cornices_OctagonNearTreelineNorthWest',
    'Cornices_OctagonNearTreelineSouth',
    'Cornices_OctagonNearTreelineSouthEast',
    'Cornices_OctagonNearTreelineSouthWest',
    'Cornices_OctagonNearTreelineWest',
    'Cornices_OctagonBelowTreelineEast',
    'Cornices_OctagonBelowTreelineNorth',
    'Cornices_OctagonBelowTreelineNorthEast',
    'Cornices_OctagonBelowTreelineNorthWest',
    'Cornices_OctagonBelowTreelineSouth',
    'Cornices_OctagonBelowTreelineSouthEast',
    'Cornices_OctagonBelowTreelineSouthWest',
    'Cornices_OctagonBelowTreelineWest',
    'Glide_Likelihood',
    'Glide_MaximumSize',
    'Glide_MinimumSize',
    'Glide_OctagonAboveTreelineEast',
    'Glide_OctagonAboveTreelineNorth',
    'Glide_OctagonAboveTreelineNorthEast',
    'Glide_OctagonAboveTreelineNorthWest',
    'Glide_OctagonAboveTreelineSouth',
    'Glide_OctagonAboveTreelineSouthEast',
    'Glide_OctagonAboveTreelineSouthWest',
    'Glide_OctagonAboveTreelineWest',
    'Glide_OctagonNearTreelineEast',
    'Glide_OctagonNearTreelineNorth',
    'Glide_OctagonNearTreelineNorthEast',
    'Glide_OctagonNearTreelineNorthWest',
    'Glide_OctagonNearTreelineSouth',
    'Glide_OctagonNearTreelineSouthEast',
    'Glide_OctagonNearTreelineSouthWest',
    'Glide_OctagonNearTreelineWest',
    'Glide_OctagonBelowTreelineEast',
    'Glide_OctagonBelowTreelineNorth',
    'Glide_OctagonBelowTreelineNorthEast',
    'Glide_OctagonBelowTreelineNorthWest',
    'Glide_OctagonBelowTreelineSouth',
    'Glide_OctagonBelowTreelineSouthEast',
    'Glide_OctagonBelowTreelineSouthWest',
    'Glide_OctagonBelowTreelineWest',
    'LooseDry_Likelihood',
    'LooseDry_MaximumSize',
    'LooseDry_MinimumSize',
    'LooseDry_OctagonAboveTreelineEast',
    'LooseDry_OctagonAboveTreelineNorth',
    'LooseDry_OctagonAboveTreelineNorthEast',
    'LooseDry_OctagonAboveTreelineNorthWest',
    'LooseDry_OctagonAboveTreelineSouth',
    'LooseDry_OctagonAboveTreelineSouthEast',
    'LooseDry_OctagonAboveTreelineSouthWest',
    'LooseDry_OctagonAboveTreelineWest',
    'LooseDry_OctagonNearTreelineEast',
    'LooseDry_OctagonNearTreelineNorth',
    'LooseDry_OctagonNearTreelineNorthEast',
    'LooseDry_OctagonNearTreelineNorthWest',
    'LooseDry_OctagonNearTreelineSouth',
    'LooseDry_OctagonNearTreelineSouthEast',
    'LooseDry_OctagonNearTreelineSouthWest',
    'LooseDry_OctagonNearTreelineWest',
    'LooseDry_OctagonBelowTreelineEast',
    'LooseDry_OctagonBelowTreelineNorth',
    'LooseDry_OctagonBelowTreelineNorthEast',
    'LooseDry_OctagonBelowTreelineNorthWest',
    'LooseDry_OctagonBelowTreelineSouth',
    'LooseDry_OctagonBelowTreelineSouthEast',
    'LooseDry_OctagonBelowTreelineSouthWest',
    'LooseDry_OctagonBelowTreelineWest',
    'LooseWet_Likelihood',
    'LooseWet_MaximumSize',
    'LooseWet_MinimumSize',
    'LooseWet_OctagonAboveTreelineEast',
    'LooseWet_OctagonAboveTreelineNorth',
    'LooseWet_OctagonAboveTreelineNorthEast',
    'LooseWet_OctagonAboveTreelineNorthWest',
    'LooseWet_OctagonAboveTreelineSouth',
    'LooseWet_OctagonAboveTreelineSouthEast',
    'LooseWet_OctagonAboveTreelineSouthWest',
    'LooseWet_OctagonAboveTreelineWest',
    'LooseWet_OctagonNearTreelineEast',
    'LooseWet_OctagonNearTreelineNorth',
    'LooseWet_OctagonNearTreelineNorthEast',
    'LooseWet_OctagonNearTreelineNorthWest',
    'LooseWet_OctagonNearTreelineSouth',
    'LooseWet_OctagonNearTreelineSouthEast',
    'LooseWet_OctagonNearTreelineSouthWest',
    'LooseWet_OctagonNearTreelineWest',
    'LooseWet_OctagonBelowTreelineEast',
    'LooseWet_OctagonBelowTreelineNorth',
    'LooseWet_OctagonBelowTreelineNorthEast',
    'LooseWet_OctagonBelowTreelineNorthWest',
    'LooseWet_OctagonBelowTreelineSouth',
    'LooseWet_OctagonBelowTreelineSouthEast',
    'LooseWet_OctagonBelowTreelineSouthWest',
    'LooseWet_OctagonBelowTreelineWest',
    'PersistentSlab_Likelihood',
    'PersistentSlab_MaximumSize',
    'PersistentSlab_MinimumSize',
    'PersistentSlab_OctagonAboveTreelineEast',
    'PersistentSlab_OctagonAboveTreelineNorth',
    'PersistentSlab_OctagonAboveTreelineNorthEast',
    'PersistentSlab_OctagonAboveTreelineNorthWest',
    'PersistentSlab_OctagonAboveTreelineSouth',
    'PersistentSlab_OctagonAboveTreelineSouthEast',
    'PersistentSlab_OctagonAboveTreelineSouthWest',
    'PersistentSlab_OctagonAboveTreelineWest',
    'PersistentSlab_OctagonNearTreelineEast',
    'PersistentSlab_OctagonNearTreelineNorth',
    'PersistentSlab_OctagonNearTreelineNorthEast',
    'PersistentSlab_OctagonNearTreelineNorthWest',
    'PersistentSlab_OctagonNearTreelineSouth',
    'PersistentSlab_OctagonNearTreelineSouthEast',
    'PersistentSlab_OctagonNearTreelineSouthWest',
    'PersistentSlab_OctagonNearTreelineWest',
    'PersistentSlab_OctagonBelowTreelineEast',
    'PersistentSlab_OctagonBelowTreelineNorth',
    'PersistentSlab_OctagonBelowTreelineNorthEast',
    'PersistentSlab_OctagonBelowTreelineNorthWest',
    'PersistentSlab_OctagonBelowTreelineSouth',
    'PersistentSlab_OctagonBelowTreelineSouthEast',
    'PersistentSlab_OctagonBelowTreelineSouthWest',
    'PersistentSlab_OctagonBelowTreelineWest',
    'DeepPersistentSlab_Likelihood',
    'DeepPersistentSlab_MaximumSize',
    'DeepPersistentSlab_MinimumSize',
    'DeepPersistentSlab_OctagonAboveTreelineEast',
    'DeepPersistentSlab_OctagonAboveTreelineNorth',
    'DeepPersistentSlab_OctagonAboveTreelineNorthEast',
    'DeepPersistentSlab_OctagonAboveTreelineNorthWest',
    'DeepPersistentSlab_OctagonAboveTreelineSouth',
    'DeepPersistentSlab_OctagonAboveTreelineSouthEast',
    'DeepPersistentSlab_OctagonAboveTreelineSouthWest',
    'DeepPersistentSlab_OctagonAboveTreelineWest',
    'DeepPersistentSlab_OctagonNearTreelineEast',
    'DeepPersistentSlab_OctagonNearTreelineNorth',
    'DeepPersistentSlab_OctagonNearTreelineNorthEast',
    'DeepPersistentSlab_OctagonNearTreelineNorthWest',
    'DeepPersistentSlab_OctagonNearTreelineSouth',
    'DeepPersistentSlab_OctagonNearTreelineSouthEast',
    'DeepPersistentSlab_OctagonNearTreelineSouthWest',
    'DeepPersistentSlab_OctagonNearTreelineWest',
    'DeepPersistentSlab_OctagonBelowTreelineEast',
    'DeepPersistentSlab_OctagonBelowTreelineNorth',
    'DeepPersistentSlab_OctagonBelowTreelineNorthEast',
    'DeepPersistentSlab_OctagonBelowTreelineNorthWest',
    'DeepPersistentSlab_OctagonBelowTreelineSouth',
    'DeepPersistentSlab_OctagonBelowTreelineSouthEast',
    'DeepPersistentSlab_OctagonBelowTreelineSouthWest',
    'DeepPersistentSlab_OctagonBelowTreelineWest',
    'StormSlabs_Likelihood',
    'StormSlabs_MaximumSize',
    'StormSlabs_MinimumSize',
    'StormSlabs_OctagonAboveTreelineEast',
    'StormSlabs_OctagonAboveTreelineNorth',
    'StormSlabs_OctagonAboveTreelineNorthEast',
    'StormSlabs_OctagonAboveTreelineNorthWest',
    'StormSlabs_OctagonAboveTreelineSouth',
    'StormSlabs_OctagonAboveTreelineSouthEast',
    'StormSlabs_OctagonAboveTreelineSouthWest',
    'StormSlabs_OctagonAboveTreelineWest',
    'StormSlabs_OctagonNearTreelineEast',
    'StormSlabs_OctagonNearTreelineNorth',
    'StormSlabs_OctagonNearTreelineNorthEast',
    'StormSlabs_OctagonNearTreelineNorthWest',
    'StormSlabs_OctagonNearTreelineSouth',
    'StormSlabs_OctagonNearTreelineSouthEast',
    'StormSlabs_OctagonNearTreelineSouthWest',
    'StormSlabs_OctagonNearTreelineWest',
    'StormSlabs_OctagonBelowTreelineEast',
    'StormSlabs_OctagonBelowTreelineNorth',
    'StormSlabs_OctagonBelowTreelineNorthEast',
    'StormSlabs_OctagonBelowTreelineNorthWest',
    'StormSlabs_OctagonBelowTreelineSouth',
    'StormSlabs_OctagonBelowTreelineSouthEast',
    'StormSlabs_OctagonBelowTreelineSouthWest',
    'StormSlabs_OctagonBelowTreelineWest',
    'WetSlabs_Likelihood',
    'WetSlabs_MaximumSize',
    'WetSlabs_MinimumSize',
    'WetSlabs_OctagonAboveTreelineEast',
    'WetSlabs_OctagonAboveTreelineNorth',
    'WetSlabs_OctagonAboveTreelineNorthEast',
    'WetSlabs_OctagonAboveTreelineNorthWest',
    'WetSlabs_OctagonAboveTreelineSouth',
    'WetSlabs_OctagonAboveTreelineSouthEast',
    'WetSlabs_OctagonAboveTreelineSouthWest',
    'WetSlabs_OctagonAboveTreelineWest',
    'WetSlabs_OctagonNearTreelineEast',
    'WetSlabs_OctagonNearTreelineNorth',
    'WetSlabs_OctagonNearTreelineNorthEast',
    'WetSlabs_OctagonNearTreelineNorthWest',
    'WetSlabs_OctagonNearTreelineSouth',
    'WetSlabs_OctagonNearTreelineSouthEast',
    'WetSlabs_OctagonNearTreelineSouthWest',
    'WetSlabs_OctagonNearTreelineWest',
    'WetSlabs_OctagonBelowTreelineEast',
    'WetSlabs_OctagonBelowTreelineNorth',
    'WetSlabs_OctagonBelowTreelineNorthEast',
    'WetSlabs_OctagonBelowTreelineNorthWest',
    'WetSlabs_OctagonBelowTreelineSouth',
    'WetSlabs_OctagonBelowTreelineSouthEast',
    'WetSlabs_OctagonBelowTreelineSouthWest',
    'WetSlabs_OctagonBelowTreelineWest',
    'WindSlab_Likelihood',
    'WindSlab_MaximumSize',
    'WindSlab_MinimumSize',
    'WindSlab_OctagonAboveTreelineEast',
    'WindSlab_OctagonAboveTreelineNorth',
    'WindSlab_OctagonAboveTreelineNorthEast',
    'WindSlab_OctagonAboveTreelineNorthWest',
    'WindSlab_OctagonAboveTreelineSouth',
    'WindSlab_OctagonAboveTreelineSouthEast',
    'WindSlab_OctagonAboveTreelineSouthWest',
    'WindSlab_OctagonAboveTreelineWest',
    'WindSlab_OctagonNearTreelineEast',
    'WindSlab_OctagonNearTreelineNorth',
    'WindSlab_OctagonNearTreelineNorthEast',
    'WindSlab_OctagonNearTreelineNorthWest',
    'WindSlab_OctagonNearTreelineSouth',
    'WindSlab_OctagonNearTreelineSouthEast',
    'WindSlab_OctagonNearTreelineSouthWest',
    'WindSlab_OctagonNearTreelineWest',
    'WindSlab_OctagonBelowTreelineEast',
    'WindSlab_OctagonBelowTreelineNorth',
    'WindSlab_OctagonBelowTreelineNorthEast',
    'WindSlab_OctagonBelowTreelineNorthWest',
    'WindSlab_OctagonBelowTreelineSouth',
    'WindSlab_OctagonBelowTreelineSouthEast',
    'WindSlab_OctagonBelowTreelineSouthWest',
    'WindSlab_OctagonBelowTreelineWest'
]


In [39]:
len(tmp)

3509

In [40]:
finalDf = tmp[cols]

In [41]:
finalDf.replace(np.nan, 'no-data', inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  method=method,


In [43]:
finalDf[20:]

Unnamed: 0,Region,UnifiedRegion,PublishedDateTime,Day1Date,SpecialStatement,BottomLineSummary,ForecastUrl,Day1DangerAboveTreeline,Day1DangerNearTreeline,Day1DangerBelowTreeline,...,WindSlab_OctagonNearTreelineSouthWest,WindSlab_OctagonNearTreelineWest,WindSlab_OctagonBelowTreelineEast,WindSlab_OctagonBelowTreelineNorth,WindSlab_OctagonBelowTreelineNorthEast,WindSlab_OctagonBelowTreelineNorthWest,WindSlab_OctagonBelowTreelineSouth,WindSlab_OctagonBelowTreelineSouthEast,WindSlab_OctagonBelowTreelineSouthWest,WindSlab_OctagonBelowTreelineWest
24,Grand Mesa,Grand Mesa,2019-11-05 19:37:08,20191105,,,,Low,Low,Low,...,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data
25,Gunnison,Gunnison,2019-11-05 19:37:19,20191105,,,,Low,Low,Low,...,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data
26,Northern San Juan,Northern San Juan,2019-11-05 19:37:30,20191105,,,,Low,Low,Low,...,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data
27,Southern San Juan,Southern San Juan,2019-11-05 19:37:39,20191105,,,,Low,Low,Low,...,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data
28,Sangre De Cristo,Sangre De Cristo,2019-11-05 19:37:49,20191105,,,,Low,Low,Low,...,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4521,Southern,Southern,2021-05-31 16:34:00,20210531,,,,Low,no-data,no-data,...,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data
4522,Southern,Southern,2021-05-31 16:34:00,20210531,,,,Low,no-data,no-data,...,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data
4524,Northern,Northern,2021-06-01 00:01:00,20210601,,,,Low,no-data,no-data,...,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data
4525,Central,Central,2021-06-01 00:01:00,20210601,,,,Low,no-data,no-data,...,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data


In [44]:
finalDf.to_csv('CleanedForecastsCAIC2019.V1.csv', index=False, date_format='%Y%m%d %H:00')

In [46]:
#join with previous file
prev = pd.read_csv('./CleanedForecastsNWAC_CAIC_UAC.V1.2013-2020.csv', low_memory=False)

In [47]:
prev.head()

Unnamed: 0.1,Unnamed: 0,BottomLineSummary,Cornices_Likelihood,Cornices_MaximumSize,Cornices_MinimumSize,Cornices_OctagonAboveTreelineEast,Cornices_OctagonAboveTreelineNorth,Cornices_OctagonAboveTreelineNorthEast,Cornices_OctagonAboveTreelineNorthWest,Cornices_OctagonAboveTreelineSouth,...,WindSlab_OctagonNearTreelineNorthEast,WindSlab_OctagonNearTreelineNorthWest,WindSlab_OctagonNearTreelineSouth,WindSlab_OctagonNearTreelineSouthEast,WindSlab_OctagonNearTreelineSouthWest,WindSlab_OctagonNearTreelineWest,image_paths,image_types,image_urls,rose_url
0,0,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,...,1,1,1,1,1,1,no-data,no-data,no-data,no-data
1,1,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,...,1,1,1,1,1,1,no-data,no-data,no-data,no-data
2,2,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,...,1,1,1,1,1,1,no-data,no-data,no-data,no-data
3,3,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,...,1,1,1,1,1,1,no-data,no-data,no-data,no-data
4,4,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,no-data,...,1,1,1,1,1,1,no-data,no-data,no-data,no-data


In [52]:
prev.drop(columns=['Unnamed: 0'], inplace=True)

In [60]:
final_combined = pd.concat([prev, finalDf], axis=0)

In [61]:
final_combined.to_csv('CleanedForecastsNWAC_CAIC_UAC.V1.2013-2021.csv')