In [1]:
mode = '_mean' if False else '_all'
print(mode)

_all


In [2]:
import pandas as pd
import numpy as np
import gc
from sklearn.pipeline import Pipeline, TransformerMixin
from os import path

In [3]:
class ConvertToDatetime(TransformerMixin):
        
    def transform(self, df, **transform_params):
        if 'timestamp' in df.columns:
            df['timestamp'] = pd.to_datetime(df['timestamp'])
        return df

    def fit(self, X, y=None, **fit_params):
        return self

In [4]:
class AddTimeFeatures(TransformerMixin):
        
    def transform(self, df_a, **transform_params):
        # TODO: try week of year as numerical 
        df = df_a
        df['dayofweek'] = df['timestamp'].dt.dayofweek.astype('uint8') # vs weekend?
        #df['weekday'] = df['timestamp'].dt.weekday.astype('category')
        #df['dayofweek_hour'] = (df['timestamp'].dt.dayofweek * 24) + df['timestamp'].dt.hour
        #df['dayofweek_hour'] = df['dayofweek_hour'].astype('category')
        #df['week'] = df['timestamp'].dt.week.astype('category')
        df['hour'] = df['timestamp'].dt.hour.astype('uint8')
        return df
        
    def fit(self, X, y=None, **fit_params):
        return self

In [5]:
file_dtype = {
    'weather_test' : {'site_id': np.int8, 'air_temperature': np.float16, 'cloud_coverage': np.float16, 'dew_temperature': np.float16,
                     'precip_depth_1_hr': np.float16, 'sea_level_pressure': np.float16, 'wind_direction': np.float16, 'wind_speed': np.float16},
    'weather_train' : {'site_id': np.int8, 'air_temperature': np.float, 'cloud_coverage': np.float16, 'dew_temperature': np.float16,
                     'precip_depth_1_hr': np.float16, 'sea_level_pressure': np.float16, 'wind_direction': np.float16, 'wind_speed': np.float16}
}

def loadFile(name):
    return ConvertToDatetime().transform(
            pd.read_csv('../input/ashrae-energy-prediction/' + name + '.csv', dtype=file_dtype[name]))
        
weather_train = loadFile('weather_train')
weather_test = loadFile('weather_test')


In [6]:
# align weather
def weatherSiteOffsets():
    weather = pd.concat([weather_train,weather_test],ignore_index=True)
    weather['timestamp'] = pd.to_datetime(weather['timestamp'])
    weather_key = ['site_id', 'timestamp']

    temp_skeleton = weather[weather_key + ['air_temperature']].drop_duplicates(subset=weather_key).sort_values(by=weather_key).copy()

    # calculate ranks of hourly temperatures within date/site_id chunks
    temp_skeleton['temp_rank'] = temp_skeleton.groupby(['site_id', temp_skeleton.timestamp.dt.date])['air_temperature'].rank('average')
    
    # create a dataframe of site_ids (0-16) x mean hour rank of temperature within day (0-23)
    df_2d = temp_skeleton.groupby(['site_id', temp_skeleton.timestamp.dt.hour])['temp_rank'].mean().unstack(level=1)

    # Subtract the columnID of temperature peak by 14, getting the timestamp alignment gap.
    site_ids_offsets = pd.Series(df_2d.values.argmax(axis=1) - 14)
    site_ids_offsets.index.name = 'site_id'
    return site_ids_offsets

site_time_offsets_df = weatherSiteOffsets()

def alignWeather(df):
    df['offset'] = df.site_id.map(site_time_offsets_df)
    df['timestamp_aligned'] = (df.timestamp - pd.to_timedelta(df.offset, unit='H'))
    df['timestamp'] = df['timestamp_aligned']
    del df['timestamp_aligned'], df['offset']
    gc.collect()
    return df

#weather_train = alignWeather(weather_train)
#weather_test = alignWeather(weather_test)



In [7]:
if path.exists('../input/ashrae-energy-prediction/weather_train_s_radiation.pickle'):
    weather_train['s_radiation'] =  pd.read_pickle('../input/ashrae-energy-prediction/weather_train_s_radiation.pickle')
    weather_test['s_radiation'] =  pd.read_pickle('../input/ashrae-energy-prediction/weather_test_s_radiation.pickle')
    weather_train['s_radiation'] = weather_test['s_radiation'].astype(np.float16)
    weather_test['s_radiation'] = weather_test['s_radiation'].astype(np.float16)
print(weather_train.head())

   site_id           timestamp  air_temperature  cloud_coverage  \
0        0 2016-01-01 00:00:00             25.0             6.0   
1        0 2016-01-01 01:00:00             24.4             NaN   
2        0 2016-01-01 02:00:00             22.8             2.0   
3        0 2016-01-01 03:00:00             21.1             2.0   
4        0 2016-01-01 04:00:00             20.0             2.0   

   dew_temperature  precip_depth_1_hr  sea_level_pressure  wind_direction  \
0         20.00000                NaN              1019.5             0.0   
1         21.09375               -1.0              1020.0            70.0   
2         21.09375                0.0              1020.0             0.0   
3         20.59375                0.0              1020.0             0.0   
4         20.00000               -1.0              1020.0           250.0   

   wind_speed  s_radiation  
0    0.000000          0.0  
1    1.500000          0.0  
2    0.000000          0.0  
3    0.000000     

In [8]:
# TODO: do something with race precipitation is coded as a “-1” value
# https://www.kaggle.com/c/ashrae-energy-prediction/discussion/113103#latest-664978
# Test out below
def cleanPrecipDepth(df):
    df.loc[df['precip_depth_1_hr'] == -1, 'precip_depth_1_hr'] = 0.25
    return df

#weather_train = cleanPrecipDepth(weather_train)
#weather_test = cleanPrecipDepth(weather_test)

#print(weather_train['precip_depth_1_hr'].value_counts().sort_index())

In [9]:
class ImputeCloudCoverage(TransformerMixin):
        
    def transform(self, df, **transform_params):
        # set age of building to mediam of site_id
        # else if set ot overall median
        median = df['cloud_coverage'].median()
        # Set all year_built NaNs to site mean for year_built
        for i, i_median in df.groupby(['site_id'])['cloud_coverage'].median().items():
            # TODO add in +9 as a NAN
            if not np.isnan(i_median):
                df.loc[(df['cloud_coverage'].isnull()) & (df['site_id'] == i), 'cloud_coverage'] = i_median
            else:
                df.loc[(df['cloud_coverage'].isnull()) & (df['site_id'] == i), 'cloud_coverage'] = median
        df['cloud_coverage'] = np.uint8(df['cloud_coverage'])
        df['cloud_coverage'] = df['cloud_coverage']
        del median
        gc.collect()
        return df
        
    def fit(self, X, y=None, **fit_params):
        return self

In [10]:
def fill_with_po3(wmatrix):
    return wmatrix.fillna(wmatrix.interpolate(method='polynomial', order=3))

def fill_with_lin(wmatrix):
    return wmatrix.fillna(wmatrix.interpolate(method='linear'))

def fill_with_mix(wmatrix):
    wmatrix = (wmatrix.fillna(wmatrix.interpolate(method='linear', limit_direction='both')) +
               wmatrix.fillna(wmatrix.interpolate(method='polynomial', order=3, limit_direction='both'))
              ) * 0.5
    # workaround: fill last NANs with neighbour
    assert wmatrix.count().min() >= len(wmatrix)-1 # only the first item is missing
    return wmatrix.fillna(wmatrix.iloc[1])         # fill with second item
    
class InterpolateTempMix(TransformerMixin):
    
    #https://www.kaggle.com/rcpeters/clean-weather-data-eda
    def transform(self, w_df, **transform_params):
        print(w_df.shape[0])
        print(w_df.head())
        new_df = None
        for col in ['air_temperature', 'dew_temperature']:
            w_df[col] = w_df[col].astype(np.float32) # 16 doesn't support unstack
            filled = fill_with_mix(w_df.pivot(index='timestamp', columns='site_id', values=col))
            filled = filled.sort_index().unstack().to_frame(col)
            filled = filled.reset_index()  
            if new_df is None:
                new_df = filled
            else:
                new_df[col] = filled[col]
            new_df[col] = new_df[col].astype(np.float16) 
            print(new_df.shape[0])
        new_df = new_df.reset_index(drop=True).merge(w_df[['site_id', 'timestamp', 'cloud_coverage', 'precip_depth_1_hr', 'wind_direction', 'wind_speed', 'sea_level_pressure']],
                           how='left', on=['site_id', 'timestamp'])
        return new_df

    def fit(self, X, y=None, **fit_params):
        return self
#print(weather_train.head(20))
#print(InterpolateTempMix().transform(weather_train.iloc[26320:26340,:]))

In [11]:
class RelativeHumidity(TransformerMixin):
        
    def transform(self, df, **transform_params):
        exp   = pd.np.exp
        df['relative_humidity'] = 100*(exp((17.625*df['dew_temperature'])/(243.04+df['dew_temperature'])) / exp((17.625*df['air_temperature'])/(243.04+df['air_temperature'])))
        return df

    def fit(self, X, y=None, **fit_params):
        return self
                                             
print(RelativeHumidity().transform(weather_train.sample(20)))

        site_id           timestamp  air_temperature  cloud_coverage  \
29319         3 2016-05-04 13:00:00             12.2             NaN   
50529         5 2016-10-04 14:00:00             16.0             NaN   
119680       13 2016-09-03 17:00:00             21.1             NaN   
42544         4 2016-11-05 19:00:00             18.3             NaN   
17613         2 2016-01-03 18:00:00             17.8             2.0   
23782         2 2016-09-16 19:00:00             33.3             2.0   
49873         5 2016-09-07 05:00:00             17.0             NaN   
89644        10 2016-03-25 21:00:00              7.8             NaN   
83409         9 2016-07-10 00:00:00             34.4             2.0   
80107         9 2016-02-23 08:00:00             15.0             NaN   
61866         7 2016-01-21 14:00:00            -12.9             NaN   
48413         5 2016-07-08 09:00:00             16.0             NaN   
86243         9 2016-11-05 02:00:00             21.7            

In [12]:
# TODO: try both for direction
class ImputeWeather(TransformerMixin):

    def __init__(self, method:str='linear', gap_limit:int=None, limit_direction:str='forward'):
        self._method = method
        self._gap_limit = gap_limit
        self._limit_direction = limit_direction
        
    def transform(self, weather_df, **transform_params):
        grouped_weather_df = weather_df.groupby('site_id').apply(lambda group: group.interpolate(method=self._method, limit=self._gap_limit, limit_direction=self._limit_direction))
        if 'cloud_coverage' in grouped_weather_df.columns:
            grouped_weather_df['cloud_coverage'] = grouped_weather_df['cloud_coverage'].round(decimals=0).clip(0,8)
        grouped_weather_df.reset_index(inplace=True)
        weather_df = grouped_weather_df.drop(['index'], axis=1)
        nans = weather_df.isna().any()
        #TODO Trudie I had to add the following two lines below
        for col in nans[nans==True].index.tolist():
            weather_df[col].fillna(weather_df[col].mode()[0], inplace=True)
        gc.collect()
        return weather_df

    def fit(self, X, y=None, **fit_params):
        return self
    
#print(ImputeWeather().transform(weather_train.head(20)))
nans = ImputeWeather().transform(weather_train).isna().any()
print(nans)
print(nans[nans==True].index.tolist())

site_id               False
timestamp             False
air_temperature       False
cloud_coverage        False
dew_temperature       False
precip_depth_1_hr     False
sea_level_pressure    False
wind_direction        False
wind_speed            False
s_radiation           False
dtype: bool
[]


In [13]:
class FillMean(TransformerMixin):

    def __init__(self, cols):
        self._cols = cols
        
    def transform(self, df, **transform_params):
        for col in self._cols:
            df[col] = df[col].fillna(df[col].mean())
        return df

    def fit(self, X, y=None, **fit_params):
        return self

In [14]:
# TODO: rename to rolling
class AddWeatherRolls(TransformerMixin):
    
    def __init__(self, window, center=False):
        self._window = window
        self._center = center
        
    def transform(self, weather_df, **transform_params):
        group_df = weather_df.groupby(['site_id'])
        cols = ['air_temperature', 'cloud_coverage', 'dew_temperature', 'precip_depth_1_hr', 'sea_level_pressure', 'wind_direction', 'wind_speed']
        if 's_radiation' in weather_df.columns:
            cols.append('s_radiation')
        if 'relative_humidity' in weather_df.columns:
            cols.append('relative_humidity')
        rolled = group_df[cols].rolling(window=self._window, center=self._center, min_periods=0)
        lag_mean = rolled.mean().reset_index().astype(np.float16)
        lag_max = rolled.max().reset_index().astype(np.float16)
        lag_min = rolled.min().reset_index().astype(np.float16)
        lag_std = rolled.std().reset_index().astype(np.float16)
        c_chars = '_c' if self._center== True else ''
        for col in cols:
            weather_df[f'{col}_rmean_{self._window}{c_chars}'] = lag_mean[col]
            if mode == '_all':
                weather_df[f'{col}_rmax_{self._window}{c_chars}'] = lag_max[col]
                weather_df[f'{col}_rmin_{self._window}{c_chars}'] = lag_min[col]
                weather_df[f'{col}_rstd_{self._window}{c_chars}'] = lag_std[col]
        del group_df, rolled
        gc.collect()
        return weather_df

    def fit(self, X, y=None, **fit_params):
        return self
    
print(AddWeatherRolls(72, True).transform(weather_train.head(20)))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


    site_id           timestamp  air_temperature  cloud_coverage  \
0         0 2016-01-01 00:00:00             25.0             6.0   
1         0 2016-01-01 01:00:00             24.4             NaN   
2         0 2016-01-01 02:00:00             22.8             2.0   
3         0 2016-01-01 03:00:00             21.1             2.0   
4         0 2016-01-01 04:00:00             20.0             2.0   
5         0 2016-01-01 05:00:00             19.4             NaN   
6         0 2016-01-01 06:00:00             21.1             6.0   
7         0 2016-01-01 07:00:00             21.1             NaN   
8         0 2016-01-01 08:00:00             20.6             NaN   
9         0 2016-01-01 09:00:00             21.1             NaN   
10        0 2016-01-01 10:00:00             21.1             NaN   
11        0 2016-01-01 11:00:00             20.6             NaN   
12        0 2016-01-01 12:00:00             18.9             6.0   
13        0 2016-01-01 13:00:00             20.0

In [15]:
weather_pipes = Pipeline(
    steps=[
        #('convertToDatetime', ConvertToDatetime()),
        ('interpolateTempMix', InterpolateTempMix()),
        ('imputeWeather', ImputeWeather()),
        ('fillMean',FillMean(['air_temperature','dew_temperature'
                              , 'precip_depth_1_hr', 'sea_level_pressure'])),
        ('imputeCloudCoverage', ImputeCloudCoverage()),
        ('relativeHumidity', RelativeHumidity()),
        ('addWeatherLags3', AddWeatherRolls(3)),
        #('addWeatherRolls6', AddWeatherRolls(6)),
        #('addWeatherRolls48', AddWeatherRolls(48)),
        ('addWeatherRolls72', AddWeatherRolls(72)),
        #('addWeatherRolls96', AddWeatherRolls(96)),
        #('addWeatherRolls144', AddWeatherRolls(144))
    ]
)

In [16]:
all = weather_train.append(weather_test, ignore_index=True)
all = weather_pipes.transform(all)
all = alignWeather(all)

all.sample(20, random_state=42)

417016
   site_id           timestamp  air_temperature  cloud_coverage  \
0        0 2016-01-01 00:00:00             25.0             6.0   
1        0 2016-01-01 01:00:00             24.4             NaN   
2        0 2016-01-01 02:00:00             22.8             2.0   
3        0 2016-01-01 03:00:00             21.1             2.0   
4        0 2016-01-01 04:00:00             20.0             2.0   

   dew_temperature  precip_depth_1_hr  sea_level_pressure  wind_direction  \
0         20.00000                NaN              1019.5             0.0   
1         21.09375               -1.0              1020.0            70.0   
2         21.09375                0.0              1020.0             0.0   
3         20.59375                0.0              1020.0             0.0   
4         20.00000               -1.0              1020.0           250.0   

   wind_speed  s_radiation  
0    0.000000          0.0  
1    1.500000          0.0  
2    0.000000          0.0  
3    0.0000

Unnamed: 0,site_id,timestamp,air_temperature,dew_temperature,cloud_coverage,precip_depth_1_hr,wind_direction,wind_speed,sea_level_pressure,relative_humidity,...,wind_direction_rmin_72,wind_direction_rstd_72,wind_speed_rmean_72,wind_speed_rmax_72,wind_speed_rmin_72,wind_speed_rstd_72,relative_humidity_rmean_72,relative_humidity_rmax_72,relative_humidity_rmin_72,relative_humidity_rstd_72
413730,15,2018-03-09 12:00:00,0.600098,-4.398438,1,-1.0,280.0,5.699219,1006.0,69.25,...,0.0,98.4375,3.720703,6.699219,0.0,1.895508,79.9375,96.375,50.5,13.398438
226437,8,2017-10-28 16:00:00,24.40625,20.0,8,0.0,70.0,2.599609,1008.5,76.4375,...,0.0,135.625,3.070312,6.199219,0.0,1.712891,64.8125,93.1875,26.65625,18.171875
119780,4,2017-08-29 12:00:00,17.796875,13.898438,2,0.0,276.75,2.599609,1013.0,77.875,...,0.0,77.0,4.046875,7.699219,0.0,1.958984,80.8125,100.0,36.90625,16.515625
256459,9,2018-04-01 12:00:00,25.0,17.203125,4,0.0,140.0,3.599609,1014.5,61.96875,...,0.0,88.0,2.615234,6.699219,0.0,1.107422,63.375,93.1875,18.78125,19.78125
140474,5,2017-01-08 02:00:00,9.0,9.0,8,0.0,180.0,1.0,1017.0,100.0,...,40.0,88.4375,3.251953,8.203125,1.0,1.583008,94.0625,100.0,75.75,6.71875
158835,6,2016-02-11 21:00:00,-6.101562,-17.796875,0,0.0,300.0,3.099609,1026.0,39.125,...,0.0,103.1875,2.664062,5.699219,0.0,1.447266,48.125,96.5,23.46875,20.796875
248824,9,2017-05-18 09:00:00,27.203125,22.203125,4,0.0,150.0,4.101562,1008.5,74.125,...,0.0,52.75,3.310547,6.199219,0.0,1.533203,73.0,97.0,35.0,17.015625
235202,8,2018-10-28 21:00:00,16.09375,10.601562,0,0.0,0.0,0.0,1020.5,69.8125,...,0.0,115.75,3.890625,8.796875,0.0,2.013672,72.0625,100.0,33.09375,17.609375
147111,5,2017-10-11 15:00:00,15.0,14.0,0,0.0,240.0,5.699219,1017.0,93.625,...,180.0,25.984375,5.050781,8.203125,1.0,1.860352,88.5625,100.0,72.4375,7.328125
79893,3,2016-02-10 15:00:00,2.199219,-9.398438,6,0.0,310.0,5.699219,1009.0,42.03125,...,0.0,123.8125,4.140625,8.796875,0.0,1.837891,63.4375,92.5,40.40625,16.28125


In [17]:
all.dtypes

site_id                                 int64
timestamp                      datetime64[ns]
air_temperature                       float16
dew_temperature                       float16
cloud_coverage                          uint8
precip_depth_1_hr                     float16
wind_direction                        float16
wind_speed                            float16
sea_level_pressure                    float16
relative_humidity                     float16
air_temperature_rmean_3               float16
air_temperature_rmax_3                float16
air_temperature_rmin_3                float16
air_temperature_rstd_3                float16
cloud_coverage_rmean_3                float16
cloud_coverage_rmax_3                 float16
cloud_coverage_rmin_3                 float16
cloud_coverage_rstd_3                 float16
dew_temperature_rmean_3               float16
dew_temperature_rmax_3                float16
dew_temperature_rmin_3                float16
dew_temperature_rstd_3            

In [18]:
print(all.isna().any())
print(all.loc[all['precip_depth_1_hr'].isna(),'precip_depth_1_hr'])

site_id                        False
timestamp                      False
air_temperature                False
dew_temperature                False
cloud_coverage                 False
precip_depth_1_hr              False
wind_direction                 False
wind_speed                     False
sea_level_pressure             False
relative_humidity              False
air_temperature_rmean_3        False
air_temperature_rmax_3         False
air_temperature_rmin_3         False
air_temperature_rstd_3          True
cloud_coverage_rmean_3         False
cloud_coverage_rmax_3          False
cloud_coverage_rmin_3          False
cloud_coverage_rstd_3           True
dew_temperature_rmean_3        False
dew_temperature_rmax_3         False
dew_temperature_rmin_3         False
dew_temperature_rstd_3          True
precip_depth_1_hr_rmean_3      False
precip_depth_1_hr_rmax_3       False
precip_depth_1_hr_rmin_3       False
precip_depth_1_hr_rstd_3        True
sea_level_pressure_rmean_3     False
s

In [19]:
all.to_pickle(f'../input/ashrae-energy-prediction-pickles/weather_processed{mode}.pickle')


In [20]:
all.dtypes


site_id                                 int64
timestamp                      datetime64[ns]
air_temperature                       float16
dew_temperature                       float16
cloud_coverage                          uint8
precip_depth_1_hr                     float16
wind_direction                        float16
wind_speed                            float16
sea_level_pressure                    float16
relative_humidity                     float16
air_temperature_rmean_3               float16
air_temperature_rmax_3                float16
air_temperature_rmin_3                float16
air_temperature_rstd_3                float16
cloud_coverage_rmean_3                float16
cloud_coverage_rmax_3                 float16
cloud_coverage_rmin_3                 float16
cloud_coverage_rstd_3                 float16
dew_temperature_rmean_3               float16
dew_temperature_rmax_3                float16
dew_temperature_rmin_3                float16
dew_temperature_rstd_3            

In [21]:
# ingore the rest




















raise("rest didn't test well")

TypeError: exceptions must derive from BaseException

In [None]:
w_mean_df = AddTimeFeatures().transform(all.loc[:,['site_id', 'timestamp', 'air_temperature', 'dew_temperature',
       'cloud_coverage', 'precip_depth_1_hr', 'wind_direction', 'wind_speed',
       'sea_level_pressure', 'relative_humidity']])
print(w_mean_df.columns)
print(w_mean_df.sample(20))

In [None]:
if 'timestamp' in w_mean_df.columns:
    w_mean_df = w_mean_df.drop("timestamp", axis=1)

def getHWODDecs(feature):
    group = w_mean_df.groupby(['site_id','hour','dayofweek'])[feature]
    group = group.describe()
    desc_DF = group.reset_index()
    col_dict = {}
    print(desc_DF.columns)
    for col in desc_DF.columns:
        if col not in ['site_id','hour','dayofweek']:
            #desc_DF[col] = desc_DF[col].astype(np.float16)
            col_dict[col] = feature + '_h_d_' + col
    return desc_DF.drop('count', axis=1, errors='ignore').rename(columns=col_dict)


In [None]:
cols = (x for x in w_mean_df.columns if x not in ['site_id','hour','dayofweek', 'sea_level_pressure', 'precip_depth_1_hr','wind_direction'])
desc_df = None
for x in cols:
    print(x)
    if desc_df is None:
        desc_df = getHWODDecs(x)
    else:
        desc_df = desc_df.merge(getHWODDecs(x), on=['site_id','hour','dayofweek'])
print(desc_df)

In [None]:
#print(desc_df[desc_df.isin([np.nan, np.inf, -np.inf]).any(1)])
desc_df.columns.to_series()[np.isinf(desc_df).any()]

In [None]:
desc_df.to_csv('weather_desc_h_dow.csv')

In [None]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler() 
scaled_df = desc_df
cols = list(x for x in scaled_df.columns if x not in ['site_id','hour','dayofweek'])
print(cols)
scaled_values = scaler.fit_transform(scaled_df[cols])
scaled_df.loc[:,cols] = scaled_values
print(scaled_df.columns)
#_m_dow_desc_DF = reduce_mem_usage(_m_dow_desc_DF)
for col in cols:
    print(col)
    scaled_df[col] = scaled_df[col].astype(np.float16)
print(scaled_df.sample(20))
scaled_df.to_csv('weather_desc_scaled.csv')