### Normalize the Weather Vectors
- Remove the vectors without any weather data (the zero vectors)
- Get the mean and standard deviation of each column in the training weather vectors.
- Normalize the train, valid and test weather vectors with the above mean and std deviation

In [1]:
import pandas as pd

In [2]:
DATA_DIR = '../../data/processed/weather_vectors/'

fires_without_weather = ['20170713_FIRE_smer-tcs8-mobo-c','20171026_FIRE_smer-tcs8-mobo-c','20171207_FIRE_smer-tcs8-mobo-c',
'20180504_FIRE_smer-tcs8-mobo-c','20180602_FIRE_smer-tcs8-mobo-c','20180603_FIRE_smer-tcs8-mobo-c',
'20180910_FIRE_smer-tcs8-mobo-c','20170927_FIRE_smer-tcs9-mobo-c','20180725_Cranston_sp-e-mobo-c',
'20190805_FIRE_sp-e-mobo-c','20180719_Skyline_sp-n-mobo-c','20190728_FIRE_sp-n-mobo-c',
'20190805_FIRE_sp-e-mobo-c','20180719_Skyline_sp-n-mobo-c','20190728_FIRE_sp-n-mobo-c',
'20170708_Whittier_syp-n-mobo-c','20180720_FIRE_syp-w-mobo-c','20180718_FIRE_syp-w-mobo-c']


In [4]:
weather_df = pd.read_csv(DATA_DIR+'weather_vectors_train.csv', index_col=False)
weather_df['fire_name'] = weather_df['image_name'].apply(lambda x: x.split('/')[0])
weather_df_no_data = weather_df[weather_df['fire_name'].isin(fires_without_weather)]
weather_df = weather_df[~weather_df['fire_name'].isin(fires_without_weather)]

filtered_train_image_names = weather_df['image_name']

# columns = ['air_temp_set_1','dew_point_temperature_set_1d','relative_humidity_set_1','u','v',
#         'wind_direction_set_1','wind_gust_set_1','wind_speed_set_1']

columns = ['u','v','wind_direction_set_1']

means = []
st_devs = []

for col in columns:
    means.append(weather_df[col].mean())
    st_devs.append(weather_df[col].std(ddof=0))
    weather_df[col] = (weather_df[col] - means[-1])/st_devs[-1]

In [5]:
weather_df_val = pd.read_csv(DATA_DIR+'weather_vectors_val.csv', index_col=False)
weather_df_val['fire_name'] = weather_df_val['image_name'].apply(lambda x: x.split('/')[0])
weather_df_test = pd.read_csv(DATA_DIR+'weather_vectors_test.csv',index_col=False)
weather_df_test['fire_name'] = weather_df_test['image_name'].apply(lambda x: x.split('/')[0])

weather_df_val_no_data = weather_df_val[weather_df_val['fire_name'].isin(fires_without_weather)]
weather_df_val = weather_df_val[~weather_df_val['fire_name'].isin(fires_without_weather)]

weather_df_test_no_data = weather_df_test[weather_df_test['fire_name'].isin(fires_without_weather)]
weather_df_test = weather_df_test[~weather_df_test['fire_name'].isin(fires_without_weather)]

filtered_val_image_names = weather_df_val['image_name']
filtered_test_image_names = weather_df_test['image_name']

for i, col in enumerate(columns):
    weather_df_val[col] = (weather_df_val[col] - means[i])/st_devs[i]
    weather_df_test[col] = (weather_df_test[col] - means[i])/st_devs[i]

In [6]:
weather_df = weather_df.append(weather_df_val, ignore_index=True)
weather_df = weather_df.append(weather_df_test, ignore_index=True)
weather_df.drop(columns=['fire_name','Unnamed: 9'], inplace=True)
weather_df.to_csv(DATA_DIR+'weather_vectors_normalized.csv', index=False)

  weather_df = weather_df.append(weather_df_val, ignore_index=True)
  weather_df = weather_df.append(weather_df_test, ignore_index=True)


In [7]:
filtered_train_image_names.to_csv(DATA_DIR+'filtered_train_images.txt', header=None, index=None, sep=' ')
filtered_val_image_names.to_csv(DATA_DIR+'filtered_val_images.txt', header=None, index=None, sep=' ')
filtered_test_image_names.to_csv(DATA_DIR+'filtered_test_images.txt', header=None, index=None, sep=' ')