In [1]:
import pandas as pd
import numpy as np
import datetime

### Step 1 : Get the training, test, validation image name lists and sort them by the camera name

In [2]:
# Directory holding the train/val/test split lists (trailing slash required:
# paths below are built by plain string concatenation).
DATA_DIR = '../../data/processed/weather_vectors/'
# Directory with the weather data csv files for each camera.
WEATHER_DIR = '../../data/processed/by_camera_summary/'

# Paths to the text files listing the images of each split.
TRAIN_SPLIT_PATH, VAL_SPLIT_PATH, TEST_SPLIT_PATH = (
    DATA_DIR + split_file
    for split_file in ('train_images.txt', 'val_images.txt', 'test_images.txt')
)

In [3]:
def _read_sorted_image_list(split_path):
    """Read a split file and return its non-empty lines as a sorted list.

    Args:
        split_path: Path to a text file with one image name per line.

    Returns:
        List of image names, with blank lines removed, sorted by the
        second-to-last '_'-separated field of each name.

    Raises:
        OSError: If the split file cannot be opened.
        IndexError: If a name contains no '_' (there is no second-to-last field).
    """
    with open(split_path) as f:
        names = [name for name in f.read().split('\n') if name]
    # NOTE(review): presumably this field starts with the camera name, so the
    # sort groups images of the same camera together — confirm the name format.
    return sorted(names, key=lambda name: name.split('_')[-2])


# The three splits are loaded identically; only the source file differs.
train_images = _read_sorted_image_list(TRAIN_SPLIT_PATH)
val_images = _read_sorted_image_list(VAL_SPLIT_PATH)
test_images = _read_sorted_image_list(TEST_SPLIT_PATH)

### Step 2 : Get the weather vector corresponding to each image in the training, validation and test image lists
There is aggregated weather data available per camera. In order to efficiently get the weather data for our image name list, the following steps are done:
- Sort the image name lists based on the camera names to group fires from the same camera but different dates together. This minimizes the number of times the csv files with the weather data are opened
- Since the lists are sorted, the lists are parsed from start to end and the weather data dataframes are loaded only when there is a change in the camera names. This gives us a dataframe with the historical weather data for the corresponding camera
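- Each time we load a csv file with the weather data, we also create a new `Timestamp` column which stores each observation time as a numeric Unix timestamp (seconds since 1970-01-01 UTC), so that it can be compared directly with the timestamp in the image name.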
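- For each image name, extract the timestamp and query the dataframe for the most recent weather observation at or before that time, looking back at most about two hours (7199 seconds). This gives the weather data for the image, which is then written to a file. If there is no observation in that window, zeros are written for that image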
- For the wind direction, the angle is adjusted to be relative to the direction in which the camera is pointing


In [4]:
def extract_camera_name(image_name):
    """Return the camera name encoded in an image path.

    Takes the text before the first '/', keeps what follows its last '_',
    and drops a leading 'FIRE-' from that piece if present.

    Args:
        image_name: Image path string.

    Returns:
        The camera name as a string.
    """
    folder = image_name.partition('/')[0]
    name = folder.rsplit('_', 1)[-1]
    prefix = 'FIRE-'
    return name[len(prefix):] if name.startswith(prefix) else name

def extract_timestamp(image_name):
    """Return the integer timestamp that prefixes an image's file name.

    The file name is the text after the last '/'; the timestamp is the part
    of that file name before its first '_'.

    Args:
        image_name: Image path string.

    Returns:
        The timestamp as an int.

    Raises:
        ValueError: If the leading field is not an integer.
    """
    file_name = image_name.rsplit('/', 1)[-1]
    leading_field = file_name.partition('_')[0]
    return int(leading_field)

def get_timestamp(date_string):
    """Convert a "YYYY-MM-DD HH:MM:SS" string to a Unix timestamp in seconds.

    The string carries no timezone, so it is interpreted as UTC. Calling
    ``timestamp()`` on a naive datetime would instead assume the machine's
    local timezone, making the result depend on where the notebook is run.

    NOTE(review): this assumes the weather times are expressed in UTC —
    confirm against the data source.

    Args:
        date_string: Date and time formatted as "%Y-%m-%d %H:%M:%S".

    Returns:
        Seconds since the Unix epoch, as a float.

    Raises:
        ValueError: If ``date_string`` does not match the expected format.
    """
    date = datetime.datetime.strptime(date_string, "%Y-%m-%d %H:%M:%S")
    # Attach UTC explicitly so the conversion ignores the local timezone.
    return date.replace(tzinfo=datetime.timezone.utc).timestamp()

def wind_direction_offset(camera_name, angle):
    """Express a wind direction relative to the direction the camera points.

    The camera direction is the second '-'-separated token of ``camera_name``
    ('n', 'e', 's' or 'w'), mapped to 0, 90, 180 or 270 degrees. That offset
    is subtracted from ``angle`` and the result is wrapped into [0, 360).

    This is best-effort: if the name has no such token, the token is not one
    of the four known directions, or ``angle`` does not support arithmetic,
    ``angle`` is returned unchanged.

    Args:
        camera_name: Camera name string.
        angle: Wind direction in degrees.

    Returns:
        The adjusted angle, or the original ``angle`` when no adjustment
        can be made.
    """
    offset_dict = {'n': 0, 'e': 90, 's': 180, 'w': 270}

    tokens = camera_name.split('-')
    # Names without a '-' have no direction token; leave the angle untouched.
    offset = offset_dict.get(tokens[1]) if len(tokens) > 1 else None
    if offset is None:
        return angle

    try:
        return (angle - offset + 360) % 360
    except TypeError:
        # Non-numeric angle (e.g. a string): keep it as-is rather than fail.
        return angle
    

In [5]:
def _load_camera_weather(weather_dir, camera_name):
    """Load one camera's weather csv and add a numeric 'Timestamp' column.

    Returns the DataFrame, or None (after printing a message) when no file
    could be read or its 'Date_Time' column could not be converted.
    """
    import os  # local import: keeps this cell independent of the import cell

    weather_df = None
    # Try "<camera>.csv" first, then fall back to "<camera>-c.csv".
    for suffix in ('.csv', '-c.csv'):
        try:
            weather_df = pd.read_csv(os.path.join(weather_dir, camera_name + suffix))
            break
        except (OSError, ValueError):
            # Missing, unreadable, empty or unparsable file: try the next name.
            continue
    if weather_df is None:
        print('No readable weather file for camera: ' + camera_name)
        return None

    try:
        # Drop the last 6 characters before parsing.
        # NOTE(review): presumably a UTC offset suffix such as "+00:00" — confirm.
        weather_df['Date_Time'] = weather_df['Date_Time'].apply(lambda x: x[:-6])
        weather_df['Timestamp'] = weather_df['Date_Time'].apply(get_timestamp)
    except (KeyError, TypeError, ValueError):
        print('Could not parse weather timestamps for camera: ' + camera_name)
        return None
    return weather_df


def get_weather_data_from_image(output_path, image_list, weather_dir, cameras_without_weather=None,
                                max_age_seconds=7199):
    """Write one csv row of weather values for every image in ``image_list``.

    For each image, the camera name and timestamp are extracted from the image
    name. The camera's weather csv is loaded from ``weather_dir`` (only when
    the camera changes from the previous image), and the last row whose
    'Timestamp' is at or before the image timestamp and less than
    ``max_age_seconds`` older than it is used. The wind direction is passed
    through ``wind_direction_offset``. Images with no usable weather data
    (camera listed in ``cameras_without_weather``, weather file missing or
    unparsable, or no row in the time window) get zeros for every value.

    Args:
        output_path: Path of the csv file to write (overwritten if it exists).
        image_list: Image names; grouping them by camera avoids reloading files.
        weather_dir: Directory containing the per-camera weather csv files.
        cameras_without_weather: Optional collection of camera names to skip;
            zeros are written for their images.
        max_age_seconds: Size of the look-back window in seconds.

    Raises:
        OSError: If ``output_path`` cannot be opened for writing.
        KeyError: If a loaded weather file lacks one of the expected columns.
    """
    skip_cameras = () if cameras_without_weather is None else cameras_without_weather
    weather_columns = ('air_temp_set_1', 'relative_humidity_set_1', 'wind_speed_set_1',
                       'wind_gust_set_1', 'wind_direction_set_1',
                       'dew_point_temperature_set_1d', 'u', 'v')
    direction_index = weather_columns.index('wind_direction_set_1')

    with open(output_path, 'w') as f:
        # NOTE(review): the header ends with a trailing comma, so it declares one
        # more column than each data row has. Kept byte-identical because
        # downstream readers may rely on it — confirm before removing.
        f.write('image_name,air_temp_set_1,relative_humidity_set_1,wind_speed_set_1,'+
                'wind_gust_set_1,wind_direction_set_1,dew_point_temperature_set_1d,u,v,\n')

        loaded_camera = None  # camera whose weather data is currently cached
        weather_df = None     # cached frame, or None if that camera has no usable data
        for image in image_list:
            camera_name = extract_camera_name(image)
            timestamp = extract_timestamp(image)
            values = [0] * len(weather_columns)

            if camera_name not in skip_cameras:
                if camera_name != loaded_camera:
                    # Always replace the cache, even on failure, so a previous
                    # camera's data is never reused for this camera.
                    weather_df = _load_camera_weather(weather_dir, camera_name)
                    loaded_camera = camera_name

                if weather_df is not None:
                    in_window = ((weather_df['Timestamp'] <= timestamp) &
                                 (weather_df['Timestamp'] > (timestamp - max_age_seconds)))
                    window = weather_df[in_window]
                    if not window.empty:
                        # Last matching row in file order.
                        latest = window.iloc[-1]
                        values = [latest[column] for column in weather_columns]
                        values[direction_index] = wind_direction_offset(
                            camera_name, latest['wind_direction_set_1'])

            f.write(','.join(str(value) for value in [image] + values) + '\n')

In [6]:
# Generate one weather-vector csv per split, written next to the split lists.
weather_vector_jobs = (
    ('weather_vectors_train.csv', train_images),
    ('weather_vectors_val.csv', val_images),
    ('weather_vectors_test.csv', test_images),
)
for output_name, split_images in weather_vector_jobs:
    get_weather_data_from_image(DATA_DIR + output_name, split_images, WEATHER_DIR)