In [1]:
import datetime
import pandas as pd
import numpy as np

In [2]:
# Load the wind table, discard the 'Unnamed: 0' column and parse the
# 'Time' strings into datetimes; the last expression previews the final rows.
wind = (
    pd.read_csv('../Classification Data/wind.csv')
    .drop(columns=['Unnamed: 0'])
    .assign(Time=lambda table: pd.to_datetime(table['Time'], format='%Y-%m-%d %H:%M'))
)
wind.tail()

Unnamed: 0,ID,Name,Time,Record ID,Status,Latitude,Longitude,Max wind (knots),Min pressure (mbar),34kt wind radii max NE,...,34kt wind radii max SW,34kt wind radii max NW,50kt wind radii max NE,50kt wind radii max SE,50kt wind radii max SW,50kt wind radii max NW,64kt wind radii max NE,64kt wind radii max SE,64kt wind radii max SW,64kt wind radii max NW
51341,AL162018,OSCAR,2018-11-03 12:00:00,,EX,57.9N,19.6W,55,960.0,780.0,...,660.0,480.0,0.0,240.0,0.0,0.0,0.0,0.0,0.0,0.0
51342,AL162018,OSCAR,2018-11-03 18:00:00,,EX,58.9N,17.1W,50,964.0,480.0,...,660.0,420.0,0.0,240.0,0.0,0.0,0.0,0.0,0.0,0.0
51343,AL162018,OSCAR,2018-11-04 00:00:00,,EX,59.8N,14.5W,45,968.0,360.0,...,480.0,360.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
51344,AL162018,OSCAR,2018-11-04 06:00:00,,EX,60.8N,12.1W,40,973.0,270.0,...,360.0,270.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
51345,AL162018,OSCAR,2018-11-04 12:00:00,,EX,62.4N,9.1W,40,977.0,240.0,...,0.0,210.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [3]:
# Transform a wind speed (in knots) to a hurricane category.
def wind_to_category(kph):
    """Map a maximum sustained wind speed to a hurricane category 0-5.

    Parameters
    ----------
    kph : int or float
        Wind speed in knots. The parameter name is historical; the thresholds
        below are the knot values of the Saffir-Simpson scale, matching the
        'Max wind (knots)' column this function is fed with.

    Returns
    -------
    int
        0 for anything below 64 kt, otherwise category 1-5.

    Notes
    -----
    The bands are half-open ([64, 83), [83, 96), ...), so fractional speeds
    such as 82.5 land in the lower category instead of falling through every
    branch. A NaN input compares False everywhere and still yields 5, as it
    did before.
    """
    if kph < 64:
        return 0
    elif kph < 83:
        return 1
    elif kph < 96:
        return 2
    elif kph < 113:
        return 3
    elif kph < 137:
        return 4
    else:
        return 5

# Given a Datetime object, find the entries of the wind table that are
# closest in time to it and return the greatest hurricane category
# among those entries.
def category(time):
    """Return the strongest hurricane category recorded nearest to `time`.

    Only rows of the module-level `wind` table whose 'Time' lies between
    midnight of `time`'s calendar day and the following midnight (both
    inclusive) are considered.

    Parameters
    ----------
    time : datetime.datetime
        Timestamp to label.

    Returns
    -------
    int
        `wind_to_category` of the largest 'Max wind (knots)' among the rows
        with the smallest absolute time difference to `time`. If two
        timestamps are equally close (one before, one after), both are
        considered. Category 0 is returned when the day has no rows or when
        no selected row has a positive wind speed.
    """
    day_start = datetime.datetime(time.year, time.month, time.day)
    day_end = day_start + datetime.timedelta(days = 1)

    # Sub-dataframe of wind containing all entries on that day.
    same_day = wind[(wind['Time'] >= day_start) & (wind['Time'] <= day_end)]
    if same_day.empty:
        return wind_to_category(0)

    # Use the absolute gap: with a signed difference every record after
    # `time` would look "closer" than any record before it.
    gap = (same_day['Time'] - time).abs()
    nearest = same_day[gap == gap.min()]

    # Several rows can share the nearest timestamp; keep the strongest one.
    peak = nearest['Max wind (knots)'].max()

    # Missing or non-positive speeds (NaN, negative sentinels) count as no wind.
    return wind_to_category(peak if peak > 0 else 0)

In [4]:
### Unzip the training images in "800x375 Training.zip" into ./Training
### and create the per-category training image directories (0-5).

import os
import zipfile as zf

# The context manager closes the archive even if extraction fails part-way.
with zf.ZipFile("800x375 Training.zip", 'r') as files:
    # exist_ok avoids the check-then-create race and keeps the cell re-runnable.
    os.makedirs('./Training', exist_ok=True)
    files.extractall('Training')

# One sub-directory per category label.
for i in range(6):
    os.makedirs('./Training/' + str(i), exist_ok=True)

In [7]:
import os
from shutil import move

# Sort every extracted image into the sub-directory named after its
# category. Images are looked up by name, one candidate every 3 hours
# from `frame` up to (not including) `last`.
image_path = './Training/'

delta = datetime.timedelta(hours = 3)
last = datetime.datetime(2019, 11, 1, 0)
frame = datetime.datetime(2010, 7, 1, 0)

count = 0

while frame < last:
    count += 1
    stamp = frame.strftime("%Y-%m-%d-%H")
    # Progress marker every 1000 candidate timestamps.
    if count % 1000 == 0:
        print(stamp)
    filepath = image_path + stamp + ".jpg"
    # Timestamps without an image are skipped. Errors raised by category()
    # are no longer swallowed, so labelling bugs surface instead of silently
    # leaving images unsorted.
    if os.path.isfile(filepath):
        cat = category(frame)
        try:
            move(filepath, image_path + str(cat) + '/' + stamp + ".jpg")
        except OSError as err:
            # Keep going on a single bad file, but report it.
            print("Could not move " + filepath + ": " + str(err))
    frame += delta

2010-11-02-21
2011-03-07-21
2011-07-10-21
2011-11-12-21
2012-03-16-21
2012-07-19-21
2012-11-21-21
2013-03-26-21
2013-07-29-21
2013-12-01-21
2014-04-05-21
2014-08-08-21
2014-12-11-21
2015-04-15-21
2015-08-18-21
2015-12-21-21
2016-04-24-21
2016-08-27-21
2016-12-30-21
2017-05-04-21
2017-09-06-21
2018-01-09-21
2018-05-14-21
2018-09-16-21
2019-01-19-21
2019-05-24-21
2019-09-26-21


In [9]:
# Last step: pack the contents of the Training directory into Training.zip.
# The call returns the path of the archive it wrote, shown as the cell output.

import shutil
shutil.make_archive(base_name='Training', format='zip', root_dir='Training')

'/home/jovyan/DS4Earth-Hurricane-Modeling/Image Data/Training.zip'