In [57]:
#######################################
# CS 424 - Project 2
# Kevin Kowalski
# Samuel Kajah
# Vijay Vemu
#######################################
#
# This file contains the code to clean
# the original data set. It outputs
# the necessary plots and updated data
# set to be used by shiny.
#
# This file was used for testing and 
# plotting before using RStudio for a
# dashboard.
#
#######################################

In [58]:
# import libraries
library(leaflet)
library(lubridate)
library(dplyr)
library(ggplot2)
library(scales)
library(stringr)
library(hashmap)
library(comprehenr)

In [59]:
# Column names applied to the raw data frames, in file order.
#
# Layout of a data row:
#   col 1  - date YYYYmmDD
#   col 2  - time HHMM
#   col 3  - record identifier (factor)
#   col 4  - storm type (factor)
#   col 5  - latitude (n/s)
#   col 6  - longitude (e/w)
#   col 7  - Maximum sustained wind (in knots)
#   col 8  - Minimum Pressure (in millibars)
#   col 9  - 34 kt wind radii maximum extent in northeastern quadrant (in nautical miles)
#   col 10 - 34 kt wind radii maximum extent in southeastern quadrant (in nautical miles)
#   col 11 - 34 kt wind radii maximum extent in southwestern quadrant (in nautical miles)
#   col 12 - 34 kt wind radii maximum extent in northwestern quadrant (in nautical miles)
#   col 13 - 50 kt wind radii maximum extent in northeastern quadrant (in nautical miles)
#   col 14 - 50 kt wind radii maximum extent in southeastern quadrant (in nautical miles)
#   col 15 - 50 kt wind radii maximum extent in southwestern quadrant (in nautical miles)
#   col 16 - 50 kt wind radii maximum extent in northwestern quadrant (in nautical miles)
#   col 17 - 64 kt wind radii maximum extent in northeastern quadrant (in nautical miles)
#   col 18 - 64 kt wind radii maximum extent in southeastern quadrant (in nautical miles)
#   col 19 - 64 kt wind radii maximum extent in southwestern quadrant (in nautical miles)
#   col 20 - 64 kt wind radii maximum extent in northwestern quadrant (in nautical miles)
#   col 21 - 'Size': not covered by the 20-column listing above; it is empty in
#            the rows printed further down, presumably because every input
#            line ends with a trailing comma (TODO confirm)
#
# The listing is written as '#' comments rather than a bare string literal so
# that it is not evaluated and echoed as a value when the code runs.
data_row_header <- c(
    "Date", "Time", "Record_ID", "Storm_Type", "Lat", "Lon", "Speed", "Pressure",
    "Wind_Radii_NE_34", "Wind_Radii_SE_34", "Wind_Radii_SW_34", "Wind_Radii_NW_34",
    "Wind_Radii_NE_50", "Wind_Radii_SE_50", "Wind_Radii_SW_50", "Wind_Radii_NW_50",
    "Wind_Radii_NE_64", "Wind_Radii_SE_64", "Wind_Radii_SW_64", "Wind_Radii_NW_64",
    "Size"
)

In [60]:
# custom functions for reading and formatting data

# Determines whether a row is the header row of a hurricane entry.
#
# A row counts as a header when exactly `n_missing` of its values are NA.
# The default of 15 is the count this script has always tested for. The count
# depends on the column layout at the time of the call, so pass a different
# value if columns have been added or removed (for example, an extra column
# that is also NA on header rows raises the count by one).
#
# row:       a single row of the data (e.g. data[i, ])
# n_missing: number of NA values that identifies a header row (default 15)
# returns:   TRUE when the row holds exactly n_missing NA values, else FALSE
is_header_row <- function(row, n_missing = 15) {
    # the comparison already yields TRUE/FALSE, so no if/else is needed
    sum(is.na(row)) == n_missing
}

# Builds a vector of the row indices of all hurricane header rows.
#
# Applies the same criterion as is_header_row() (a row with exactly 15 NA
# values), but counts the NAs of every row in a single vectorised pass instead
# of extracting and testing one data-frame row at a time. This also makes the
# function safe for a data frame with zero rows.
#
# data:    data frame to scan
# returns: integer vector of header row indices in ascending order;
#          integer(0) when there are no rows or no header rows
header_locations <- function(data) {
    na_per_row <- rowSums(is.na(data))
    # which() keeps the row names as element names; drop them so callers get
    # a plain index vector
    unname(which(na_per_row == 15))
}

# Splits the data into one block of rows per hurricane.
#
# Every entry of `header_indices` marks a header row. The rows belonging to
# that hurricane are the ones after its header and before the next header; the
# last hurricane runs to the end of `data`.
#
# data:           data frame holding header rows and data rows (needs at
#                 least two columns)
# header_indices: ascending row indices of the header rows
# returns:        list with two parallel lists:
#                   names     - the value in column 2 of each header row
#                   name_data - the data rows of each hurricane; a zero-row
#                               data frame when a header has no rows after it
make_huricane_data_map <- function(data, header_indices) {
    n_storms <- length(header_indices)
    # last row of each block: one before the next header, or the final row
    block_ends <- c(header_indices[-1] - 1L, nrow(data))

    names <- lapply(unname(header_indices), function(header_row) {
        data[header_row, 2]
    })

    name_data <- lapply(seq_len(n_storms), function(k) {
        # seq_len() yields an empty index when no rows follow the header;
        # `a:b` would count downwards there and pick up the wrong rows
        n_rows <- block_ends[k] - header_indices[k]
        data[header_indices[k] + seq_len(n_rows), ]
    })

    list(names = names, name_data = name_data)
}

In [61]:
# Rename the original text files to easier-to-work-with CSV files.
# Each rename is skipped when its source file is no longer present (for
# example because this cell has already been run once), so re-running the
# notebook does not produce failed renames and warnings.
if (file.exists("hurdat2-1851-2018-120319.txt")) {
    file.rename("hurdat2-1851-2018-120319.txt", "hurdat2-1851-2018-120319_atlantic.csv")
}
if (file.exists("hurdat2-nepac-1949-2018-122019.txt")) {
    file.rename("hurdat2-nepac-1949-2018-122019.txt", "hurdat2-nepac-1949-2018-122019_pacific.csv")
}

In [62]:
# Load both basins from the renamed CSV files. The files carry no column
# header line of their own, and text is kept as plain character data.
atlantic_data <- read.csv(
    file = "hurdat2-1851-2018-120319_atlantic.csv",
    header = FALSE,
    stringsAsFactors = FALSE
)
pacific_data <- read.csv(
    file = "hurdat2-nepac-1949-2018-122019_pacific.csv",
    header = FALSE,
    stringsAsFactors = FALSE
)

# Label the columns with the shared header vector.
names(atlantic_data) <- data_row_header
names(pacific_data) <- data_row_header

# Record which rows are hurricane header rows. This is done here, before any
# further columns are added to the frames.
atlantic_header_indices <- header_locations(atlantic_data)
pacific_header_indices <- header_locations(pacific_data)

In [63]:
# Combine the Date and Time columns into a single Timestamp column and place
# it first. Rows whose Date/Time text does not parse get an NA timestamp
# without a warning (quiet = TRUE).
# NOTE(review): the parsed times are tagged as America/Chicago; the HURDAT2
# format description reportedly gives times in UTC - confirm this is intended.
atlantic_data <- atlantic_data %>%
    mutate(
        Timestamp = parse_date_time(
            paste(Date, Time, sep = " "),
            "Ymd HM",
            tz = "America/Chicago",
            quiet = TRUE
        )
    ) %>%
    select(Timestamp, everything())

pacific_data <- pacific_data %>%
    mutate(
        Timestamp = parse_date_time(
            paste(Date, Time, sep = " "),
            "Ymd HM",
            tz = "America/Chicago",
            quiet = TRUE
        )
    ) %>%
    select(Timestamp, everything())

In [64]:
# Save the cleaned frames as CSV files. write.csv() keeps its defaults, so
# the row names are written out as an extra leading column.
write.csv(x = atlantic_data, file = "data_atlantic.csv")
write.csv(x = pacific_data, file = "data_pacific.csv")

In [66]:
# Show the first six rows of each data set under a label.
print("Atlantic Data")
head(atlantic_data, n = 6L)
print("Pacific Data")
head(pacific_data, n = 6L)

[1] "Atlantic Data"


Timestamp,Date,Time,Record_ID,Storm_Type,Lat,Lon,Speed,Pressure,Wind_Radii_NE_34,...,Wind_Radii_NW_34,Wind_Radii_NE_50,Wind_Radii_SE_50,Wind_Radii_SW_50,Wind_Radii_NW_50,Wind_Radii_NE_64,Wind_Radii_SE_64,Wind_Radii_SW_64,Wind_Radii_NW_64,Size
,AL011851,UNNAMED,14,,,,,,,...,,,,,,,,,,
1851-06-25 00:00:00,18510625,0000,,HU,28.0N,94.8W,80.0,-999.0,-999.0,...,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,
1851-06-25 06:00:00,18510625,0600,,HU,28.0N,95.4W,80.0,-999.0,-999.0,...,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,
1851-06-25 12:00:00,18510625,1200,,HU,28.0N,96.0W,80.0,-999.0,-999.0,...,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,
1851-06-25 18:00:00,18510625,1800,,HU,28.1N,96.5W,80.0,-999.0,-999.0,...,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,
1851-06-25 21:00:00,18510625,2100,L,HU,28.2N,96.8W,80.0,-999.0,-999.0,...,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,


[1] "Pacific Data"


Timestamp,Date,Time,Record_ID,Storm_Type,Lat,Lon,Speed,Pressure,Wind_Radii_NE_34,...,Wind_Radii_NW_34,Wind_Radii_NE_50,Wind_Radii_SE_50,Wind_Radii_SW_50,Wind_Radii_NW_50,Wind_Radii_NE_64,Wind_Radii_SE_64,Wind_Radii_SW_64,Wind_Radii_NW_64,Size
,EP011949,UNNAMED,7.0,,,,,,,...,,,,,,,,,,
1949-06-11 00:00:00,19490611,0000,,TS,20.2N,106.3W,45.0,-999.0,-999.0,...,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,
1949-06-11 06:00:00,19490611,0600,,TS,20.2N,106.4W,45.0,-999.0,-999.0,...,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,
1949-06-11 12:00:00,19490611,1200,,TS,20.2N,106.7W,45.0,-999.0,-999.0,...,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,
1949-06-11 18:00:00,19490611,1800,,TS,20.3N,107.7W,45.0,-999.0,-999.0,...,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,
1949-06-12 00:00:00,19490612,0000,,TS,20.4N,108.6W,45.0,-999.0,-999.0,...,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,


In [67]:
# Split each basin into per-hurricane blocks for easier access. The header
# indices were computed before the Timestamp column was added; they are still
# valid because no rows have been added or removed since.
atlantic_map <- make_huricane_data_map(
    data = atlantic_data,
    header_indices = atlantic_header_indices
)
pacific_map <- make_huricane_data_map(
    data = pacific_data,
    header_indices = pacific_header_indices
)

In [68]:
# Show the cyclone number of every hurricane, per basin.
print("Atlantic Hurricanes")
atlantic_map[["names"]]
print("Pacific Hurricanes")
pacific_map[["names"]]

[1] "Atlantic Hurricanes"


[1] "Pacific Hurricanes"


In [69]:
# EXAMPLE: get data for first atlantic hurricane
# `[[1]]` extracts the data frame itself; `[1]` would return a one-element
# list wrapping it.
atlantic_map$name_data[[1]]

Unnamed: 0,Timestamp,Date,Time,Record_ID,Storm_Type,Lat,Lon,Speed,Pressure,Wind_Radii_NE_34,...,Wind_Radii_NW_34,Wind_Radii_NE_50,Wind_Radii_SE_50,Wind_Radii_SW_50,Wind_Radii_NW_50,Wind_Radii_NE_64,Wind_Radii_SE_64,Wind_Radii_SW_64,Wind_Radii_NW_64,Size
2,1851-06-25 00:00:00,18510625,0,,HU,28.0N,94.8W,80,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,
3,1851-06-25 06:00:00,18510625,600,,HU,28.0N,95.4W,80,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,
4,1851-06-25 12:00:00,18510625,1200,,HU,28.0N,96.0W,80,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,
5,1851-06-25 18:00:00,18510625,1800,,HU,28.1N,96.5W,80,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,
6,1851-06-25 21:00:00,18510625,2100,L,HU,28.2N,96.8W,80,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,
7,1851-06-26 00:00:00,18510626,0,,HU,28.2N,97.0W,70,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,
8,1851-06-26 06:00:00,18510626,600,,TS,28.3N,97.6W,60,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,
9,1851-06-26 12:00:00,18510626,1200,,TS,28.4N,98.3W,60,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,
10,1851-06-26 18:00:00,18510626,1800,,TS,28.6N,98.9W,50,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,
11,1851-06-27 00:00:00,18510627,0,,TS,29.0N,99.4W,50,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,


In [70]:
# EXAMPLE: get data for first pacific hurricane
# `[[1]]` extracts the data frame itself; `[1]` would return a one-element
# list wrapping it.
pacific_map$name_data[[1]]

Unnamed: 0,Timestamp,Date,Time,Record_ID,Storm_Type,Lat,Lon,Speed,Pressure,Wind_Radii_NE_34,...,Wind_Radii_NW_34,Wind_Radii_NE_50,Wind_Radii_SE_50,Wind_Radii_SW_50,Wind_Radii_NW_50,Wind_Radii_NE_64,Wind_Radii_SE_64,Wind_Radii_SW_64,Wind_Radii_NW_64,Size
2,1949-06-11 00:00:00,19490611,0,,TS,20.2N,106.3W,45,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,
3,1949-06-11 06:00:00,19490611,600,,TS,20.2N,106.4W,45,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,
4,1949-06-11 12:00:00,19490611,1200,,TS,20.2N,106.7W,45,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,
5,1949-06-11 18:00:00,19490611,1800,,TS,20.3N,107.7W,45,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,
6,1949-06-12 00:00:00,19490612,0,,TS,20.4N,108.6W,45,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,
7,1949-06-12 06:00:00,19490612,600,,TS,20.5N,109.4W,45,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,
8,1949-06-12 12:00:00,19490612,1200,,TS,20.6N,110.2W,45,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,


In [71]:
# EXAMPLE: look up a hurricane's name from its cyclone number.
# On a header row the cyclone number sits in the Date column and the name in
# the Time column; the name is trimmed of surrounding whitespace.
atlantic_data %>%
    filter(Date == "AL092011") %>%
    pull(Time) %>%
    str_trim()

In [None]:
# start plotting, graphing, etc... 
# TODO