In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import geopandas as gpd
import seaborn as sns

In [3]:
# Resolve the CSV locations inside the "datasets" folder of the current working directory.
data_path = f"{os.getcwd()}/datasets"
atlantic_path = f"{data_path}/atlantic.csv"
pacific_path = f"{data_path}/pacific.csv"

# Read each basin's records into its own DataFrame (Atlantic first, then Pacific).
atlantic, pacific = (pd.read_csv(csv_path) for csv_path in (atlantic_path, pacific_path))

In [4]:
# Preview the first rows of the Atlantic frame (head() defaults to five rows).
atlantic.head()

Unnamed: 0,ID,Name,Date,Time,Event,Status,Latitude,Longitude,Maximum Wind,Minimum Pressure,...,Low Wind SW,Low Wind NW,Moderate Wind NE,Moderate Wind SE,Moderate Wind SW,Moderate Wind NW,High Wind NE,High Wind SE,High Wind SW,High Wind NW
0,AL011851,UNNAMED,18510625,0,,HU,28.0N,94.8W,80,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
1,AL011851,UNNAMED,18510625,600,,HU,28.0N,95.4W,80,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
2,AL011851,UNNAMED,18510625,1200,,HU,28.0N,96.0W,80,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
3,AL011851,UNNAMED,18510625,1800,,HU,28.1N,96.5W,80,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
4,AL011851,UNNAMED,18510625,2100,L,HU,28.2N,96.8W,80,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999


In [5]:
# Summarise the Atlantic frame: row count, column names, non-null counts and dtypes.
atlantic.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49105 entries, 0 to 49104
Data columns (total 22 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   ID                49105 non-null  object
 1   Name              49105 non-null  object
 2   Date              49105 non-null  int64 
 3   Time              49105 non-null  int64 
 4   Event             49105 non-null  object
 5   Status            49105 non-null  object
 6   Latitude          49105 non-null  object
 7   Longitude         49105 non-null  object
 8   Maximum Wind      49105 non-null  int64 
 9   Minimum Pressure  49105 non-null  int64 
 10  Low Wind NE       49105 non-null  int64 
 11  Low Wind SE       49105 non-null  int64 
 12  Low Wind SW       49105 non-null  int64 
 13  Low Wind NW       49105 non-null  int64 
 14  Moderate Wind NE  49105 non-null  int64 
 15  Moderate Wind SE  49105 non-null  int64 
 16  Moderate Wind SW  49105 non-null  int64 
 17  Moderate Win

In [6]:
# Summarise the Pacific frame: row count, column names, non-null counts and dtypes.
pacific.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26137 entries, 0 to 26136
Data columns (total 22 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   ID                26137 non-null  object
 1   Name              26137 non-null  object
 2   Date              26137 non-null  int64 
 3   Time              26137 non-null  int64 
 4   Event             26137 non-null  object
 5   Status            26137 non-null  object
 6   Latitude          26137 non-null  object
 7   Longitude         26137 non-null  object
 8   Maximum Wind      26137 non-null  int64 
 9   Minimum Pressure  26137 non-null  int64 
 10  Low Wind NE       26137 non-null  int64 
 11  Low Wind SE       26137 non-null  int64 
 12  Low Wind SW       26137 non-null  int64 
 13  Low Wind NW       26137 non-null  int64 
 14  Moderate Wind NE  26137 non-null  int64 
 15  Moderate Wind SE  26137 non-null  int64 
 16  Moderate Wind SW  26137 non-null  int64 
 17  Moderate Win

### Data Preprocessing

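Convert the Latitude and Longitude columns from hemisphere-suffixed strings (e.g. `28.0N`, `94.8W`) to signed floats: North and East become positive values, South and West become negative values.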

In [7]:
def move_last_char_to_beginning(s):
    """Rotate ``s`` one position to the right.

    The final character is moved to the front, e.g. ``"28.0N"`` becomes
    ``"N28.0"``. Inputs of length 0 or 1 are returned unchanged.

    Parameters
    ----------
    s : str
        The text to rotate.

    Returns
    -------
    str
        ``s`` with its last character placed first.
    """
    # Nothing to rotate for empty or single-character input.
    if len(s) <= 1:
        return s
    leading_part, final_char = s[:-1], s[-1]
    return final_char + leading_part

def _hemisphere_to_signed_degrees(coords, positive_suffix, negative_suffix):
    """Convert hemisphere-suffixed coordinate strings to signed floats.

    Each value such as ``"28.0N"`` or ``"94.8W"`` is rotated so the hemisphere
    letter comes first, then ``positive_suffix`` is dropped and
    ``negative_suffix`` is replaced by a minus sign before casting to float.

    Parameters
    ----------
    coords : pandas.Series
        Coordinate column, either still as suffixed strings or already numeric.
    positive_suffix : str
        Hemisphere letter that maps to a positive value (``'N'`` or ``'E'``).
    negative_suffix : str
        Hemisphere letter that maps to a negative value (``'S'`` or ``'W'``).

    Returns
    -------
    pandas.Series
        The coordinates as floats.
    """
    # Already converted (e.g. the cell is being re-run): calling len() on a
    # float inside move_last_char_to_beginning would raise, so pass through.
    if pd.api.types.is_numeric_dtype(coords):
        return coords.astype(float)
    rotated = coords.apply(move_last_char_to_beginning)
    return (
        rotated
        .str.replace(positive_suffix, '', regex=False)   # "N28.0" -> "28.0"
        .str.replace(negative_suffix, '-', regex=False)  # "W94.8" -> "-94.8"
        .astype(float)
    )


# North / East stay positive; South / West become negative.
atlantic['Latitude'] = _hemisphere_to_signed_degrees(atlantic['Latitude'], 'N', 'S')
atlantic['Longitude'] = _hemisphere_to_signed_degrees(atlantic['Longitude'], 'E', 'W')

pacific['Latitude'] = _hemisphere_to_signed_degrees(pacific['Latitude'], 'N', 'S')
pacific['Longitude'] = _hemisphere_to_signed_degrees(pacific['Longitude'], 'E', 'W')

In [8]:
# Label each frame with the basin it came from so rows stay distinguishable once stacked.
atlantic['Basin'], pacific['Basin'] = 'Atlantic', 'Pacific'

# Stack both basins into one frame with a fresh 0..n-1 index.
# `combined_df` and `hurricanes` are two names for the same DataFrame object.
combined_df = pd.concat((atlantic, pacific), ignore_index=True)
hurricanes = combined_df
hurricanes.head()

Unnamed: 0,ID,Name,Date,Time,Event,Status,Latitude,Longitude,Maximum Wind,Minimum Pressure,...,Low Wind NW,Moderate Wind NE,Moderate Wind SE,Moderate Wind SW,Moderate Wind NW,High Wind NE,High Wind SE,High Wind SW,High Wind NW,Basin
0,AL011851,UNNAMED,18510625,0,,HU,28.0,-94.8,80,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,Atlantic
1,AL011851,UNNAMED,18510625,600,,HU,28.0,-95.4,80,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,Atlantic
2,AL011851,UNNAMED,18510625,1200,,HU,28.0,-96.0,80,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,Atlantic
3,AL011851,UNNAMED,18510625,1800,,HU,28.1,-96.5,80,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,Atlantic
4,AL011851,UNNAMED,18510625,2100,L,HU,28.2,-96.8,80,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,Atlantic


In [13]:
# Parse the YYYYMMDD integers (e.g. 18510625) into datetime64 values.
hurricanes['Date'] = pd.to_datetime(hurricanes['Date'], format='%Y%m%d')

# Parse the HHMM integers (e.g. 0, 600, 2100) into datetime.time objects.
# The conversion runs only while the column still holds the raw integers:
# after the first run the values are time objects whose string form is
# "HH:MM:SS", which no longer matches '%H%M', so an unguarded re-run of this
# cell would raise instead of being a no-op.
if pd.api.types.is_integer_dtype(hurricanes['Time']):
    hurricanes['Time'] = pd.to_datetime(
        hurricanes['Time'].astype(str).str.zfill(4),  # left-pad: 600 -> "0600"
        format='%H%M',
    ).dt.time

In [14]:
# Inspect the column dtypes of the combined frame.
hurricanes.dtypes

ID                          object
Name                        object
Date                datetime64[ns]
Time                        object
Event                       object
Status                      object
Latitude                   float64
Longitude                  float64
Maximum Wind                 int64
Minimum Pressure             int64
Low Wind NE                  int64
Low Wind SE                  int64
Low Wind SW                  int64
Low Wind NW                  int64
Moderate Wind NE             int64
Moderate Wind SE             int64
Moderate Wind SW             int64
Moderate Wind NW             int64
High Wind NE                 int64
High Wind SE                 int64
High Wind SW                 int64
High Wind NW                 int64
Basin                       object
dtype: object