In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder


In [2]:
data=pd.read_csv(r"atlantic.csv")

In [3]:
data.head()

Unnamed: 0,ID,Name,Date,Time,Event,Status,Latitude,Longitude,Maximum Wind,Minimum Pressure,...,Low Wind SW,Low Wind NW,Moderate Wind NE,Moderate Wind SE,Moderate Wind SW,Moderate Wind NW,High Wind NE,High Wind SE,High Wind SW,High Wind NW
0,AL011851,UNNAMED,18510625,0,,HU,28.0N,94.8W,80,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
1,AL011851,UNNAMED,18510625,600,,HU,28.0N,95.4W,80,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
2,AL011851,UNNAMED,18510625,1200,,HU,28.0N,96.0W,80,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
3,AL011851,UNNAMED,18510625,1800,,HU,28.1N,96.5W,80,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
4,AL011851,UNNAMED,18510625,2100,L,HU,28.2N,96.8W,80,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999


In [4]:
def convert_lat_long(value):
    if 'N' in value:
        return float(value[:-1])
    elif 'S' in value:
        return -float(value[:-1])
    elif 'E' in value:
        return float(value[:-1])
    elif 'W' in value:
        return -float(value[:-1])
    else:
        return np.nan

data['Latitude'] = data['Latitude'].apply(convert_lat_long)
data['Longitude'] = data['Longitude'].apply(convert_lat_long)

In [5]:
# Replace -999 with NaN
data = data.replace(-999, np.nan)

# Fill NaN values with the mean of their respective numeric columns
numeric_columns = data.select_dtypes(include=[np.number]).columns
data[numeric_columns] = data[numeric_columns].fillna(data[numeric_columns].mean())

# Display the first few rows of the updated dataset
data.head()


Unnamed: 0,ID,Name,Date,Time,Event,Status,Latitude,Longitude,Maximum Wind,Minimum Pressure,...,Low Wind SW,Low Wind NW,Moderate Wind NE,Moderate Wind SE,Moderate Wind SW,Moderate Wind NW,High Wind NE,High Wind SE,High Wind SW,High Wind NW
0,AL011851,UNNAMED,18510625,0,,HU,28.0,-94.8,80,992.24425,...,48.647188,59.156393,24.641952,23.029894,15.427293,18.403141,8.110117,7.35771,5.13089,6.269211
1,AL011851,UNNAMED,18510625,600,,HU,28.0,-95.4,80,992.24425,...,48.647188,59.156393,24.641952,23.029894,15.427293,18.403141,8.110117,7.35771,5.13089,6.269211
2,AL011851,UNNAMED,18510625,1200,,HU,28.0,-96.0,80,992.24425,...,48.647188,59.156393,24.641952,23.029894,15.427293,18.403141,8.110117,7.35771,5.13089,6.269211
3,AL011851,UNNAMED,18510625,1800,,HU,28.1,-96.5,80,992.24425,...,48.647188,59.156393,24.641952,23.029894,15.427293,18.403141,8.110117,7.35771,5.13089,6.269211
4,AL011851,UNNAMED,18510625,2100,L,HU,28.2,-96.8,80,992.24425,...,48.647188,59.156393,24.641952,23.029894,15.427293,18.403141,8.110117,7.35771,5.13089,6.269211


In [6]:
# Extract 'Year', 'Month', and 'Day' from the 'Date' column
data['Year'] = data['Date'].astype(str).str[:4].astype(int)
data['Month'] = data['Date'].astype(str).str[4:6].astype(int)
data['Day'] = data['Date'].astype(str).str[6:8].astype(int)

# Define the feature matrix (X) and the target vector (y)
X = data[['Latitude', 'Longitude', 'Moderate Wind NE', 'Moderate Wind SE', 'Moderate Wind SW', 'Moderate Wind NW', 'Year', 'Month', 'Day']]
y = data['Maximum Wind']



In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



In [8]:
!pip install scikit-learn



In [9]:
!pip install --upgrade numpy --user




In [10]:
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
# Impute missing values using the mean
imputer = SimpleImputer(strategy='mean')  # Replace missing values with the mean
X_train = imputer.fit_transform(X_train)
X_test = imputer.transform(X_test)  # Use the same imputer fitted on training data

# Train the model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)

# Calculate Mean Squared Error
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

Mean Squared Error: 100.08923356501336


In [11]:
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')


Mean Squared Error: 100.08923356501336
R^2 Score: 0.8707792990764643


In [12]:
def predict_max_wind(latitude, longitude, moderate_wind_ne, moderate_wind_se, moderate_wind_sw, moderate_wind_nw, year, month, day):
    input_data = np.array([[latitude, longitude, moderate_wind_ne, moderate_wind_se, moderate_wind_sw, moderate_wind_nw, year, month, day]])
    input_data = imputer.transform(input_data)  # Apply the imputer to the input data
    prediction = model.predict(input_data)
    return prediction[0]


In [13]:
latitude = 25.0
longitude = -80.0
moderate_wind_ne = 40
moderate_wind_se = 35
moderate_wind_sw = 35
moderate_wind_nw = 40
year = 2023
month = 9
day = 15

predicted_max_wind = predict_max_wind(latitude, longitude, moderate_wind_ne, moderate_wind_se, moderate_wind_sw, moderate_wind_nw, year, month, day)
print(f'Predicted Maximum Wind: {predicted_max_wind} knots')

Predicted Maximum Wind: 95.05 knots


