In [None]:
import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

# matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
# later dropped the old names; use whichever spelling this install provides
# and fall back to the default style rather than crashing the first cell.
_dark_style = next(
    (name for name in ("seaborn-v0_8-dark", "seaborn-dark")
     if name in plt.style.available),
    "default",
)
plt.style.use(_dark_style)

# Seed NumPy's global generator so stochastic steps repeat under Run All.
np.random.seed(42)

In [None]:
# Read the Pacific storm records and discard the 'ID' and 'Name' columns.
pacific = (
    pd.read_csv("../input/hurricane-database/pacific.csv")
    .drop(columns=['ID', 'Name'])
)
pacific.head()

In [None]:
# (rows, columns) of the loaded frame.
pacific.shape

In [None]:
# Column labels of the frame.
pacific.columns

In [None]:
# Per-column count of missing values, reduced to a single flag:
# truthy when at least one column has a non-zero count.
pacific.isna().sum().any()

In [None]:
# Distinct values of the Status column as stored in the file
# (whitespace is only stripped in a later cell).
pacific.Status.unique()

In [None]:
# One bar per Status value, followed by the same counts as text.
sns.countplot(data=pacific, x='Status')
print(pacific['Status'].value_counts())

In [None]:
# Trim leading/trailing whitespace from every Status value.
pacific['Status'] = pacific['Status'].str.strip()

In [None]:
# Distinct values of the Event column.
pacific.Event.unique()

In [None]:
# Count of records per Event value, split by Status.
plt.figure(figsize=(17,5))
# Columns are passed by keyword: seaborn 0.12+ treats the first positional
# argument as `data`, so a positional Series no longer selects the x variable.
sns.countplot(x='Event', hue='Status', data=pacific)
plt.legend(loc='upper right')

In [None]:
# Distinct values of the Time column.
pacific.Time.unique()

In [None]:
# Count of records per Time value, split by Status.
plt.figure(figsize=(17,5))
# Columns are passed by keyword: seaborn 0.12+ treats the first positional
# argument as `data`, so a positional Series no longer selects the x variable.
sns.countplot(x='Time', hue='Status', data=pacific)
# An empty label string yields a legend with no entries, which keeps the
# per-status key from covering the many bars.
plt.legend('')
plt.xticks(rotation=90)
plt.show()

In [None]:
# Average the Time column within each Status group and draw one bar per group.
time_by_status = pacific.groupby('Status')['Time'].mean()
sns.barplot(
    x=time_by_status.index,
    y=time_by_status,
)

In [None]:
# Parse the YYYYMMDD-formatted Date column, then expose its calendar
# components as separate columns via the .dt accessor.
date = pd.to_datetime(pacific['Date'], format='%Y%m%d')
pacific['Year'] = date.dt.year
pacific['Month'] = date.dt.month
pacific['Day'] = date.dt.day

In [None]:
# One wide count plot per calendar component, split by Status.
cols = ['Year', 'Month', 'Day']
for col in cols:
    # Open exactly one figure per plot. Calling plt.figure(i) and then
    # plt.figure(figsize=...) created two figures each iteration and left
    # the first one blank in the output.
    plt.figure(figsize=(17,5))
    sns.countplot(x=col, hue='Status', data=pacific)
    # An empty label string yields a legend with no entries.
    plt.legend('')
    plt.xticks(rotation=90)

In [None]:
# Scatter each calendar component against Status, one figure per component.
cols = ['Year', 'Month', 'Day']
for col in cols:
    plt.figure()
    # x/y are given by keyword: seaborn 0.12+ accepts only `data` positionally,
    # so passing two Series positionally raises a TypeError.
    sns.scatterplot(x=col, y='Status', data=pacific)

In [None]:
# Look at the first Latitude value and preview it with the 'N' marker removed.
a = pacific['Latitude'].iloc[0]
str(a).replace('N', '')

In [None]:
def _signed_coordinate(value):
    """Convert a coordinate such as '28.0N' or '94.8W' into a signed float.

    A trailing 'S' or 'W' makes the result negative; 'N' and 'E' stay
    positive. Values without a hemisphere letter (including numbers that
    were already converted) are passed through ``float`` unchanged, so
    re-running the cell is harmless.
    """
    text = str(value).strip()
    if text and text[-1] in 'NSEW':
        magnitude = float(text[:-1])
        return -magnitude if text[-1] in 'SW' else magnitude
    return float(text)


# Simply deleting the hemisphere letter made e.g. 170.0E and 170.0W
# indistinguishable; keep that information as the sign instead.
# Plain column assignment is used so the columns end up with float dtype.
for coord_col in ('Latitude', 'Longitude'):
    pacific[coord_col] = pacific[coord_col].map(_signed_coordinate)

In [None]:
# Distribution of Latitude after casting the column to float.
# NOTE(review): sns.distplot is deprecated since seaborn 0.11 in favour of
# histplot/displot — consider migrating once the seaborn version is confirmed.
sns.distplot(pacific['Latitude'].astype('float'))

In [None]:
# Distribution of Longitude after casting the column to float.
# NOTE(review): sns.distplot is deprecated since seaborn 0.11 in favour of
# histplot/displot — consider migrating once the seaborn version is confirmed.
sns.distplot(pacific['Longitude'].astype('float'))

In [None]:
# Distribution of the 'Maximum Wind' column.
# NOTE(review): sns.distplot is deprecated since seaborn 0.11 in favour of
# histplot/displot — consider migrating once the seaborn version is confirmed.
sns.distplot(pacific['Maximum Wind'])

In [None]:
# colums = ['Minimum Pressure', 'Low Wind NE', 'Low Wind SE',
#        'Low Wind SW', 'Low Wind NW', 'Moderate Wind NE', 'Moderate Wind SE',
#        'Moderate Wind SW', 'Moderate Wind NW', 'High Wind NE', 'High Wind SE',
#        'High Wind SW', 'High Wind NW']

# for i, col in enumerate(colums):
#     plt.figure(i)
#     sns.distplot(pacific[col])

In [None]:
# Predictor columns fed to the models.
# 'Status' is deliberately NOT listed here: it is the prediction target, and
# including it among the inputs leaks the answer into the model and makes
# every reported score meaningless.
features = ['Time', 'Latitude', 'Longitude',
       'Maximum Wind', 'Minimum Pressure', 'Low Wind NE', 'Low Wind SE',
       'Low Wind SW', 'Low Wind NW', 'Moderate Wind NE', 'Moderate Wind SE',
       'Moderate Wind SW', 'Moderate Wind NW', 'High Wind NE', 'High Wind SE',
       'High Wind SW', 'High Wind NW', 'Year', 'Month', 'Day']

# Target column.
label = 'Status'

In [None]:
from sklearn.preprocessing import LabelEncoder

# Snapshot of the Status column before encoding.
y = pacific['Status']

le = LabelEncoder()
# Replace the column by plain assignment rather than `pacific.loc[:, 'Status'] = ...`.
# Under pandas 2.x the .loc form writes the integer codes into the existing
# object-dtype column in place, leaving dtype=object; scikit-learn classifiers
# then reject the target as an unknown label type.
pacific['Status'] = le.fit_transform(y)

# Class names in code order (code k corresponds to le.classes_[k]).
print(le.classes_)

In [None]:
# Split the frame into the predictor matrix and the target column.
X, y = pacific[features], pacific[label]

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize every predictor; the fitted scaler stays available as `scaler`.
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so test-set statistics influence the transform — confirm this is acceptable.
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [None]:
from sklearn.decomposition import PCA

# Keep as many principal components as needed to explain 95% of the variance.
pca = PCA(n_components=0.95).fit(X)

# Report the share of variance actually retained, as a percentage.
retained_pct = pca.explained_variance_ratio_.sum()*100
print("---Explained Variance Ratio---")
print(retained_pct)

# Project the standardized predictors onto the retained components.
X_pca = pca.transform(X)

In [None]:
# (rows, retained components) of the PCA projection.
X_pca.shape

In [None]:
from sklearn.model_selection import train_test_split

# Stratified 80/20 split. A fixed random_state makes the split identical every
# time this cell runs; without it the result depended on how many random draws
# had already been taken from NumPy's global generator in this kernel session.
# NOTE(review): the split uses the standardized X; the X_pca projection
# computed earlier is not used here — confirm that is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    stratify=y,
    test_size=0.20,
    random_state=42,
)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

# Fit once on the whole training split so `model` is usable afterwards.
model = LogisticRegression(max_iter=30000).fit(X_train, y_train)

# cross_val_score clones the estimator for every fold, so the fit above does
# not influence the cross-validated scores.
model_for_cv = model
scores = cross_val_score(model_for_cv, X_train, y_train, cv=5, scoring='f1_macro')

# The scorer is macro-averaged F1, so label the number as such; it was
# previously printed as "Accuracy", which misreports the metric.
print("Macro F1: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_val_score

# Unfitted Gaussian naive Bayes classifier; it is fitted in a later cell.
clf = GaussianNB()

model_for_cv = clf
scores = cross_val_score(model_for_cv, X_train, y_train, cv=5, scoring='f1_macro')

# The scorer is macro-averaged F1, so label the number as such; it was
# previously printed as "Accuracy", which misreports the metric.
print("Macro F1: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

In [None]:
# Fit the classifier on the full training split.
clf.fit(X_train, y_train)

In [None]:
# Predicted class codes for the held-out test split.
y_pred = clf.predict(X_test)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Confusion matrix of true vs. predicted classes on the test split.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

# Per-class precision / recall / F1 summary for the same predictions.
print("----Classification Report----")
print(classification_report(y_true=y_test, y_pred=y_pred))