# Spaceship Multinomial Naive Bayes Submission

In [None]:
import pandas as pd 
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

# Bring the Data In

In [None]:
# Training features and the unlabeled submission features.
X_df = pd.read_csv('spaceship_train_X_v2.csv')
X_submission = pd.read_csv('spaceship_test_X_v2.csv')

# Training labels; the 'Unnamed: 0' column in the label file is discarded on load.
y_df = pd.read_csv('spaceship_train_y.csv').drop(columns='Unnamed: 0')

# NOTE(review): X_df keeps its own 'Unnamed: 0' and 'PassengerId' columns and is
# handed to the split/scaler unchanged -- confirm they are meant to be features.
X_df.head()

Unnamed: 0,PassengerId,CryoSleep,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Expenditure,NoSpending,...,CabinSide_Z,CabinDeck_A,CabinDeck_B,CabinDeck_C,CabinDeck_D,CabinDeck_E,CabinDeck_F,CabinDeck_G,CabinDeck_T,AgeEncoded
0,0001_01,0,0,-0.33753,-0.283865,-0.287383,-0.273826,-0.265831,-0.518357,1,...,0,0,1,0,0,0,0,0,0,3.0
1,0002_01,0,0,-0.172455,-0.278226,-0.245562,0.213965,-0.227033,-0.256582,0,...,0,0,0,0,0,0,1,0,0,2.0
2,0003_01,0,1,-0.272409,1.956643,-0.287383,5.692512,-0.222625,3.174596,0,...,0,1,0,0,0,0,0,0,0,4.0
3,0003_02,0,0,-0.33753,0.519986,0.33324,2.68402,-0.095651,1.322607,0,...,0,1,0,0,0,0,0,0,0,2.0
4,0004_01,0,0,0.121349,-0.240007,-0.034784,0.228181,-0.264067,-0.130318,0,...,0,0,0,0,0,0,1,0,0,1.0


In [None]:
# Hold out a stratified 10% of the rows for evaluation; the seed is fixed so the
# split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_df,
    y_df,
    train_size=0.9,
    test_size=0.1,
    stratify=y_df,
    random_state=1,
)

# To turn y_train and y_test into 1D arrays, uncomment:
# y_train = y_train['Transported'].values
# y_test = y_test['Transported'].values

# Model Training

In [None]:
# Encode target variable to numerical values.
# y_train / y_test are single-column DataFrames; LabelEncoder expects a 1D
# array and emits a DataConversionWarning when given a column vector, so
# flatten them before encoding.
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(np.ravel(y_train))
y_test_encoded = label_encoder.transform(np.ravel(y_test))

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [None]:
# Learn the per-feature min/max from the training split only, then apply that
# same scaling to both splits.
scaler = MinMaxScaler().fit(X_train)
X_train_scaled, X_test_scaled = (scaler.transform(part) for part in (X_train, X_test))

In [None]:
# Candidate values of MultinomialNB's `alpha` for GridSearchCV to try,
# one per decade from 0.001 to 10.
param_grid = dict(alpha=[0.001, 0.01, 0.1, 1, 10])

In [None]:
# Base estimator whose hyperparameters are tuned below.
multi_nb = MultinomialNB()

# 5-fold cross-validated search over param_grid, ranked by accuracy.
grid_search = GridSearchCV(
    estimator=multi_nb,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)
grid_search.fit(X_train_scaled, y_train_encoded)

# Winning configuration and the estimator built with it.
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

# Score the selected estimator on the held-out split.
y_pred = best_model.predict(X_test_scaled)
accuracy = accuracy_score(y_test_encoded, y_pred)

print("Accuracy:", accuracy)
print("Best Hyperparameters:", best_params)

Accuracy: 0.7413793103448276
Best Hyperparameters: {'alpha': 0.001}


In [None]:
# Handle negative values by clipping: every float64 column of X_df is floored
# at clip_value. This overwrites X_df's columns, so X_df no longer matches the
# preview displayed when it was loaded.
clip_value = 0.001
float_columns = X_df.select_dtypes(include=['float64']).columns
X_df[float_columns] = X_df[float_columns].clip(lower=clip_value)


# Submission Prediction

In [None]:
# Retrain the model using all data.
# The hyperparameters were tuned on MinMax-scaled features and the submission
# set is scaled with `scaler` before prediction, so the final fit must use
# scaled features as well -- fitting on raw X_df would put training and
# prediction inputs in different feature spaces.
# The scaler is refit on the full training frame so every training value lands
# in [0, 1] (MultinomialNB rejects negative inputs at fit time).
# NOTE(review): X_df's float columns were floored at 0.001 by the clipping cell
# while X_submission is not clipped -- confirm that asymmetry is intended.
X_full_scaled = scaler.fit_transform(X_df)
best_model.fit(X_full_scaled, y_df['Transported'].values)

In [None]:
#y_train = y_df['Transported'].values

In [None]:
# Scale the submission features with the existing `scaler`, then predict their labels.
# NOTE(review): these predictions are only meaningful if best_model was last
# fitted on features in this same scaled space -- confirm against the retraining cell.
X_submission_scaled = scaler.transform(X_submission)
y_submission = best_model.predict(X_submission_scaled)



In [None]:
# Pair each submission PassengerId with its predicted label and store the
# label column as bool.
submission_df = pd.DataFrame(
    {
        'PassengerId': X_submission['PassengerId'],
        'Transported': y_submission,
    }
).astype({'Transported': bool})
submission_df

Unnamed: 0,PassengerId,Transported
0,0013_01,True
1,0018_01,False
2,0019_01,True
3,0021_01,True
4,0023_01,False
...,...,...
4272,9266_02,True
4273,9269_01,False
4274,9271_01,True
4275,9273_01,False


## Export CSV

In [None]:
# Write the submission table to disk without the DataFrame index column.
output_csv = 'spaceship_multiNB_hm.csv'
submission_df.to_csv(output_csv, index=False)

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=2a79941c-6614-47fe-9427-0e9f23998893' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>