In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

def preprocess(df):
    # Fill missing values
    df['Age'] = df['Age'].fillna(df['Age'].median())
    df['RoomService'] = df['RoomService'].fillna(0)
    df['FoodCourt'] = df['FoodCourt'].fillna(0)
    df['ShoppingMall'] = df['ShoppingMall'].fillna(0)
    df['Spa'] = df['Spa'].fillna(0)
    df['VRDeck'] = df['VRDeck'].fillna(0)
    df['HomePlanet'] = df['HomePlanet'].fillna("Earth")
    df['CryoSleep'] = df['CryoSleep'].fillna(False)
    df['Cabin'] = df['Cabin'].fillna("Unknown/0/Z")
    df['Destination'] = df['Destination'].fillna("TRAPPIST-1e")
    df['VIP'] = df['VIP'].fillna(False)

    df['CryoSleep'] = df['CryoSleep'].astype(bool)
    df['VIP'] = df['VIP'].astype(bool)

    df[['Deck', 'CabinNum', 'Side']] = df['Cabin'].str.split('/', expand=True)

    label_cols = ['HomePlanet', 'Destination', 'Deck', 'Side']
    for col in label_cols:
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col])

    df.drop(columns=['Name', 'Cabin', 'CabinNum'], inplace=True)

    return df

train_processed = preprocess(train.copy())
test_processed = preprocess(test.copy())

features = [
    'HomePlanet', 'CryoSleep', 'Destination', 'Age',
    'VIP', 'RoomService', 'FoodCourt', 'ShoppingMall',
    'Spa', 'VRDeck', 'Deck', 'Side'
]

X = train_processed[features]
y = train_processed['Transported'].astype(bool)
X_test = test_processed[features]

model = RandomForestClassifier(n_estimators=200, random_state=42)
model.fit(X, y)

preds = model.predict(X_test)

submission = pd.DataFrame({
    "PassengerId": test["PassengerId"],
    "Transported": preds
})

submission.to_csv("spaceship_submission.csv", index=False)
print("🚀 Submission file 'spaceship_submission.csv' is ready!")


  df['CryoSleep'] = df['CryoSleep'].fillna(False)
  df['VIP'] = df['VIP'].fillna(False)
  df['CryoSleep'] = df['CryoSleep'].fillna(False)
  df['VIP'] = df['VIP'].fillna(False)


🚀 Submission file 'spaceship_submission.csv' is ready!
