In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score


In [None]:

# Locations of the competition CSV files
TRAIN_CSV_PATH = 'F:/MLpractrice/st/data/train.csv'
TEST_CSV_PATH = 'F:/MLpractrice/st/data/test.csv'

# Read both CSV files into DataFrames
train_df = pd.read_csv(TRAIN_CSV_PATH)
test_df = pd.read_csv(TEST_CSV_PATH)


In [None]:


# Explore the data (optional step): first rows, then column dtypes / non-null counts
print(train_df.head())
# DataFrame.info() writes its report to stdout itself and returns None, so it is
# called directly; wrapping it in print() would append a stray "None" line.
train_df.info()


In [None]:


# Input columns used by the model, grouped for readability; the concatenation
# below keeps the column order used by the rest of the notebook.
passenger_columns = ['HomePlanet', 'CryoSleep', 'Cabin', 'Destination', 'Age', 'VIP']
spending_columns = ['RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck']
features = passenger_columns + spending_columns

# Column the model is trained to predict
target = 'Transported'


In [None]:

# Handle missing values if any (example approach): carry the previous row's value
# forward into each gap.
# - DataFrame.ffill() replaces fillna(method='ffill'), whose `method` keyword is
#   deprecated in recent pandas releases.
# - The result is reassigned instead of using inplace=True, so the cell builds the
#   filled frame explicitly rather than mutating the loaded one in place.
# - A forward fill cannot fill values that precede the first non-missing entry of a
#   column, so a backward fill is chained to cover those leading gaps.
train_df = train_df.ffill().bfill()
test_df = test_df.ffill().bfill()


In [None]:


# Stack the train and test feature columns so that one-hot encoding yields the
# same set of dummy columns for both frames
combined_df = pd.concat([train_df[features], test_df[features]])

# One-hot encode the categorical columns listed here; the remaining columns are
# left as they are
categorical_columns = ['HomePlanet', 'Cabin', 'Destination']
combined_encoded = pd.get_dummies(combined_df, columns=categorical_columns)

# Undo the stacking by position: the first len(train_df) rows are the train rows,
# everything after them belongs to the test set
n_train_rows = len(train_df)
train_encoded = combined_encoded.iloc[:n_train_rows]
test_encoded = combined_encoded.iloc[n_train_rows:]

# Hold out 20% of the training rows for validation (fixed seed for repeatability)
split_parts = train_test_split(
    train_encoded,
    train_df[target],
    test_size=0.2,
    random_state=42,
)
X_train, X_valid, y_train, y_valid = split_parts

# Fit a Random Forest (seeded, otherwise default settings) on the training portion
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)


In [None]:

# Score the fitted model on the held-out validation rows
y_pred = model.predict(X_valid)
accuracy = accuracy_score(y_valid, y_pred)
print(f'Validation Accuracy: {accuracy:.4f}')

# Refit the same model object on every training row, then predict the test rows
model.fit(train_encoded, train_df[target])
test_predictions = model.predict(test_encoded)

# Assemble the submission frame: the test PassengerId column followed by the
# predicted Transported value for each row
submission_df = test_df[['PassengerId']].assign(Transported=test_predictions)



In [None]:

# Make sure the prediction column is stored with a boolean dtype
transported_as_bool = submission_df['Transported'].astype(bool)
submission_df['Transported'] = transported_as_bool

# Write the submission to disk without the DataFrame index column
SUBMISSION_CSV_PATH = 'F:/MLpractrice/st/submission.csv'
submission_df.to_csv(SUBMISSION_CSV_PATH, index=False)

# Show the first rows of what was written
submission_preview = submission_df.head()
print(submission_preview)