# In [40]:
# Import necessary libraries
import pandas as pd  # For handling dataframes and CSV files
import numpy as np  # For numerical operations (not directly used in this code)

# Locations of the two CSV files (raw files hosted on raw.githubusercontent.com).
train_csv_url = 'https://raw.githubusercontent.com/minaghaderi/RandomForestModel_Titanic/main/train.csv'
test_csv_url = 'https://raw.githubusercontent.com/minaghaderi/RandomForestModel_Titanic/main/test.csv'

# pandas fetches each URL and parses it into a DataFrame.
train_data = pd.read_csv(train_csv_url)  # Supplies the 'Survived' target and the training features
test_data = pd.read_csv(test_csv_url)  # Supplies the features to predict on and 'PassengerId'

# Import the RandomForestClassifier from scikit-learn
from sklearn.ensemble import RandomForestClassifier 

# Target variable: whether the passenger survived (1) or not (0).
y = train_data['Survived']

# Columns used as model inputs, listed once so that the training and test
# matrices are always built from the same selection.
features = ["Pclass", "Sex", "SibSp", "Parch"]

# One-hot encode categorical columns such as "Sex"; numeric columns pass
# through unchanged.
x_train = pd.get_dummies(train_data[features])
x_test = pd.get_dummies(test_data[features])

# get_dummies derives its output columns from the values present in each
# frame, so encoding the two sets independently can produce different column
# sets or orders. Align the test matrix to the training layout: categories
# absent from the test data become all-zero columns, and categories never seen
# during training are dropped, so the model receives exactly the columns it
# was fitted on.
x_test = x_test.reindex(columns=x_train.columns, fill_value=0)

# Hyperparameters for the random forest:
#   n_estimators - number of trees in the forest
#   max_depth    - cap on the depth of each tree, to limit overfitting
#   random_state - fixed seed so repeated runs give the same result
forest_params = {"n_estimators": 100, "max_depth": 5, "random_state": 1}

# Build the classifier and fit it on the training features and target.
# fit() returns the fitted estimator itself, so `model` is the trained forest.
model = RandomForestClassifier(**forest_params).fit(x_train, y)

# Survival predictions for every row of the test feature matrix.
prediction = model.predict(x_test)

# Pair each test passenger's id with the model's predicted 'Survived' value.
passenger_ids = test_data.PassengerId
output = pd.DataFrame({'PassengerId': passenger_ids, 'Survived': prediction})

# Write the table to disk; index=False keeps the row index out of the CSV.
submission_path = 'Prediction_Titanic.csv'
output.to_csv(submission_path, index=False)
