In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

Load the training data

In [None]:
# Read the training set from the relative path 'train.csv' into a DataFrame.
train_data = pd.read_csv('train.csv')


Preprocessing

In [None]:
# Remove the columns that are not used as model inputs, then derive
# FamilySize as SibSp + Parch + 1 (the +1 presumably counts the passenger
# themself -- confirm against the dataset description).
train_data = train_data.drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin'])
train_data = train_data.assign(
    FamilySize=train_data['SibSp'] + train_data['Parch'] + 1
)

Impute missing values, one-hot encode the categorical columns, and separate the features from the target

In [None]:
# Fill gaps in the numeric columns with the column median. The fitted
# imputer is kept in `imputer` so the same medians can be applied later.
# NOTE(review): the imputer is fitted on the whole training frame before the
# train/validation split, so validation rows contribute to the medians.
numeric_cols = ['Age', 'Fare']
imputer = SimpleImputer(strategy='median')
train_data[numeric_cols] = imputer.fit_transform(train_data[numeric_cols])

# Expand the categorical columns into indicator (dummy) columns.
categorical_cols = ['Sex', 'Embarked']
train_data = pd.get_dummies(train_data, columns=categorical_cols)

# Target is the 'Survived' column; every remaining column is a feature.
y = train_data['Survived']
X = train_data.drop(columns=['Survived'])

Split the data into training and validation sets, then standardize the features

In [None]:
# Hold out 20% of the rows for validation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_val_scaled = scaler.transform(X_val)


Define the hyperparameter grid and run a grid search over a random forest classifier

In [None]:
# Candidate hyperparameter values handed to the grid search; every
# combination of the three lists is a candidate (5 * 3 * 3 = 45).
param_grid = dict(
    n_estimators=[20, 50, 100, 200, 300],
    max_depth=[None, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

In [None]:
# Base estimator with a fixed seed so repeated runs give the same forests.
model = RandomForestClassifier(random_state=42)

# 5-fold cross-validated search over every combination in param_grid,
# fitted on the scaled training split.
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)
grid_search.fit(X=X_train_scaled, y=y_train)

Evaluate the best model's accuracy on the validation set

In [None]:
# From here on, `model` is the best estimator found by the grid search
# (it replaces the unfitted base estimator of the same name).
model = grid_search.best_estimator_

# Score the selected model on the held-out validation split.
y_pred = model.predict(X=X_val_scaled)
acc = accuracy_score(y_true=y_val, y_pred=y_pred)
print("Validation Accuracy:", acc)

Validation Accuracy: 0.8044692737430168


Load the test data

In [None]:
# Read the test set from the relative path 'test.csv' into a DataFrame.
test_data = pd.read_csv('test.csv')

Preprocess the test data with the same steps as the training data

In [None]:
# Apply the same feature engineering that was applied to the training frame:
# drop the unused columns and derive FamilySize.
test_data = test_data.drop(['PassengerId', 'Name', 'Ticket', 'Cabin'], axis=1)
test_data['FamilySize'] = test_data['SibSp'] + test_data['Parch'] + 1

# Reuse the imputer fitted on the training data (transform only, no refit)
# so the test rows are filled with the training medians.
test_data[['Age', 'Fare']] = imputer.transform(test_data[['Age', 'Fare']])
test_data = pd.get_dummies(test_data, columns=['Sex', 'Embarked'])

# get_dummies only creates indicator columns for categories that occur in
# this particular file, so the resulting columns are not guaranteed to match
# the training features. Force the exact column set and order the scaler and
# model were fitted on; an indicator missing from the test file becomes
# all-zero, and any column unseen at training time is dropped.
test_data = test_data.reindex(columns=X_train.columns, fill_value=0)

Scale the test features and predict

In [None]:
# Scale the test features with the scaler fitted on the training split
# (transform only), then predict with the selected model.
test_data_scaled = scaler.transform(X=test_data)
test_predictions = model.predict(X=test_data_scaled)

Write the predictions to a CSV result file

In [None]:
# PassengerId was dropped from test_data during preprocessing, so it is read
# back from the file. Only that one column is needed, so restrict the read
# with usecols instead of parsing the whole CSV a second time.
passenger_ids = pd.read_csv('test.csv', usecols=['PassengerId'])['PassengerId']

# Pair each passenger id with its prediction and write the table without the
# DataFrame index, giving a two-column file.
Result = pd.DataFrame({'PassengerId': passenger_ids, 'Survived': test_predictions})
Result.to_csv('result.csv', index=False)