Load the dataset into a DataFrame.

In [None]:
import pandas as pd

# Parse the CSV at the relative path 'data.csv' into the frame that the
# remaining cells operate on.
data = pd.read_csv(filepath_or_buffer='data.csv')

Get an overview of the data, including column types and counts.

In [None]:
# Print pandas' built-in summary of `data` (per-column dtype and non-null
# count); info() writes to stdout and returns None, so nothing is assigned.
data.info()

Plot a histogram to visualize the distribution of a numerical feature.

In [None]:
import matplotlib.pyplot as plt

# Draw the histogram of one column on the current axes (created on demand),
# then render the figure.
hist_axes = plt.gca()
hist_axes.hist(data['column_name'])
plt.show()

Create scatter plots to examine relationships between features.

In [None]:
# Plot one column against another, label both axes in a single call on the
# current axes, then render the figure.
plt.scatter(x=data['x_column'], y=data['y_column'])
plt.gca().set(xlabel='X Axis', ylabel='Y Axis')
plt.show()

Calculate the correlation matrix to measure pairwise linear relationships between the numeric variables.

In [None]:
# Correlate only the numeric/boolean columns. pandas >= 2.0 no longer drops
# non-numeric columns silently, so a bare data.corr() raises while string
# columns are still present ('sex' and 'embarked' are encoded in later cells).
# Selecting by dtype first behaves the same on every pandas version.
corr_matrix = data.select_dtypes(include=['number', 'bool']).corr()

One-hot encode the 'sex' feature to prepare it for modeling.

In [None]:
# Encode at frame level instead of assigning get_dummies' DataFrame result to
# the single 'sex' column: that assignment only works when exactly one
# indicator column is left after drop_first and raises otherwise.
# The 'sex' column is replaced by 'sex_<category>' indicator column(s);
# dtype=int yields 0/1 integers regardless of the pandas version's default
# dummy dtype.
data = pd.get_dummies(data, columns=['sex'], drop_first=True, dtype=int)

Fill missing values in the numeric columns with each column's mean.

In [None]:
# Compute the means over numeric columns only: on pandas >= 2.0 a bare
# data.mean() raises TypeError when the frame still holds non-numeric columns
# (e.g. 'embarked', which is label-encoded in a later cell).
# Columns without a computed mean keep their missing values.
data.fillna(data.mean(numeric_only=True), inplace=True)

Label encode the 'embarked' feature for categorical handling.

In [None]:
from sklearn.preprocessing import LabelEncoder

# Learn the set of distinct 'embarked' values first, then overwrite the
# column with the corresponding integer codes.
le = LabelEncoder()
le.fit(data['embarked'])
data['embarked'] = le.transform(data['embarked'])

Finalize features for modeling by dropping unnecessary columns.

In [None]:
# Build the modeling frame by removing the named column; drop() returns a new
# frame, so `data` itself is left unchanged.
final_features = data.drop(columns=['unwanted_column'])

Split the data into training and testing sets.

In [None]:
from sklearn.model_selection import train_test_split

# Separate the predictors from the 'target' label column.
X = final_features.drop('target', axis=1)
y = final_features['target']

# Hold out 20% of the rows for testing. Fixing random_state makes the shuffle
# deterministic, so the split (and every metric computed from it) is
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

Prepare and train the model using a Random Forest classifier.

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Seed the forest so that bootstrap sampling and feature selection are
# deterministic; an unseeded forest yields a different fitted model on every
# run, which makes scores and the saved artifact irreproducible.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

Perform hyperparameter tuning to find the best model parameters.

In [None]:
from sklearn.model_selection import GridSearchCV

# Candidate forest sizes; each candidate is scored with 5-fold
# cross-validation on the training split only.
param_grid = dict(n_estimators=[10, 50, 100])
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)
grid_search.fit(X_train, y_train)

Evaluate the model's performance on the test set.

In [None]:
from sklearn.metrics import accuracy_score

# Predict the held-out rows with `model` and score the predictions against
# the true test labels.
# NOTE(review): this evaluates `model` as fitted directly, not the estimator
# selected by `grid_search` — confirm that is intended.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

Perform cross-validation to assess the model's generalization ability.

In [None]:
from sklearn.model_selection import cross_val_score

# 5-fold cross-validation of `model` over the full X / y (training and test
# rows alike); the result holds one score per fold.
cv_scores = cross_val_score(estimator=model, X=X, y=y, cv=5)

Save the trained model for future use.

In [None]:
import joblib

# Serialize `model` to the relative path 'model.pkl' so it can be reloaded
# later with joblib.load.
joblib.dump(value=model, filename='model.pkl')