Load the Titanic dataset into a pandas DataFrame.

In [None]:
import pandas as pd
# Read the passenger records from the CSV file in the working directory.
csv_path = 'titanic.csv'
df = pd.read_csv(csv_path)

Preview the data to understand its structure and contents.

In [None]:
# Print a concise summary of the frame: column names, non-null counts and dtypes.
df.info()

Drop unnecessary columns to clean the dataset.

In [None]:
# Rebind instead of mutating in place, and tolerate already-removed columns,
# so that re-running this cell does not raise a KeyError.
df = df.drop(columns=['Cabin', 'Ticket'], errors='ignore')

Handle missing values in the numeric columns by filling them with the column mean (non-numeric columns are not affected).

In [None]:
# Impute each numeric column with its own mean. numeric_only=True is required on
# pandas >= 2.0, where DataFrame.mean() raises a TypeError if the frame contains
# string columns; non-numeric columns are left unchanged by this step.
df = df.fillna(df.mean(numeric_only=True))

Create categorical features for age groups.

In [None]:
# Bucket ages into right-closed intervals (0, 12], (12, 18], (18, 35], (35, 60], (60, 100];
# values outside these edges become NaN.
age_edges = [0, 12, 18, 35, 60, 100]
age_names = ['Child', 'Teen', 'Adult', 'Senior', 'Elderly']
df['Age_Category'] = pd.cut(df['Age'], bins=age_edges, labels=age_names)

Convert categorical variables into dummy/indicator variables.

In [None]:
# One-hot encode the embarkation column. drop_first omits the indicator for the
# first level, so the original 'Embarked' column is replaced by k-1 indicator columns.
encode_cols = ['Embarked']
df = pd.get_dummies(data=df, columns=encode_cols, drop_first=True)

Visualize the survival rate by passenger class using a bar plot.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
# Bar height is the mean of Survived within each passenger class
# (barplot aggregates with the mean by default).
ax = sns.barplot(x='Pclass', y='Survived', data=df)
# The chart groups by Pclass, not by age, so the title says so.
ax.set_title('Survival Rate by Passenger Class')
plt.show()

Visualize the distribution of fare prices using a histogram.

In [None]:
# Histogram of ticket fares in 30 equal-width bins, titled through the returned Axes.
fare_ax = sns.histplot(data=df, x='Fare', bins=30)
fare_ax.set_title('Fare Distribution')
plt.show()

Visualize survival rates based on embarkation point.

In [None]:
# The 'Embarked' column was replaced by indicator columns in the get_dummies step,
# so x='Embarked' can no longer be resolved against df. Re-read just that column
# from the source file; df's rows have not been filtered or reordered since loading,
# so the two series align on the default index.
embarked = pd.read_csv('titanic.csv', usecols=['Embarked'])['Embarked']
sns.barplot(x=embarked, y=df['Survived'])
plt.title('Explore Embarked')
plt.show()

Split the dataset into training and testing sets.

In [None]:
from sklearn.model_selection import train_test_split
# scikit-learn estimators need an all-numeric feature matrix. Drop the target and
# the identifier / free-text columns, then one-hot encode whatever string or
# categorical columns remain (e.g. Sex, Age_Category). Numeric and boolean
# columns pass through get_dummies unchanged.
features = df.drop(columns=['Survived', 'PassengerId', 'Name'])
X = pd.get_dummies(features, drop_first=True)
y = df['Survived']
# Fixed random_state keeps the 80/20 split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Train a Logistic Regression model using the training set.

In [None]:
from sklearn.linear_model import LogisticRegression
# The default max_iter=100 is often too few iterations for the solver on unscaled
# features and ends with a ConvergenceWarning; allow more iterations.
log_reg = LogisticRegression(max_iter=1000)
log_reg.fit(X_train, y_train)

Evaluate the logistic regression model performance.

In [None]:
# Mean accuracy on the held-out split. The bare last expression makes the
# notebook display the value instead of silently discarding it.
log_reg_score = log_reg.score(X_test, y_test)
log_reg_score

Train a Random Forest model on the training data.

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Seed the forest so bootstrap sampling and feature selection, and therefore the
# reported score, are reproducible; 42 matches the seed used for the train/test split.
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

Evaluate the performance of the Random Forest model.

In [None]:
# Mean accuracy on the held-out split. The bare last expression makes the
# notebook display the value instead of silently discarding it.
rf_score = rf_model.score(X_test, y_test)
rf_score

Prepare the submission file with predicted survival outcomes.

In [None]:
# NOTE(review): X is the full labelled frame, most of which the model was trained on,
# so these are not predictions for unseen passengers. Confirm whether a separate
# test file should be scored here instead.
predicted = log_reg.predict(X)
submission = pd.DataFrame({'PassengerId': df['PassengerId'], 'Survived': predicted})
# index=False keeps the row index out of the written file.
submission.to_csv('submission.csv', index=False)