In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from imblearn.over_sampling import SMOTE

In [3]:
titanic = pd.read_csv('titanic_train.csv')

In [4]:
titanic.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,207,0,3,"Backstrom, Mr. Karl Alfred",male,32.0,1,0,3101278,15.85,,S
1,147,1,3,"Andersson, Mr. August Edvard (""Wennerstrom"")",male,27.0,0,0,350043,7.7958,,S
2,212,1,2,"Cameron, Miss. Clear Annie",female,35.0,0,0,F.C.C. 13528,21.0,,S
3,845,0,3,"Culumovic, Mr. Jeso",male,17.0,0,0,315090,8.6625,,S
4,406,0,2,"Gale, Mr. Shadrach",male,34.0,1,0,28664,21.0,,S


In [5]:
titanic.isnull().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            134
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          555
Embarked         1
dtype: int64

In [6]:
titanic["Embarked"].value_counts()

Embarked
S    512
C    141
Q     58
Name: count, dtype: int64

In [7]:
titanic['Age'] = titanic['Age'].fillna(titanic['Age'].median())

titanic['Embarked'] = titanic['Embarked'].fillna(titanic['Embarked'].mode()[0])

titanic['Fare'] = titanic['Fare'].fillna(titanic['Fare'].median())

titanic['Sex'] = titanic['Sex'].map({'male': 0, 'female': 1})

titanic['Embarked'] = titanic['Embarked'].astype('category').cat.codes

In [8]:
titanic["Embarked"].value_counts()

Embarked
2    513
0    141
1     58
Name: count, dtype: int64

In [9]:
titanic['Familysize'] = titanic['SibSp'] + titanic['Parch'] + 1

titanic['Isalone'] = (titanic['Familysize'] == 1).astype(int)

titanic['HasCabin'] = titanic['Cabin'].notnull().astype(int)

titanic['Title'] = titanic['Name'].str.extract(' ([A-Za-z]+)\.', expand=False).map(
    {'Mr': 0, 'Miss': 1, 'Mrs': 2, 'Master': 3, 'Rare': 4}
).fillna(4)

titanic['Pclass_Fare'] = titanic['Pclass'] * titanic['Fare']

titanic['Age_Fare'] = titanic['Age'] * titanic['Fare']

In [10]:
titanic.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Familysize,Isalone,HasCabin,Title,Pclass_Fare,Age_Fare
0,207,0,3,"Backstrom, Mr. Karl Alfred",0,32.0,1,0,3101278,15.85,,2,2,0,0,0.0,47.55,507.2
1,147,1,3,"Andersson, Mr. August Edvard (""Wennerstrom"")",0,27.0,0,0,350043,7.7958,,2,1,1,0,0.0,23.3874,210.4866
2,212,1,2,"Cameron, Miss. Clear Annie",1,35.0,0,0,F.C.C. 13528,21.0,,2,1,1,0,1.0,42.0,735.0
3,845,0,3,"Culumovic, Mr. Jeso",0,17.0,0,0,315090,8.6625,,2,1,1,0,0.0,25.9875,147.2625
4,406,0,2,"Gale, Mr. Shadrach",0,34.0,1,0,28664,21.0,,2,2,0,0,0.0,42.0,714.0


In [11]:
X = titanic[['Pclass', 'Sex', 'Age', 'Fare', 'Embarked', 'Familysize', 'Isalone', 'HasCabin', 'Title', 'Pclass_Fare', 'Age_Fare']]
y = titanic['Survived']


In [12]:
titanic["Survived"].value_counts()

Survived
0    440
1    272
Name: count, dtype: int64

In [13]:
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

In [14]:
y_resampled.value_counts()

Survived
0    440
1    440
Name: count, dtype: int64

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

In [16]:
param_distributions = {
    'n_estimators': [100, 200, 300],
    'max_depth': [10, 20, 30],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2]
}

In [17]:
rf = RandomForestClassifier(random_state=42)
random_search = RandomizedSearchCV(rf, param_distributions, n_iter=10, cv=3, scoring='accuracy', random_state=42)
random_search.fit(X_train, y_train)

In [18]:
best_rf = random_search.best_estimator_
y_pred = best_rf.predict(X_test)
rf_accuracy = accuracy_score(y_test, y_pred)
print(f"Random Forest Accuracy: {rf_accuracy:.2f}")

Random Forest Accuracy: 0.84
