In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# 1. Load dataset
# Reads the passenger table from a CSV expected in the working directory.
data = pd.read_csv("Titanic.csv")

print("Dataset loaded successfully")
print("Total rows:", data.shape[0])
print("Total columns:", data.shape[1])

# 2. Select required columns
# .copy() makes the subset an independent frame, so the column assignments in
# the later preprocessing steps modify `data` itself and do not trigger
# pandas' SettingWithCopyWarning.
data = data[['Survived', 'Pclass', 'Sex', 'Age', 'Fare']].copy()

print("\nSelected columns:")
print(data.head())

# 3. Convert Sex to numeric
sex_encoded = data['Sex'].map({'male': 0, 'female': 1})
# map() turns any label outside the dictionary (or a missing value) into NaN;
# fail here with a clear message instead of later inside model.fit().
if sex_encoded.isna().any():
    raise ValueError("Sex column contains values other than 'male'/'female'")
# assign() builds a new frame rather than writing into `data` in place.
data = data.assign(Sex=sex_encoded)

# 4. Split input and output
X = data[['Pclass', 'Sex', 'Age', 'Fare']]
y = data['Survived']

# 5. Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1
)

# 6. Handle missing Age values
# The fill value is the mean Age of the TRAINING rows only. Computing it on
# the full dataset before splitting would let test-set statistics leak into
# the features the model is trained on.
age_fill = X_train['Age'].mean()
X_train = X_train.fillna({'Age': age_fill})
X_test = X_test.fillna({'Age': age_fill})

# Keep the full frames free of missing Age values as well, so anything that
# reads `data` or `X` after this point still sees a complete Age column.
data = data.fillna({'Age': age_fill})
X = X.fillna({'Age': age_fill})

# Report how many rows ended up on each side of the split.
print("\nTraining samples:", len(X_train))
print("Testing samples:", len(X_test))

# 7. Train model
# fit() returns the estimator itself, so construction and training chain.
model = LogisticRegression().fit(X_train, y_train)

# 8. Predictions
# Predicted class labels (0/1) for the held-out rows.
predictions = model.predict(X_test)

# 9. Accuracy
# Fraction of held-out rows whose predicted label matches the true label.
acc = accuracy_score(y_true=y_test, y_pred=predictions)

# Labels are 0/1, so summing them counts the rows labelled 1 (survived).
predicted_survivors = predictions.sum()
actual_survivors = y_test.sum()

print("\nPredicted survivors count:", predicted_survivors)
print("Actual survivors count:", actual_survivors)
print("Final Accuracy:", acc)


Dataset loaded successfully
Total rows: 891
Total columns: 12

Selected columns:
   Survived  Pclass     Sex   Age     Fare
0         0       3    male  22.0   7.2500
1         1       1  female  38.0  71.2833
2         1       3  female  26.0   7.9250
3         1       1  female  35.0  53.1000
4         0       3    male  35.0   8.0500

Training samples: 623
Testing samples: 268

Predicted survivors count: 101
Actual survivors count: 115
Final Accuracy: 0.7835820895522388
