In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

# --- Configuration -----------------------------------------------------------
DATA_PATH = 'titanic.csv'
SEED = 42  # shared by the split AND the tree so a fresh re-run gives the same numbers
FEATURE_COLUMNS = ['Pclass', 'Sex', 'Age', 'Fare']
TARGET_COLUMN = 'Survived'

# --- Load Titanic dataset ----------------------------------------------------
raw_df = pd.read_csv(DATA_PATH)

# --- Basic preprocessing -----------------------------------------------------
# Work on an explicit copy of the columns we need so raw_df stays untouched.
df = raw_df[[TARGET_COLUMN] + FEATURE_COLUMNS].copy()

# Encode Sex as 0/1. Series.map turns every label that is not a key of the
# dict into NaN, so check for that explicitly instead of letting bad labels
# (different casing, stray whitespace, missing values) flow into the model.
sex_codes = df['Sex'].map({'male': 0, 'female': 1})
unmapped = sex_codes.isna()
if unmapped.any():
    raise ValueError(
        "Unexpected or missing values in 'Sex': "
        f"{df.loc[unmapped, 'Sex'].unique().tolist()}"
    )
# Plain column assignment replaces the column, so it takes the numeric dtype of
# the mapped values (df.loc[:, 'Sex'] = ... writes into the existing object
# column and can leave the codes stored as Python objects).
df['Sex'] = sex_codes

# Fill missing ages with the mean age.
# NOTE: the mean is taken over all rows, i.e. before the train/test split below.
df['Age'] = df['Age'].fillna(df['Age'].mean())

# --- Split into features and target ------------------------------------------
X = df.drop(TARGET_COLUMN, axis=1)
y = df[TARGET_COLUMN]

# --- Split into train and test sets (80% / 20%) ------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED
)
assert len(X_train) + len(X_test) == len(X)

# --- Train Decision Tree -----------------------------------------------------
# random_state is required for a reproducible tree: scikit-learn permutes the
# features at every split, so ties between equally good splits are otherwise
# broken differently from run to run.
model = DecisionTreeClassifier(random_state=SEED)
model.fit(X_train, y_train)

# --- Predict -----------------------------------------------------------------
y_pred = model.predict(X_test)

# --- Accuracy ----------------------------------------------------------------
print("Accuracy:", accuracy_score(y_test, y_pred))

# --- Visualize the tree ------------------------------------------------------
plt.figure(figsize=(15, 10))
plot_tree(
    model,
    # Pass a plain list: some scikit-learn releases reject a pandas Index here.
    feature_names=list(X.columns),
    # Order follows the sorted class labels of y (0, then 1).
    class_names=["Not Survived", "Survived"],
    filled=True,
)
plt.show()

     

Accuracy: 0.7430167597765364
