# Titanic: Quick EDA and Baseline Model
This notebook loads the Titanic dataset (via seaborn), performs basic exploratory data analysis (EDA) and simple preprocessing, and trains a baseline Logistic Regression classifier. It is useful for learning data cleaning, visualization, and how to fit a first model.

In [None]:
import sys
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
print('Python:', sys.executable)

In [None]:
# Load dataset
# Pull the 'titanic' example table through seaborn's dataset loader and keep
# it as `df`, the raw frame that the later cells read from.
df = sns.load_dataset(name='titanic')

# Preview the first five rows (the default for head()) as the cell output.
df.head(n=5)

In [None]:
# Quick info and missing values
# Overall size of the frame as (rows, columns).
print('Shape:', df.shape)

# Column dtypes and non-null counts; DataFrame.info() writes straight to
# stdout, so it is called bare rather than wrapped in print().
print('\nInfo:')
df.info()

# Number of missing entries in each column.
print('\nMissing values per column:')
print(df.isnull().sum())

# Summary statistics for every column (numeric and non-numeric alike).
# Left as the cell's final bare expression so the notebook renders it as a
# rich table; print() would flatten the wide frame into wrapped plain text.
print('\nDescribe:')
df.describe(include='all')

In [None]:
# Basic visualizations
# Every chart is drawn on its own 6x4-inch figure through an explicit Axes
# handle, then shown before the next figure is created.

# How many passengers fall into each value of the target.
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=df, x='survived', ax=ax)
ax.set_title('Survived count')
plt.show()

# Passenger counts per sex, split by the target.
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=df, x='sex', hue='survived', ax=ax)
ax.set_title('Survival by sex')
plt.show()

# Histogram of the known ages (missing values dropped first), 20 bins.
fig, ax = plt.subplots(figsize=(6, 4))
known_ages = df['age'].dropna()
sns.histplot(known_ages, bins=20, ax=ax)
ax.set_title('Age distribution')
plt.show()

In [None]:
# Simple preprocessing + baseline model
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Select a small set of features (plus the target) and work on a copy so the
# raw frame `df` is not modified by the preprocessing below.
df_mod = df[['survived','pclass','sex','age','sibsp','parch','fare','embarked']].copy()

# Fill missing embarkation ports with the fixed constant 'S'. Because this is a
# constant rather than a statistic estimated from the data, applying it before
# the split does not leak information from the test rows.
df_mod['embarked'] = df_mod['embarked'].fillna('S')

# One-hot encode categorical columns; drop_first drops one level per variable
# so the dummy columns are not redundant with each other.
df_mod = pd.get_dummies(df_mod, columns=['sex','embarked'], drop_first=True)

X = df_mod.drop('survived', axis=1)
y = df_mod['survived']

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Impute missing ages AFTER splitting, using the median of the training rows
# only. Computing the median on the full dataset would let the test rows
# influence preprocessing (data leakage) and bias the reported score.
# NOTE: `df_mod` and `X` therefore still contain NaN in 'age'; only
# `X_train` / `X_test` are imputed.
age_median = X_train['age'].median()
X_train = X_train.assign(age=X_train['age'].fillna(age_median))
X_test = X_test.assign(age=X_test['age'].fillna(age_median))

# Baseline classifier; max_iter is raised so the solver has room to converge
# on these unscaled features.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)
pred = model.predict(X_test)

print('Accuracy:', accuracy_score(y_test, pred))
# Print the heading and the report separately: passing both to a single
# print() inserts a separator space that shifts the report's first line.
print('\nClassification report:')
print(classification_report(y_test, pred))

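**Next steps:** try feature engineering (e.g., title extraction from passenger names), handle missing cabin values, try tree-based models, and tune hyperparameters. Note that seaborn's copy of the dataset has no `name` or `cabin` column — it provides `who`/`adult_male` and `deck` instead — so either build on those columns or load the original Kaggle CSV for name- and cabin-based features.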