# Titanic Mini-Project
This project demonstrates data preprocessing, analysis, visualization, and a basic machine learning model using a sample Titanic dataset.

In [None]:
# Import necessary libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score


## Step 1: Load Dataset

In [None]:
# Read the sample Titanic CSV into a DataFrame and preview its first rows
dataset_path = "sample_titanic.csv"
df = pd.read_csv(dataset_path)
df.head(5)

## Step 2: Data Cleaning & Preprocessing

In [None]:
# Remove exact duplicate rows
df.drop_duplicates(inplace=True)

# Encode the categorical columns as integers.
# Series.map turns any value that is missing from the mapping (unexpected
# casing, stray whitespace, an unknown port code, ...) into NaN, so the
# encoding is done BEFORE incomplete rows are dropped.
df['sex'] = df['sex'].map({'male': 0, 'female': 1})
df['embarked'] = df['embarked'].map({'S': 0, 'C': 1, 'Q': 2})

# Drop (not fill) every row that still has a missing value in any column.
# Running this after the encoding also removes rows whose category could not
# be mapped, so no NaN can reach the model in Step 5.
df.dropna(inplace=True)

# If the mapping produced NaN, the encoded columns were promoted to float;
# with those rows gone they can safely be integer codes again.
df['sex'] = df['sex'].astype('int64')
df['embarked'] = df['embarked'].astype('int64')

df.head()

## Step 3: Exploratory Data Analysis (EDA)

In [None]:
# Descriptive statistics for the DataFrame's columns (pandas defaults)
summary_stats = df.describe()
summary_stats

In [None]:
# Pairwise correlation matrix, restricted to the numeric columns
correlation_matrix = df.corr(numeric_only=True)
correlation_matrix

## Step 4: Visualization

In [None]:
# Bar chart of how many rows fall under each value of 'survived'
survival_ax = sns.countplot(data=df, x='survived')
survival_ax.set_title('Survival Count')
plt.show()

In [None]:
# Histogram of the 'age' column with a kernel density estimate overlaid
age_values = df['age']
age_ax = sns.histplot(age_values, kde=True)
age_ax.set_title('Age Distribution')
plt.show()

In [None]:
# Box plot of 'fare' grouped by 'pclass'
fare_ax = sns.boxplot(data=df, x='pclass', y='fare')
fare_ax.set_title('Fare vs Passenger Class')
plt.show()

## Step 5: Basic Model Implementation - Logistic Regression

In [None]:
# Select the predictor columns and the target column
feature_columns = ['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare', 'embarked']
X = df[feature_columns]
y = df['survived']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Create the classifier and fit it on the training rows (fit returns the estimator)
model = LogisticRegression(max_iter=200).fit(X_train, y_train)

# Predict labels for the held-out rows
y_pred = model.predict(X_test)

# Print the overall accuracy, then the per-class classification report
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)
test_report = classification_report(y_test, y_pred)
print(test_report)