In [None]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### Goal: Given the features, predict whether a passenger survived or not

In [None]:
# Load the passenger table from the CSV (path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer="titanic.csv")
# Preview the first five rows.
df.head(n=5)

# EDA

In [None]:
# Print the frame summary: columns, non-null counts and dtypes.
df.info()

In [None]:
# Number of missing values in every column of the raw data.
df.isnull().sum()

In [None]:
# Row count for each value of `Survived`.
# NOTE(review): newer seaborn releases warn when `palette` is passed without `hue` — confirm against the installed version.
sns.countplot(x="Survived", data=df, palette="RdBu_r")

In [None]:
# `Survived` counts, with one bar per `Sex` inside each group.
sns.countplot(x="Survived", hue="Sex", data=df)

In [None]:
# `Survived` counts, with one bar per `Pclass` inside each group.
sns.countplot(x="Survived", hue="Pclass", data=df, palette="rainbow")

In [None]:
# Row count for each value of `SibSp`.
sns.countplot(x="SibSp", data=df)

In [None]:
# Row count for each value of `Parch`.
sns.countplot(x="Parch", data=df)

In [None]:
# Descriptive statistics for the frame's columns (pandas `DataFrame.describe` defaults).
df.describe()

In [None]:
# Distribution of `Fare`: density-scaled histogram with a KDE curve on top.
# `sns.distplot` is deprecated in seaborn; `histplot` with `kde=True` and
# `stat="density"` is the documented replacement for this kind of plot.
sns.histplot(df["Fare"], kde=True, stat="density")

In [None]:
# Plain pandas histogram of `Fare`, drawn in green.
df.Fare.hist(color="green")

In [None]:
# Distribution of `Age`: density-scaled histogram with a KDE curve on top.
# `sns.distplot` is deprecated in seaborn; `histplot` with `kde=True` and
# `stat="density"` is the documented replacement for this kind of plot.
sns.histplot(df["Age"], kde=True, stat="density")

In [None]:
# Pairwise relationships between the columns of the frame.
sns.pairplot(data=df)

In [None]:
# Same pairwise grid, coloured by the `Survived` value.
sns.pairplot(data=df, hue="Survived")

# Data Cleaning & Preprocessing

In [None]:
# Missing values per column before any cleaning is applied.
df.isnull().sum()

In [None]:
# Visual map of missing cells (rows x columns); row labels and colour bar are hidden.
sns.heatmap(df.isna(), cmap="viridis", cbar=False, yticklabels=False)

In [None]:
# Spread of `Age` within each `Pclass`.
sns.boxplot(data=df, x="Pclass", y="Age")
# Call grid() on the current axes (same no-argument call as before).
plt.grid()

In [None]:
def fillage(cols):
    """Return an age for one passenger, imputing it from the class when missing.

    Parameters
    ----------
    cols : pandas.Series or sequence
        Two values in the order (age, pclass). Typically one row of
        ``df[["Age", "Pclass"]]`` passed in by ``DataFrame.apply(..., axis=1)``.

    Returns
    -------
    The original age when it is present; otherwise 38 for class 1,
    29 for class 2 and 24 for any other class.
    """
    # A row Series has string labels, so `cols[0]` would be a label lookup that
    # only works through a deprecated positional fallback. Use explicit
    # positional access for Series and plain indexing for lists/tuples.
    if hasattr(cols, "iloc"):
        age, pclass = cols.iloc[0], cols.iloc[1]
    else:
        age, pclass = cols[0], cols[1]

    # A known age is passed through untouched.
    if not pd.isnull(age):
        return age

    # Fixed per-class replacement ages.
    # NOTE(review): presumably read off the Age-by-Pclass boxplot — confirm.
    if pclass == 1:
        return 38
    if pclass == 2:
        return 29
    return 24

In [None]:
# Replace `Age` with the row-wise result of `fillage` applied to (Age, Pclass).
df["Age"] = df[["Age", "Pclass"]].apply(fillage, axis="columns")

In [None]:
# Missing-cell map again, now that `Age` has been filled.
sns.heatmap(df.isna(), cmap="viridis", cbar=False, yticklabels=False)

In [None]:
# Remove the `Cabin` column.
# Rebinding with `errors="ignore"` keeps this cell re-runnable: the in-place
# form raised KeyError on a second execution because the column was already gone.
df = df.drop(columns="Cabin", errors="ignore")

In [None]:
# Missing-cell map after `Cabin` has been removed.
sns.heatmap(df.isna(), cmap="viridis", cbar=False, yticklabels=False)

In [None]:
# Remaining missing values per column.
df.isnull().sum()

In [None]:
# Drop, in place, every row that still contains at least one missing value.
df.dropna(axis=0, how="any", inplace=True)

In [None]:
# Check the per-column missing counts after the row drop.
df.isnull().sum()

In [None]:
# Print the frame summary again (columns, non-null counts, dtypes) after cleaning.
df.info()

In [None]:
# First five rows of the cleaned frame.
df.head(n=5)

In [None]:
# Remove the `PassengerId`, `Ticket` and `Name` columns.
# Rebinding with `errors="ignore"` keeps this cell re-runnable: the in-place
# form raised KeyError on a second execution because the columns were already gone.
df = df.drop(columns=["PassengerId", "Ticket", "Name"], errors="ignore")

In [None]:
# First five rows with only the remaining columns.
df.head(n=5)

# Data Preprocessing

In [None]:
# Target: the `Survived` column, selected by name so the split does not depend
# on column order.
y = df["Survived"]
# Features: every other column. `drop` returns a new frame, so the later
# column assignments on `x` no longer write into a slice of `df`.
x = df.drop(columns="Survived")

In [None]:
# How many rows fall in each `Sex` category.
x["Sex"].value_counts()

In [None]:
# How many rows fall in each `Embarked` category.
x["Embarked"].value_counts()

In [None]:
from sklearn.preprocessing import OrdinalEncoder

# Learn the categories of `Sex` and `Embarked`, then overwrite both columns
# with their ordinal codes.
oe = OrdinalEncoder()
oe.fit(x[["Sex", "Embarked"]])
x[["Sex", "Embarked"]] = oe.transform(x[["Sex", "Embarked"]])

In [None]:
# Display the feature frame after encoding.
x

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=1,
)

# Model Building

In [None]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic-regression classifier on the training split and predict the
# held-out rows.
# The features are not scaled, so the default 100 solver iterations may not be
# enough to converge — and the ConvergenceWarning would be hidden by the
# `warnings.filterwarnings("ignore")` at the top. Give the solver more room.
logreg = LogisticRegression(max_iter=1000)
logreg.fit(xtrain, ytrain)
ypred = logreg.predict(xtest)

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Score the held-out predictions three ways: per-class report, confusion
# matrix and overall accuracy.
cr = classification_report(y_true=ytest, y_pred=ypred)
cm = confusion_matrix(y_true=ytest, y_pred=ypred)
ac = accuracy_score(y_true=ytest, y_pred=ypred)

print(f"Accuracy : {ac}\n{cm}\n\n{cr}")

In [None]:
# Model score on the rows it was fit on versus the held-out rows.
train, test = logreg.score(xtrain, ytrain), logreg.score(xtest, ytest)

print(f"Training Score : {train}\nTesting Score : {test}")

In [None]:
#high bias + low variance => underfitting

In [None]:
# Count the rows in each target class (checked before the stratified re-split below).
y.value_counts()

In [None]:
# Repeat the 70/30 split, this time stratified on `y` so both parts keep the
# same class proportions.
xtrain, xtest, ytrain, ytest = train_test_split(
    x, y, test_size=0.3, random_state=1, stratify=y
)

# The features are not scaled, so the default 100 solver iterations may not be
# enough to converge — and the ConvergenceWarning would be hidden by the
# `warnings.filterwarnings("ignore")` at the top. Give the solver more room.
logreg = LogisticRegression(max_iter=1000)
logreg.fit(xtrain, ytrain)
ypred = logreg.predict(xtest)

# Accuracy, confusion matrix and per-class report on the held-out rows.
ac = accuracy_score(ytest, ypred)
cm = confusion_matrix(ytest, ypred)
cr = classification_report(ytest, ypred)

print(f"Accuracy : {ac}\n{cm}\n\n{cr}")

In [None]:
# Score of the re-fitted model on its training rows and on the held-out rows.
train, test = (
    logreg.score(X=xtrain, y=ytrain),
    logreg.score(X=xtest, y=ytest),
)

print(f"Training Score : {train}\nTesting Score : {test}")

In [None]:
#low bias + low variance => best fit

# Forecast the New Observation

In [None]:
x