In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the Titanic passenger CSV into a DataFrame.
# NOTE(review): the absolute /content path presumably targets a Colab session — adjust when running elsewhere.
df = pd.read_csv("/content/titanic.csv")

In [None]:
# Preview the first rows to check the column names and value formats.
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [None]:
# Size of data as a (rows, columns) tuple
df.shape

In [None]:
# Drop the irrelevant columns: these fields are not used as model features.
# errors="ignore" keeps the cell re-runnable; because df is rebound here, a second
# run would otherwise raise KeyError for columns that are already gone.
cols_to_remove = ["PassengerId", "Name", "Ticket"]
df = df.drop(columns=cols_to_remove, errors="ignore")

In [None]:
# Null values check: number of missing entries in each column
df.isnull().sum()

In [None]:
# Percent of missing values, reported only for columns that have gaps
# (Age, Cabin, Embarked)
n = len(df)
null_counts = df.isnull().sum()
for col in null_counts.index[null_counts > 0]:
    null_count = null_counts[col]
    pct_missing = round((null_count / n) * 100, 2)
    print(f" % of missing value for {col} : {pct_missing} %")

In [None]:
# Dropping Cabin
# NOTE(review): presumably because of its share of missing values — confirm against
# the percentages printed by the previous cell.
# errors="ignore" keeps the cell re-runnable once the column has been removed.
df = df.drop(columns="Cabin", errors="ignore")

In [None]:
# Bar chart of the number of rows per Embarked value.
sns.countplot(data=df,x="Embarked")

In [None]:
# Histogram of the Age column.
df["Age"].hist()

In [None]:
# Summary statistics for the numeric columns.
df.describe()

In [None]:
# Impute the remaining gaps in one pass:
#   Age      -> column median
#   Embarked -> most frequent value (mode)
df = df.fillna(
    {
        "Age": df["Age"].median(),
        "Embarked": df["Embarked"].value_counts().idxmax(),
    }
)

In [None]:
# Confirm that no missing values remain after the fills above.
df.isnull().sum()

Survived    0
Pclass      0
Sex         0
Age         0
SibSp       0
Parch       0
Fare        0
Embarked    0
dtype: int64

In [None]:
# Final cleaned data: preview after dropping columns and filling missing values
df.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,0,3,male,22.0,1,0,7.25,S
1,1,1,female,38.0,1,0,71.2833,C
2,1,3,female,26.0,0,0,7.925,S
3,1,1,female,35.0,1,0,53.1,S
4,0,3,male,35.0,0,0,8.05,S


In [None]:
# Distinct values of Sex (the categories that get label-encoded below).
df["Sex"].unique()

array(['male', 'female'], dtype=object)

In [None]:
# Distinct values of Embarked (the categories that get label-encoded below).
df["Embarked"].unique()

array(['S', 'C', 'Q'], dtype=object)

In [None]:
from sklearn.preprocessing import LabelEncoder

# One encoder per categorical column so each keeps its own class mapping.
gender_le, embarked_le = LabelEncoder(), LabelEncoder()

In [None]:
# Replace the string categories with the integer codes learned by each encoder.
# fit_transform refits on whatever the column currently holds, so this cell is
# meant to run once, while Sex and Embarked still contain strings.
df = df.assign(
    Sex=gender_le.fit_transform(df["Sex"]),
    Embarked=embarked_le.fit_transform(df["Embarked"]),
)

In [None]:
# Check that Sex and Embarked now hold integer codes.
df.head()

In [None]:
# Classes learned by the Sex encoder; a label's position in this array is its integer code.
gender_le.classes_

array(['female', 'male'], dtype=object)

In [None]:
# Classes learned by the Embarked encoder; a label's position in this array is its integer code.
embarked_le.classes_

In [None]:
# Survived is the prediction target; every other column is a feature.
y = df["Survived"]
X = df.drop(columns=["Survived"])

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=20,
)

In [None]:
# Sanity-check the split sizes: feature and target row counts must match per split.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((712, 7), (179, 7), (712,), (179,))

In [None]:
from sklearn.linear_model import LogisticRegression

# Train a logistic-regression classifier with default hyperparameters on the
# training split; fit() returns the estimator, which is then displayed.
lr = LogisticRegression().fit(X_train, y_train)
lr

In [None]:
# Predicted Survived labels for the held-out test rows.
y_pred = lr.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score

# scikit-learn metrics take (y_true, y_pred) in that order. Accuracy is symmetric,
# so the value is unchanged, but the correct order matters as soon as an
# asymmetric metric (precision, recall, ...) is copied from this pattern.
print("Accuracy Score : ", accuracy_score(y_test, y_pred))

Accuracy Score :  0.8268156424581006


In [None]:
# Interactive single-passenger prediction: read the seven features from the
# user, encode the categorical ones, and print the predicted Survived label.
Pclass = int(input("Pclass (1/2/3)"))
# Trim and normalise case so inputs such as "Male " still match the encoder
# classes ('female' / 'male'); transform raises ValueError on unknown labels.
Sex = input("Enter gender (male/female)").strip().lower()
# Age is a float feature in the training data, so accept fractional ages too.
Age = float(input("Enter Age"))
SibSp = int(input("SibSp (1/2/3)"))  # 0 is a valid answer as well
Parch = int(input("Parch (1/2/3)"))  # 0 is a valid answer as well
Fare = float(input("Enter the Fare"))
# Embarked classes are upper-case ('S' / 'C' / 'Q').
Embarked = input("Enter Embarked (S/C/Q)").strip().upper()

# Map the raw labels to the integer codes learned during training.
Sex = gender_le.transform([Sex])[0]
Embarked = embarked_le.transform([Embarked])[0]

# The model was fitted on a DataFrame, so predict on a DataFrame with the same
# column names and order; a bare list makes scikit-learn warn that X has no
# valid feature names and hides any column-order mistake.
feature_names = ["Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked"]
data = pd.DataFrame(
    [[Pclass, Sex, Age, SibSp, Parch, Fare, Embarked]],
    columns=feature_names,
)
print(" Prediction : ", lr.predict(data)[0])

In [None]:
import pickle

# Persist the fitted encoders and the model so a separate notebook can reuse them.
# Each file is opened in a `with` block so the handle is closed (and the data
# flushed to disk) as soon as the dump finishes.
artifacts = {
    "/content/gender_le.pkl": gender_le,
    "/content/embarked_le.pkl": embarked_le,
    "/content/lr.pkl": lr,
}
for path, obj in artifacts.items():
    with open(path, "wb") as fh:
        pickle.dump(obj, fh)

# New Notebook

In [None]:
# Load the fitted encoders and model saved by the training notebook.
# NOTE: pickle.load can execute arbitrary code — only load files you created yourself.
import pickle

# `with` closes each file handle once its object has been read.
with open("/content/gender_le.pkl", "rb") as fh:
    gender_le = pickle.load(fh)
with open("/content/embarked_le.pkl", "rb") as fh:
    embarked_le = pickle.load(fh)
with open("/content/lr.pkl", "rb") as fh:
    lr = pickle.load(fh)

In [None]:
# Interactive single-passenger prediction using the unpickled encoders and model.
import pandas as pd  # imported here so this cell does not depend on an earlier pandas import

Pclass = int(input("Pclass (1/2/3)"))
# Trim and normalise case so inputs such as "Male " still match the encoder
# classes ('female' / 'male'); transform raises ValueError on unknown labels.
Sex = input("Enter gender (male/female)").strip().lower()
# Age is a float feature in the training data, so accept fractional ages too.
Age = float(input("Enter Age"))
SibSp = int(input("SibSp (1/2/3)"))  # 0 is a valid answer as well
Parch = int(input("Parch (1/2/3)"))  # 0 is a valid answer as well
Fare = float(input("Enter the Fare"))
# Embarked classes are upper-case ('S' / 'C' / 'Q').
Embarked = input("Enter Embarked (S/C/Q)").strip().upper()

# Map the raw labels to the integer codes learned during training.
Sex = gender_le.transform([Sex])[0]
Embarked = embarked_le.transform([Embarked])[0]

# The model was fitted on a DataFrame, so predict on a DataFrame with the same
# column names and order; a bare list makes scikit-learn warn that X has no
# valid feature names and hides any column-order mistake.
feature_names = ["Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked"]
data = pd.DataFrame(
    [[Pclass, Sex, Age, SibSp, Parch, Fare, Embarked]],
    columns=feature_names,
)
print(" Prediction : ", lr.predict(data)[0])

Pclass (1/2/3)1
Enter gender (male/female)male
Enter Age34
SibSp (1/2/3)0
Parch (1/2/3)0
Enter the Fare6
Enter Embarked (S/C/Q)S
 Prediction :  0


