#Important Libraries

In [None]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn

# Read Data

In [None]:
# Load the dataset from 'titanic.csv' (path is relative to the working directory).
df = pd.read_csv('titanic.csv')

In [None]:
# Display the loaded DataFrame.
df

#Explore Data in Depth

In [None]:
# Dimensions of the DataFrame as (rows, columns).
df.shape

In [None]:
# Column labels.
df.columns

In [None]:
# Data type of each column.
df.dtypes

In [None]:
# Number of non-null entries per column.
df.count()

In [None]:
# Boolean mask: True where a value is missing.
df.isnull()

In [None]:
# Boolean mask: True where a value is present (the inverse of isnull).
df.notnull()

In [None]:
# Number of missing values per column.
df.isnull().sum()

In [None]:
# First rows, using the default row count of head().
df.head()

In [None]:
# First 10 rows.
df.head(10)

In [None]:
# First rows of a single column.
df['Pclass'].head()

In [None]:
# Last rows, using the default row count of tail().
df.tail()

In [None]:
# Last 10 rows.
df.tail(10)

In [None]:
# One randomly chosen row; no seed is given, so the output differs between runs.
df.sample()

In [None]:
# Five randomly chosen rows.
df.sample(5)

In [None]:
# Positional slice: rows 0 through 9.
df[0:10]

In [None]:
# Summary of the frame printed by pandas (columns, non-null counts, dtypes).
df.info()

In [None]:
# Descriptive statistics for the columns pandas summarises by default.
df.describe()

In [None]:
# Same statistics, transposed so that each column becomes a row.
df.describe().T

In [None]:
# Number of rows for each distinct value of 'Survived'.
df['Survived'].value_counts()

In [None]:
# Number of rows per 'PassengerId' value.
# NOTE(review): presumably every id is unique, making this a duplicate check -- confirm.
df.groupby('PassengerId').size()

#Visualization of Data

In [None]:
df.hist(figsize = (20,20))

In [None]:
sn.pairplot(df, hue = 'Survived')

In [None]:
df.corr()

In [None]:
sn.heatmap(df.corr())

In [None]:
sn.countplot(x='Survived',data=df)
plt.show()

In [None]:
sn.scatterplot(x='Fare', y='Age',data=df)
plt.show()

In [None]:
sn.distplot(df['Age'])

#Data Cleaning

In [None]:
# Number of missing values per column, re-checked before cleaning.
df.isnull().sum()

The Age column has missing values; they are filled in below using the passenger class (Pclass).

In [None]:
# Box plot of Age grouped by Pclass, drawn on a 10 x 7 figure.
plt.figure(figsize=(10, 7))
sn.boxplot(data=df, x='Pclass', y='Age')

In [None]:
def age(cols):
    """Return a passenger's age, substituting a class-based default when missing.

    Parameters
    ----------
    cols : sequence
        The age first and the passenger class second, e.g. one row of
        ``df[['Age', 'Pclass']]`` as supplied by ``DataFrame.apply(axis=1)``.
        A plain list or tuple works as well.

    Returns
    -------
    The age unchanged when it is not null; otherwise 37 for class 1,
    29 for class 2 and 24 for any other class.
    """
    # Unpack by position instead of using cols[0] / cols[1]: on a Series whose
    # index holds labels ('Age', 'Pclass'), integer keys are deprecated as
    # positions and are treated as labels by newer pandas versions.
    passenger_age, pclass, *_ = cols

    if not pd.isnull(passenger_age):
        return passenger_age

    # Hard-coded fill values per class.
    # NOTE(review): presumably approximate per-class typical ages read off the
    # Age-by-Pclass box plot -- confirm against the data.
    if pclass == 1:
        return 37
    if pclass == 2:
        return 29
    return 24

In [None]:
# Apply age() to every row of the (Age, Pclass) pair and write the result back
# into 'Age', so null ages are replaced by the class-based defaults.
df['Age'] = df[['Age','Pclass']].apply(age,axis=1)

In [None]:
# Re-count missing values per column after the fill.
df.isnull().sum()

In [None]:
# Peek at the raw 'Sex' values before encoding.
df['Sex'].head()

In [None]:
# Indicator (dummy) columns, one per distinct 'Sex' value.
pd.get_dummies(df['Sex'])

In [None]:
# Same encoding with the first category dropped, leaving one column fewer.
pd.get_dummies(df['Sex'],drop_first=True)

In [None]:
# Keep the drop_first encoding for use as a model feature.
gender = pd.get_dummies(df['Sex'],drop_first=True)

In [None]:
# Append the indicator column(s) to the frame, side by side (axis=1).
df = pd.concat([df,gender],axis=1)

In [None]:
df.head()

In [None]:
# The cells below remove columns in place, one per cell. Re-running any of them
# after the column is gone raises a KeyError.
# NOTE(review): 'Embarked' and 'Fare' are discarded rather than used as features --
# presumably a simplification; confirm this is intended.
df.drop('Cabin',axis=1,inplace=True)

In [None]:
df.head()

In [None]:
df.drop('Name',axis=1,inplace=True)

In [None]:
df.drop('PassengerId',axis=1,inplace=True)

In [None]:
df.drop('Embarked',axis=1,inplace=True)

In [None]:
# The raw 'Sex' column is dropped after its dummy encoding was appended to df.
df.drop('Sex',axis=1,inplace=True)

In [None]:
df.drop('Ticket',axis=1,inplace=True)

In [None]:
df.drop('Fare',axis=1,inplace=True)

In [None]:
# Remaining columns after all drops.
df.head()

# Decide Dependent and Independent Variables in the Data Set

In [None]:
# Feature matrix: every remaining column except 'Survived'.
x=df.drop('Survived',axis=1)

In [None]:
x

In [None]:
# Target vector: the 'Survived' column.
y=df['Survived']

In [None]:
y

#Train Test Split

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
X_train, X_test, y_train, y_test =train_test_split(x,y,test_size=0.3)

In [None]:
X_train.info()

In [None]:
X_test.info()

#Machine Learning Models

1. Linear
2. Logistic
3. SVC
4. KNN
5. K-Means
6. Decision Tree
7. Random Forest
8. Naive Bayes

#Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Logistic-regression classifier with scikit-learn's default hyperparameters.
logmodel = LogisticRegression() 

In [None]:
# Fit on the training split.
logmodel.fit(X_train,y_train)

In [None]:
# Predicted labels for the test split.
# NOTE: `predictions` is reassigned by each later model section.
predictions = logmodel.predict(X_test)

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Text report comparing the true test labels (first) with the predictions (second).
print(classification_report(y_test,predictions))

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Confusion matrix: true labels first, predictions second.
print(confusion_matrix(y_test,predictions))

#SVM

In [None]:
from sklearn.svm import SVC

In [None]:
# Support-vector classifier with scikit-learn's default hyperparameters.
SVM = SVC() 

In [None]:
# Fit on the training split.
SVM.fit(X_train,y_train)

In [None]:
# Predicted labels for the test split (overwrites the previous model's `predictions`).
predictions = SVM.predict(X_test)

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Text report comparing the true test labels with the predictions.
print(classification_report(y_test,predictions))

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Confusion matrix: true labels first, predictions second.
print(confusion_matrix(y_test,predictions))

#KNN

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# k-nearest-neighbours classifier with scikit-learn's default hyperparameters.
knn = KNeighborsClassifier() 

In [None]:
# Fit on the training split.
knn.fit(X_train,y_train)

In [None]:
# Predicted labels for the test split (overwrites the previous model's `predictions`).
predictions = knn.predict(X_test)

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Text report comparing the true test labels with the predictions.
print(classification_report(y_test,predictions))

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Confusion matrix: true labels first, predictions second.
print(confusion_matrix(y_test,predictions))

#K-Means

In [None]:
from sklearn.cluster import KMeans

In [None]:
Kmean = KMeans(n_clusters=2)

In [None]:
Kmean.fit(X_train,y_train)

In [None]:
predictions = Kmean.predict(X_test)

In [None]:
from sklearn.metrics import classification_report

In [None]:
print(classification_report(y_test,predictions))

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
print(confusion_matrix(y_test,predictions))

#Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Decision-tree classifier with scikit-learn's default hyperparameters.
# No random_state is given, so results may vary between runs.
dtree = DecisionTreeClassifier()

In [None]:
# Fit on the training split.
dtree.fit(X_train,y_train)

In [None]:
# Predicted labels for the test split (overwrites the previous model's `predictions`).
predictions = dtree.predict(X_test)

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Text report comparing the true test labels with the predictions.
print(classification_report(y_test,predictions))

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Confusion matrix: true labels first, predictions second.
print(confusion_matrix(y_test,predictions))

#Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Random forest of 100 trees. No random_state is given, so results may vary
# between runs.
rfc = RandomForestClassifier(n_estimators=100)

In [None]:
# Fit on the training split.
rfc.fit(X_train,y_train)

In [None]:
# Predicted labels for the test split (overwrites the previous model's `predictions`).
predictions = rfc.predict(X_test)

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Text report comparing the true test labels with the predictions.
print(classification_report(y_test,predictions))

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Confusion matrix: true labels first, predictions second.
print(confusion_matrix(y_test,predictions))

#Naive Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB

In [None]:
 NB= GaussianNB() 

In [None]:
NB.fit(X_train,y_train)

In [None]:
predictions = NB.predict(X_test)

In [None]:
from sklearn.metrics import classification_report

In [None]:
print(classification_report(y_test,predictions))

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
print(confusion_matrix(y_test,predictions))

#Linear Regression

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# The estimator is created but never fitted or used in the cells shown here.
lm = LinearRegression()

# Linear Regression is not applied to this data: the target (Survived) is a class label, while linear regression predicts a continuous value.

#Linear Discriminant Analysis

In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

In [None]:
# Linear discriminant analysis classifier with scikit-learn's default settings.
LDA = LinearDiscriminantAnalysis() 

In [None]:
# Fit on the training split.
LDA.fit(X_train,y_train)

In [None]:
# Predicted labels for the test split (overwrites the previous model's `predictions`).
predictions = LDA.predict(X_test)

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Text report comparing the true test labels with the predictions.
print(classification_report(y_test,predictions))

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Confusion matrix: true labels first, predictions second.
print(confusion_matrix(y_test,predictions))