## Mounting Google Drive in Colab

In [None]:
# Mount Google Drive at /content/drive so files stored there can be read like local files.
# NOTE: the google.colab package is only importable inside a Colab runtime.
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Changing the working directory to the tutorial folder

In [None]:
# Work from the tutorial folder on the mounted Drive so relative paths (e.g. the CSV file) resolve.
%cd /content/drive/MyDrive/Tutorials/Tutorial 1

/content/drive/MyDrive/Tutorials/Tutorial 1


Checking the contents of the current directory

In [None]:
# List the folder to confirm Titanic_Data.csv is present before it is loaded.
!ls

 Titanic_Data.csv  'Tutorial1 Solution.ipynb'  'Tutorial1 Template.ipynb'


# Tutorial 1 The Titanic Disaster

## Project Summary
Predict which passengers survived the Titanic shipwreck.

## Importing Some Basic Libraries

In [None]:
import numpy as np 
import pandas as pd 
from matplotlib import pyplot as plt

## Importing the Dataset

In [None]:
# Load the passenger table; the path is relative to the current working directory.
dataset = pd.read_csv('Titanic_Data.csv')

## Showing the Dataset in a Table

In [None]:
# Peek at the first three rows to check that the columns loaded as expected.
dataset.head(3)

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,Number of Siblings/Spouses Aboard,Number of Parents/Children Aboard,Fare,Embarked,Survived
0,1,3,"Braund, Mr. Owen Harris",male,22.0,1,0,7.25,S,No
1,2,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,71.2833,C,Yes
2,3,3,"Heikkinen, Miss. Laina",female,26.0,0,0,7.925,S,Yes


## A Quick Review of the Data

In [None]:
# Dtypes and non-null counts per column; columns with fewer non-null entries than rows contain missing values.
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 10 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   PassengerId                        891 non-null    int64  
 1   Pclass                             891 non-null    int64  
 2   Name                               891 non-null    object 
 3   Sex                                891 non-null    object 
 4   Age                                714 non-null    float64
 5   Number of Siblings/Spouses Aboard  891 non-null    int64  
 6   Number of Parents/Children Aboard  891 non-null    int64  
 7   Fare                               891 non-null    float64
 8   Embarked                           889 non-null    object 
 9   Survived                           891 non-null    object 
dtypes: float64(2), int64(4), object(4)
memory usage: 69.7+ KB


## Encoding Categorical Data

### Encoding the Input Data

**For the gender column**

In [None]:
# Lookup table giving the numeric code for each value of the 'Sex' column.
gender = dict(male=0.0, female=1.0)

In [None]:
# Encode 'Sex' as 0.0 (male) / 1.0 (female) using the `gender` lookup.
# Series.map replaces every value that is not a key of the dict with NaN, so
# re-running this cell on the already-encoded column would silently wipe it
# out. The dtype guard makes the cell safe to execute more than once.
if not pd.api.types.is_numeric_dtype(dataset['Sex']):
    dataset['Sex'] = dataset['Sex'].map(gender)

In [None]:
# Sanity-check that the column now holds numeric codes.
dataset['Sex'].head()

0    0.0
1    1.0
2    1.0
3    1.0
4    0.0
Name: Sex, dtype: float64

**For the 'Embarked' column**

In [None]:
# Show the distinct values (NaN included) so the mapping defined next can cover every category.
print(dataset['Embarked'].unique())

['S' 'C' 'Q' nan]


In [None]:
# Lookup table giving a numeric code to each non-missing value of the 'Embarked' column.
ports = dict(S=0.0, C=1.0, Q=2.0)

In [None]:
# Encode 'Embarked' with the numeric codes from `ports`; missing entries stay NaN.
# Series.map replaces every value that is not a key of the dict with NaN, so
# re-running this cell on the already-encoded column would silently wipe it
# out. The dtype guard makes the cell safe to execute more than once.
if not pd.api.types.is_numeric_dtype(dataset['Embarked']):
    dataset['Embarked'] = dataset['Embarked'].map(ports)

In [None]:
# Sanity-check that the column now holds numeric codes.
dataset['Embarked'].head()

0    0.0
1    1.0
2    0.0
3    0.0
4    0.0
Name: Embarked, dtype: float64

### Encoding the Output Data (Labels)

In [None]:
from sklearn.preprocessing import LabelEncoder

# Convert the 'Survived' labels to integers; per the displayed rows, 'No' becomes 0 and 'Yes' becomes 1.
le = LabelEncoder()
dataset['Survived'] = le.fit_transform(dataset['Survived'])

In [None]:
# Sanity-check that the labels are now integers.
dataset['Survived'].head()

0    0
1    1
2    1
3    1
4    0
Name: Survived, dtype: int64

## Dropping Irrelevant Input Data 

In [None]:
# Remove the columns this tutorial treats as irrelevant inputs.
# Rebinding `dataset` (rather than inplace=True) together with errors='ignore'
# keeps the cell re-runnable: a second execution finds nothing left to drop
# instead of raising KeyError.
dataset = dataset.drop(columns=['PassengerId', 'Name'], errors='ignore')


## Checking the Preprocessed Dataset

In [None]:
# Review the fully encoded table before splitting it into inputs and labels.
dataset.head()

Unnamed: 0,Pclass,Sex,Age,Number of Siblings/Spouses Aboard,Number of Parents/Children Aboard,Fare,Embarked,Survived
0,3,0.0,22.0,1,0,7.25,0.0,0
1,1,1.0,38.0,1,0,71.2833,1.0,1
2,3,1.0,26.0,0,0,7.925,0.0,1
3,1,1.0,35.0,1,0,53.1,0.0,1
4,3,0.0,35.0,0,0,8.05,0.0,0


## Separating the Input and Output

In [None]:
# Inputs are the first seven columns (Pclass through Embarked); the label is the last column (Survived).
X, y = dataset.iloc[:, :7], dataset.iloc[:, -1]

## Showing the Input Data in a Table Format

In [None]:
# Confirm that X holds only the seven input columns.
X.head()

Unnamed: 0,Pclass,Sex,Age,Number of Siblings/Spouses Aboard,Number of Parents/Children Aboard,Fare,Embarked
0,3,0.0,22.0,1,0,7.25,0.0
1,1,1.0,38.0,1,0,71.2833,1.0
2,3,1.0,26.0,0,0,7.925,0.0
3,1,1.0,35.0,1,0,53.1,0.0
4,3,0.0,35.0,0,0,8.05,0.0


## A Quick Check of the Output Data

In [None]:
# Confirm that y holds the integer 'Survived' labels.
y.head()

0    0
1    1
2    1
3    1
4    0
Name: Survived, dtype: int64

## Taking Care of Missing Data Inputs

In [None]:
from sklearn.impute import SimpleImputer

In [None]:
# Mean imputer: each NaN in a column is replaced by that column's mean.
# NOTE(review): X also contains 'Embarked', whose values are category codes (0/1/2);
# a mean there produces a fractional code. Consider a most-frequent strategy for that column.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')

In [None]:
# Fill the missing values. fit_transform returns a NumPy array by default, so X is
# indexed by position (not by column name) from here on.
# NOTE(review): the imputer is fitted on all rows before the train/test split, so
# test rows contribute to the fill values.
X = imputer.fit_transform(X)

## Splitting the Dataset into the Training Set and Test Set

In [None]:
from sklearn.model_selection import train_test_split


In [None]:
# Hold out 20% of the rows for testing; random_state fixes the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

In [None]:
# (rows, features) of the training portion.
X_train.shape

(712, 7)

In [None]:
# (rows, features) of the held-out test portion.
X_test.shape

(179, 7)

## Scaling the Features

In [None]:
# Standardize the two continuous features: Age (column 2) and Fare (column 5).
#
# The scaler is fitted on the training rows only and then applied unchanged to
# the test rows, so no test-set statistics leak into training.
#
# The earlier version scaled the full matrix `X` *after* the split. Because
# train_test_split hands back copies, X_train / X_test were never scaled and
# the models below were trained on raw values. `X` itself is left untouched here.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train[:, [2,5]] = sc.fit_transform(X_train[:, [2,5]])
X_test[:, [2,5]] = sc.transform(X_test[:, [2,5]])

## Training and Testing Predictive Models

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
# Support vector machine classifier with an RBF kernel:
# fit on the training split, then report accuracy on the held-out test split.
from sklearn.svm import SVC
sv_classfier = SVC(kernel = 'rbf')
sv_classfier.fit(X_train, y_train)
Y_pred = sv_classfier.predict(X_test)
print(accuracy_score(y_test, Y_pred))

0.6536312849162011


In [None]:
from sklearn.linear_model import LogisticRegression
# Logistic regression (a classifier, not linear regression):
# fit on the training split, then report accuracy on the held-out test split.
lr = LogisticRegression()
lr.fit(X_train, y_train)
Y_pred = lr.predict(X_test)
print(accuracy_score(y_test, Y_pred))

0.7988826815642458
