# Python Machine Learning Diploma - Orientation Session

Today's Outline:
- Diploma Orientation
- Full Data Science Projects:
    - House Prices Prediction [Regression]
    - Titanic Survival Prediction [Classification]

==========

## The Data Science Process

In [None]:
# Render the process diagram stored at imgs/ml-process.jpeg as this cell's output.
from IPython.display import Image
Image("imgs/ml-process.jpeg")

==========

## Full Data Science Project - Titanic Survival Prediction [Classification]

Predict survival on the Titanic and get familiar with ML basics
- https://www.kaggle.com/c/titanic

### Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

### Importing Dataset

In [None]:
# Load the passenger records from the local CSV file into a DataFrame.
titanic = pd.read_csv(filepath_or_buffer='data/titanic.csv')

In [None]:
# Preview the first five rows of the freshly loaded data.
titanic.head(n=5)

In [None]:
# Print a concise summary of the frame (columns, non-null counts, dtypes)
# to spot columns with missing entries.
titanic.info()

### Data Cleaning & Pre-processing

##### Dealing with Missing Data

In [None]:
# Count the missing values in each column.
titanic.isna().sum()

In [None]:
# Draw the boolean null mask as a heatmap (one row per record, one column per
# feature); row labels and the colour bar are hidden to keep the picture clean.
sns.heatmap(data=titanic.isnull(), cmap='viridis', cbar=False, yticklabels=False)

In [None]:
# Fill the gaps in Age with the mean of that column.
# NOTE(review): the mean is computed over the whole dataset, i.e. before the
# train/test split performed later in the notebook — confirm that this leakage
# is acceptable here.
titanic['Age'] = titanic['Age'].fillna(value=titanic['Age'].mean())

In [None]:
# Redraw the null-mask heatmap now that Age has been filled, to see which
# columns still contain missing values.
sns.heatmap(data=titanic.isnull(), cmap='viridis', cbar=False, yticklabels=False)

In [None]:
# Remove the Cabin column entirely.
# Reassigning (rather than mutating with inplace=True) together with
# errors='ignore' keeps this cell safe to re-run: a second execution is a
# no-op instead of raising KeyError because the column is already gone.
titanic = titanic.drop(columns='Cabin', errors='ignore')

In [None]:
# Preview the first five rows to confirm the column set after dropping Cabin.
titanic.head(n=5)

In [None]:
# Discard every row that still has a missing value in any column.
# Written as a reassignment instead of inplace=True so the cell produces its
# result explicitly and does not silently mutate a frame shown by earlier cells.
titanic = titanic.dropna()

##### Converting Categorical Features

In [None]:
# Re-check the column dtypes and non-null counts after cleaning, to see which
# columns still need encoding before modelling.
titanic.info()

In [None]:
# One-hot encode Sex. drop_first=True leaves out the first category's column,
# so the remaining indicator column(s) are not redundant with each other.
sex = pd.get_dummies(data=titanic['Sex'], drop_first=True)
sex

In [None]:
# One-hot encode Embarked, again omitting the first category's column.
embark = pd.get_dummies(data=titanic['Embarked'], drop_first=True)
embark

In [None]:
# Drop the two columns that were just encoded (Sex, Embarked) and the two
# free-text columns (Name, Ticket).
# Reassigning with errors='ignore' (instead of inplace=True) means re-running
# this cell is a no-op rather than a KeyError on already-removed columns.
titanic = titanic.drop(columns=['Sex', 'Embarked', 'Name', 'Ticket'], errors='ignore')

In [None]:
# Attach the dummy columns to the remaining features, side by side.
# NOTE: `titanic` is rebound to the concatenation of itself with the dummies,
# so running this cell more than once appends the dummy columns again.
titanic = pd.concat(objs=[titanic, sex, embark], axis='columns')

In [None]:
# Preview the first five rows of the frame after encoding.
titanic.head(n=5)

### Exploratory Data Analysis

In [None]:
# Use seaborn's 'whitegrid' theme for the plots that follow.
sns.set_style(style='whitegrid')

In [None]:
# Bar chart of the number of rows for each value of Survived.
# NOTE(review): seaborn 0.13+ emits a deprecation warning when `palette` is
# passed without `hue` — confirm against the installed seaborn version.
sns.countplot(x='Survived',data=titanic,palette='pastel')

In [None]:
# Counts per Survived value again, this time split by the Pclass column via `hue`.
sns.set_style(style='whitegrid')
sns.countplot(data=titanic, x='Survived', hue='Pclass', palette='rainbow')

In [None]:
# Histogram of Age using 30 bins, drawn semi-transparent.
titanic['Age'].hist(alpha=0.7, color='darkred', bins=30)

In [None]:
# Bar chart of the number of rows for each value of SibSp.
sns.countplot(data=titanic, x='SibSp')

In [None]:
# Histogram of Fare using 40 bins on an 8x4 inch figure.
titanic['Fare'].hist(figsize=(8, 4), bins=40, color='green')

### Building Our Model

##### Train / Test Split Data

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Separate the label (Survived) from the feature matrix (every other column).
# NOTE(review): no earlier cell drops an identifier column, so a PassengerId
# column, if the CSV has one, is presumably still among the features — confirm
# that this is intended.
y = titanic['Survived']
X = titanic.drop(columns='Survived')

In [None]:
# Hold out 30% of the rows for testing; the fixed random_state makes the
# shuffle (and therefore the split) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=101,
    test_size=0.30,
)

##### Model Training and Predicting

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Create an (unfitted) logistic regression classifier.
# max_iter is raised far above scikit-learn's default of 100 — presumably so
# the solver converges on the unscaled features; confirm.
model = LogisticRegression(max_iter=5000)

In [None]:
# Fit the classifier on the training split only; the test split stays unseen.
model.fit(X_train,y_train)

In [None]:
# Predict a class label for every row of the held-out test split, using the
# estimator fitted above (`model` is the only classifier this notebook defines).
predictions = model.predict(X_test)

In [None]:
# Display the predicted labels for the test rows.
predictions

##### Model Evaluation

In [None]:
from sklearn.metrics import classification_report

In [None]:
# classification_report returns a pre-formatted multi-line string; print it so
# the table renders with real line breaks instead of a repr full of "\n".
print(classification_report(y_test, predictions))

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Confusion matrix for the test split: rows are the true classes, columns the
# predicted classes.
confusion_matrix(y_true=y_test, y_pred=predictions)

==========

# THANK YOU!