# Importing Packages

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import copy
import itertools
from sklearn.metrics import confusion_matrix


# Functions


In [None]:
def MinMaxScaler(rawData):
    """Rescale ``rawData`` linearly so its minimum maps to 0 and its maximum to 1.

    Parameters
    ----------
    rawData : array-like of numbers
        Values to rescale. The input is converted with
        ``np.asarray(rawData, dtype=float)``, so plain lists and tuples are
        accepted as well as NumPy arrays. The minimum and maximum are taken
        over all elements.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as the input holding
        ``(rawData - min) / (max - min)``. When every value is identical the
        range is zero and an array of zeros is returned instead of dividing
        by zero.
    """
    values = np.asarray(rawData, dtype=float)
    minValue = values.min()
    valueRange = values.max() - minValue
    if valueRange == 0:
        # Constant input: there is no spread to scale by.
        return np.zeros_like(values)
    return (values - minValue) / valueRange

In [None]:
def kNNImputer(array, windowRadius=2):
    """Fill NaN entries of a 1-D array with the mean of nearby observed values.

    For each missing position the function looks at the entries up to
    ``windowRadius`` positions before and after it (clipped at the array
    ends), ignores any that are themselves NaN, and writes the mean of the
    remaining neighbours. The input array is never modified.

    Parameters
    ----------
    array : array-like of float, 1-D
        Data containing ``np.nan`` where values are missing.
    windowRadius : int, optional
        Number of positions to look on each side of a missing entry.
        Defaults to 2, i.e. up to four neighbours.

    Returns
    -------
    numpy.ndarray
        Float copy of ``array`` with the NaN entries replaced. If a missing
        entry has no observed neighbour inside its window, the mean of all
        observed values is used; if the whole array is NaN, it stays NaN.
    """
    values = np.asarray(array, dtype=float)
    output = values.copy()
    missingMask = np.isnan(values)
    if not missingMask.any():
        return output

    observed = values[~missingMask]
    # Fallback for windows that contain no observed value at all.
    fallback = observed.mean() if observed.size else np.nan

    length = values.shape[0]
    for idx in np.flatnonzero(missingMask):
        lower = max(idx - windowRadius, 0)
        upper = min(idx + windowRadius + 1, length)  # slice end is exclusive
        window = values[lower:upper]
        # The entry at idx is NaN itself, so this filter also drops it.
        neighbours = window[~np.isnan(window)]
        output[idx] = neighbours.mean() if neighbours.size else fallback
    return output
    

In [None]:
def sigmoid(Z):
    """Return the logistic function ``1 / (1 + exp(-Z))`` element-wise.

    The value is computed from ``exp(-|Z|)``, which always lies in (0, 1],
    so large-magnitude inputs cannot overflow the exponential.

    Parameters
    ----------
    Z : scalar or array-like of numbers
        Input value(s); converted with ``np.asarray(Z, dtype=float)``.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        A NumPy scalar for scalar input, otherwise an array shaped like ``Z``.
    """
    z = np.asarray(Z, dtype=float)
    decay = np.exp(-np.abs(z))
    # For z >= 0 this is the textbook form; for z < 0 the numerator and
    # denominator are both multiplied by exp(z) to keep the exponent negative.
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    return result[()] if result.ndim == 0 else result

In [None]:
def gradientDescentLinReg(wInit,x,y,threshold,alpha):
    """Fit linear-regression weights with batch gradient descent.

    Each iteration evaluates the cost ``sum((x @ w - y) ** 2) / (2 * m)``,
    with ``m = x.shape[0]``, and then steps the weights by
    ``(alpha / m) * x.T @ (x @ w - y)``.

    Parameters
    ----------
    wInit : array-like
        Initial weights, shaped so that ``np.dot(x, wInit)`` matches ``y``.
        It is copied (as float) and never modified.
    x : numpy.ndarray
        2-D input matrix with one row per data point.
    y : numpy.ndarray
        Target values with the same shape as ``np.dot(x, wInit)``.
    threshold : float
        Iteration continues while the percentage change of the cost between
        consecutive iterations is below this value. With a small negative
        value, the loop stops once the cost decreases by less than that
        percentage.
    alpha : float
        Learning rate.

    Returns
    -------
    w : numpy.ndarray
        Weights after the last update.
    noOfIterations : int
        Starts at 1 and is incremented after each weight update.
    cost : list of float
        Cost recorded before each weight update.
    isDiverged : bool
        True when the cost increased between two iterations or became
        non-finite.
    """
    # Take the sample count from the data instead of a notebook-level global.
    m = x.shape[0]
    isDiverged = False
    noOfIterations = 1
    percentageChangeOfCost = -100000
    w = np.array(wInit, dtype=float)  # float copy so the in-place update is valid
    cost = []
    while percentageChangeOfCost < threshold:
        h = np.dot(x, w)
        error = h - y
        currentCost = np.sum(error ** 2) / (2 * m)
        if not np.isfinite(currentCost):
            # An overflowed or NaN cost would otherwise end the loop
            # without being reported as divergence.
            isDiverged = True
            break
        if cost:
            percentageChangeOfCost = 100 * (currentCost - cost[-1]) / currentCost
        if percentageChangeOfCost > 0:
            isDiverged = True
            break
        gradientVector = np.dot(x.T, error)
        w -= (alpha / m) * gradientVector
        cost.append(currentCost)
        noOfIterations += 1
    return w,noOfIterations,cost,isDiverged

In [None]:
def gradientDescentLogReg(wInit,x,y,threshold,alpha):
    """Fit logistic-regression weights with batch gradient descent.

    Each iteration computes ``h = sigmoid(x @ w)``, evaluates the
    cross-entropy cost ``-(y.T @ log(h) + (1 - y).T @ log(1 - h)) / m`` with
    ``m = x.shape[0]``, and then steps the weights by
    ``(alpha / m) * x.T @ (h - y)``.

    Parameters
    ----------
    wInit : array-like
        Initial weights as a column, shaped so that ``np.dot(x, wInit)``
        matches ``y``. It is copied (as float) and never modified.
    x : numpy.ndarray
        2-D input matrix with one row per data point.
    y : numpy.ndarray
        Labels as a 2-D column; the cost expression indexes the result of
        ``np.dot(y.T, ...)`` with ``[0][0]``.
    threshold : float
        Iteration continues while the percentage change of the cost between
        consecutive iterations is below this value.
    alpha : float
        Learning rate.

    Returns
    -------
    w : numpy.ndarray
        Weights after the last update.
    noOfIterations : int
        Starts at 1 and is incremented after each weight update.
    cost : list of float
        Cost recorded before each weight update.
    isDiverged : bool
        True when the cost increased between two iterations or became
        non-finite.
    """
    # Take the sample count from the data instead of a notebook-level global.
    m = x.shape[0]
    isDiverged = False
    noOfIterations = 1
    percentageChangeOfCost = -100000
    w = np.array(wInit, dtype=float)  # float copy so the in-place update is valid
    cost = []
    while percentageChangeOfCost < threshold:
        h = sigmoid(np.dot(x, w))
        error = h - y
        currentCost = -(np.dot(y.T,np.log(h))[0][0] + np.dot((1-y.T),np.log(1-h))[0][0]) / m
        if not np.isfinite(currentCost):
            # When h reaches exactly 0 or 1 the logs produce inf/NaN; report
            # that as divergence rather than silently leaving the loop.
            isDiverged = True
            break
        if cost:
            percentageChangeOfCost = 100 * (currentCost - cost[-1]) / currentCost
        if percentageChangeOfCost > 0:
            isDiverged = True
            break
        gradientVector = np.dot(x.T, error)
        w -= (alpha / m) * gradientVector
        cost.append(currentCost)
        noOfIterations += 1
    return w,noOfIterations,cost,isDiverged

In [None]:
def plot_confusion_matrix(cm,
                          target_names,
                          title='Confusion matrix',
                          cmap=None,
                          normalize=True):
    """
    given a sklearn confusion matrix (cm), make a nice plot

    Arguments
    ---------
    cm:           confusion matrix from sklearn.metrics.confusion_matrix
                  (a 2-D numpy array of counts; rows are true labels)

    target_names: given classification classes such as [0, 1, 2]
                  the class names, for example: ['high', 'medium', 'low']
                  pass None to leave the default numeric ticks

    title:        the text to display at the top of the matrix

    cmap:         the gradient of the values displayed from matplotlib.pyplot.cm
                  see http://matplotlib.org/examples/color/colormaps_reference.html
                  plt.get_cmap('jet') or plt.cm.Blues
                  defaults to 'Blues' when None

    normalize:    If False, plot the raw numbers
                  If True, plot the proportions (each row divided by its sum;
                  a row whose sum is zero is shown as all zeros)

    Returns
    -------
    None. The figure is displayed with plt.show().

    Usage
    -----
    plot_confusion_matrix(cm           = cm,                  # confusion matrix created by
                                                              # sklearn.metrics.confusion_matrix
                          normalize    = True,                # show proportions
                          target_names = y_labels_vals,       # list of names of the classes
                          title        = best_estimator_name) # title of graph

    Citation
    --------
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html

    """

    # Accuracy is always computed from the raw counts.
    accuracy = np.trace(cm) / float(np.sum(cm))
    misclass = 1 - accuracy

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    # Normalize before drawing so the heatmap colors, the printed numbers and
    # the text-contrast threshold all refer to the same values.
    if normalize:
        rowTotals = cm.sum(axis=1)[:, np.newaxis]
        cm = np.divide(cm.astype('float'), rowTotals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=rowTotals != 0)

    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names, rotation=45)
        plt.yticks(tick_marks, target_names)

    # Cells above the threshold get white text, the rest black.
    thresh = cm.max() / 1.5 if normalize else cm.max() / 2
    cellFormat = "{:0.4f}" if normalize else "{:,}"
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cellFormat.format(cm[i, j]),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
    # Lay out after the labels exist so the two-line x label is accounted for.
    plt.tight_layout()
    plt.show()

# Importing Data

In [None]:
# Load the Titanic train and test CSV files from the Kaggle input directory.
# NOTE(review): testData is loaded but not referenced again in the visible cells.
trainData = pd.read_csv("../input/titanic/train.csv")
testData  = pd.read_csv("../input/titanic/test.csv")

# Preprocessing Input and Output


## Inputs

In [None]:
# Encode sex as 1 for 'female' and 0 for every other value.
sex = np.where(trainData['Sex'] == 'female',1,0)
# Passenger class and age as plain numpy arrays.
pclass = np.array(trainData["Pclass"])
age = np.array(trainData['Age'])

## Missing Values

In [None]:
# Replace the NaN entries of age using kNNImputer (defined above).
ageImputed = kNNImputer(age)

## Normalizing Input Data

In [None]:
# Min-max scale each feature with MinMaxScaler (defined above).
# NOTE(review): the design matrix x built further down uses sex, pclass and
# ageImputed directly; these normalized arrays are not referenced again in
# the visible cells.
sexNormalized = MinMaxScaler(sex)
pclassNormalized = MinMaxScaler(pclass)
ageNormalized = MinMaxScaler(ageImputed)

## Labels

In [None]:
# Target labels taken from the 'Survived' column of the training data.
survived = np.array(trainData["Survived"])

## Number of Data Points

In [None]:
# Number of training rows; also used to size the bias column of x below.
m = survived.shape[0]

# Model Fitting

## Linear Regression

### Computing input x and y


In [None]:
# Design matrix: a column of ones (bias term) followed by sex, pclass and
# imputed age, so x has m rows and 4 columns.
# NOTE(review): these are the un-normalized features, not the *Normalized arrays.
x = np.c_[np.ones((m,1)),sex,pclass,ageImputed]
# Labels as an (m, 1) column so they line up with np.dot(x, w).
y = survived.reshape(-1,1)

### Initializing Variables

In [None]:
stop = False          # set to True once gradient descent reports divergence
alpha = 0.0001        # starting learning rate; tripled after every successful run
threshold = -0.0001   # passed to gradient descent as its stopping threshold (percent change of cost)
wInit = np.zeros((x.shape[1], 1))  # one zero weight per column of x
costList = []         # collects [alpha, cost history] for each successful run

### Applying Gradient Descent

In [None]:
# Learning-rate search: rerun gradient descent from wInit with alpha tripled
# each time, until a run reports divergence. The weights of the last run that
# did not diverge are kept in wFinalLinReg.
# NOTE(review): if the very first run diverges, wFinalLinReg is never assigned.
while (not stop):
    print(alpha)
    w,noOfIterations,cost,isDiverged = gradientDescentLinReg(wInit,x,y,threshold,alpha)
    stop = isDiverged
    if (not isDiverged):
        # Record this run's cost history under its alpha.
        currentCostListItem = [alpha, cost]
        costList.append(currentCostListItem)
        alpha *= 3
        wFinalLinReg = w
        print(noOfIterations,wFinalLinReg)


### Cost Vs Varying Alpha

In [None]:
# Draw one cost-per-iteration curve for each alpha recorded in costList.
for index in range (len(costList)):
    plt.plot(costList[index][1])  # element [1] is the cost history, [0] is alpha

### Prediction

In [None]:
# Linear-regression outputs for the training matrix x.
linRegPred = np.dot(x,wFinalLinReg)
# Bare expression so the notebook displays the array.
linRegPred

# Logistic Regression

## Initializing Variables

In [None]:
# Reset the search state for logistic regression.
stop = False          # set to True once gradient descent reports divergence
alpha = 0.001         # starting learning rate; tripled after every successful run
threshold = -0.0001   # passed to gradient descent as its stopping threshold (percent change of cost)
wInit = np.zeros((x.shape[1], 1))  # one zero weight per column of x
costList = []         # collects [alpha, cost history] for each successful run

## Applying Gradient Descent

In [None]:
# Learning-rate search: rerun gradient descent from wInit with alpha tripled
# each time, until a run reports divergence. The weights of the last run that
# did not diverge are kept in wFinalLogReg.
# NOTE(review): if the very first run diverges, wFinalLogReg is never assigned.
while (not stop):
    print(alpha)
    w,noOfIterations,cost,isDiverged = gradientDescentLogReg(wInit,x,y,threshold,alpha)
    stop = isDiverged
    if (not isDiverged):
        # Record this run's cost history under its alpha.
        currentCostListItem = [alpha, cost]
        costList.append(currentCostListItem)
        alpha *= 3
        wFinalLogReg = w
        print(noOfIterations,wFinalLogReg)

## Cost Vs Varying Alpha

In [None]:
# Draw one cost-per-iteration curve for each alpha recorded in costList.
for index in range (len(costList)):
    plt.plot(costList[index][1])  # element [1] is the cost history, [0] is alpha

In [None]:
# Bare expression so the notebook displays the selected logistic-regression weights.
wFinalLogReg

## Prediction

In [None]:
# Predicted probabilities for the training matrix x.
logRegPred = sigmoid(np.dot(x,wFinalLogReg))
# Threshold at 0.5 to obtain 0/1 class predictions.
logRegPredBinary = np.where(logRegPred >= 0.5, 1, 0)
# Write the predictions to pred.csv without the index column.
# NOTE(review): these are predictions for the training rows; testData is not used here.
pd.DataFrame.from_records(logRegPredBinary).to_csv('pred.csv',index=False)


## Testing the Model

### Confusion Matrix

In [None]:
# Compare the training labels y with the thresholded predictions and plot
# the raw counts (normalize=False).
plot_confusion_matrix(cm           = confusion_matrix(y,logRegPredBinary), 
                      normalize    = False,
                      target_names = ['dead', 'alive'],
                      title        = "Confusion Matrix")

## Visualizing the Plane of Decision Boundary

In [None]:
# Grid over the sex axis (0..1) and the pclass axis (0..3).
xx, yy = np.meshgrid(range(2), range(4))
# Decision boundary: the age value at which the linear score
# w0 + w1*sex + w2*pclass + w3*age equals zero (predicted probability 0.5).
z = -(wFinalLogReg[1] * xx + wFinalLogReg[2] * yy + wFinalLogReg[0]) / wFinalLogReg[3]

# Figure.gca() no longer accepts keyword arguments in current Matplotlib,
# so create the 3-D axes with add_subplot instead.
plt3d = plt.figure().add_subplot(projection='3d')
plt3d.plot_surface(xx, yy, z, alpha=0.2)
# Overlay the training points in (sex, pclass, age) space.
plt3d.scatter(sex, pclass, ageImputed)

In [None]:
# Interactive version of the same plot, built with plotly.
# NOTE(review): `py` and `iplot` are imported but not used in this cell.
import plotly_express as py
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)
import plotly.graph_objs as go

# Remove all margins around the figure.
layout = go.Layout(
    margin=dict(
        l=0,
        r=0,
        b=0,
        t=0  
    )
    
)




# Training points in (sex, pclass, age) space.
trace1 = go.Scatter3d(
    x=sex.flatten(),
    y=pclass.flatten(),
    z=ageImputed.flatten(),
    mode='markers',
    marker=dict(
        size=3,
        color='rgb(255,0,0)',                # one fixed red color for every marker
    )
)

# Decision-boundary surface, reusing xx, yy and z from the previous cell.
trace2 = go.Surface(z=z, x=xx, y=yy)

fig = go.Figure(data=[trace1,trace2], layout=layout)

fig.show()
