In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import csv

%matplotlib  inline
import seaborn as sns
from sklearn import datasets

In [None]:
# Whitespace-delimited text file; the first column becomes the row index.
# `sep` is a regular expression, so it is written as a raw string: in a plain
# string "\s" is an invalid escape sequence and triggers a warning.
df = pd.read_csv("data/chdage.txt", sep=r"\s+", index_col=0)

In [None]:
# Preview the first rows of the loaded frame.
df.head()

In [None]:
# Summary of the frame: columns, non-null counts and dtypes.
df.info()

In [None]:
# Number of rows for each distinct value in the CHD column.
df.CHD.value_counts()

In [None]:
# Scatter of the CHD column against age for every row in the frame.
fig, ax = plt.subplots(figsize=(10, 5))
ax.scatter(df['AGE'], df['CHD'], s=30, c='r', marker='x', linewidths=1)
ax.set_xlabel('Age (Years)')
ax.set_ylabel('Coronary Heart Disease');

In [None]:
def age_grp(x, edges=(20, 30, 35, 40, 45, 50, 55, 60, 70)):
    """Map an age to its 1-based age-group number.

    With the default ``edges`` the groups are 20-29, 30-34, 35-39, 40-44,
    45-49, 50-54, 55-59 and 60-69, numbered 1 through 8. Each group is the
    half-open interval ``[edges[i - 1], edges[i])``, so fractional ages such
    as 29.5 or 34.5 land in a group instead of falling into the gap between
    two inclusive integer ranges.

    Parameters
    ----------
    x : int or float
        Age to classify.
    edges : sequence of numbers, optional
        Ascending bin boundaries; ``len(edges) - 1`` groups are produced.

    Returns
    -------
    int or None
        Group number starting at 1, or ``None`` when ``x`` lies outside
        ``[edges[0], edges[-1])`` (this includes NaN, for which every
        comparison is false).
    """
    if not edges[0] <= x < edges[-1]:
        return None
    # The first upper boundary that exceeds x identifies the group.
    for group, upper in enumerate(edges[1:], start=1):
        if x < upper:
            return group
    return None

In [None]:
# Label every row with the group number age_grp returns for its AGE
# (age_grp returns None for ages it does not assign to a group).
df['age_group'] = df.AGE.apply(age_grp)

In [None]:
# Mean of CHD within each age group. The column is selected before
# aggregating so that only CHD is averaged; averaging every column first does
# wasted work and raises if the frame holds a non-numeric column.
df1 = df.groupby('age_group')[['CHD']].mean()

In [None]:
# Turn the age_group index into a regular column. The guard keeps the cell
# idempotent: resetting a second time would insert a spurious "index" column.
if 'age_group' not in df1.columns:
    df1 = df1.reset_index()
df1

In [None]:
# Scatter of the per-group mean of CHD against the age-group number.
fig, ax = plt.subplots(figsize=(10, 3))
ax.scatter(df1['age_group'], df1['CHD'], s=30, c='r', marker='x', linewidths=1)
ax.set_xlabel('Age Group')
ax.set_ylabel('Mean values - Coronary Heart Disease');

In [None]:
# Feature matrix: AGE as a single column of shape (n_rows, 1).
X = df['AGE'].to_numpy().reshape(-1, 1)
# Target vector: the CHD column as a 1-D array.
y = df['CHD'].to_numpy()

In [None]:
### Linear Regression approach
from sklearn.linear_model import LinearRegression

# Fit a linear regression on (X, y) and predict on the same inputs.
reg = LinearRegression().fit(X, y)
y_pred = reg.predict(X)

# Report the range of the fitted values and the learned parameters.
for label, value in (
    ("Maximum value : ", y_pred.max()),
    ("Minimum value : ", y_pred.min()),
    ("Intercept for the model: ", reg.intercept_),
    ("Coefficent for the model: ", reg.coef_),
):
    print(label, value)

In [None]:
# Linear-model prediction for the first observation, computed by hand from the
# fitted parameters. Reading them from `reg` keeps this cell correct if the
# data or the fit changes; pasted numbers would go stale and lose precision.
reg.intercept_ + reg.coef_ * X[0]

## So what's next? How can we predict values in the range between 0 and 1?

### S Curve
![title](asset/scurve.png)

--------------

![title](asset/log_pred.png)

# Hypothesis function 

![title](asset/log_hypothesis.png)

----------

# Cost Function

![title](asset/log_cost.png)

# Binary Logistic Regression

In [None]:
from sklearn import datasets

# Binary problem built from iris: keep the first two feature columns and
# relabel the target as 0 for class 0 and 1 for every other class.
iris = datasets.load_iris()
X = iris.data[:, :2]
y = (iris.target != 0).astype(int)

In [None]:
class LR:
    """Binary logistic regression trained with batch gradient descent.

    Parameters
    ----------
    lr : float, default 0.01
        Step size applied to the gradient at every iteration.
    num_iter : int, default 100000
        Number of gradient-descent iterations performed by ``fit``.
    fit_intercept : bool, default True
        When true, a column of ones is prepended to ``X`` so that the first
        entry of ``theta`` acts as the intercept.
    verbose : bool, default False
        When true, the current loss is printed every 10000 iterations.

    Attributes
    ----------
    theta : numpy.ndarray
        Learned weights; created by ``fit``.
    """

    def __init__(self, lr=0.01, num_iter=100000, fit_intercept=True, verbose=False):
        self.lr = lr
        self.num_iter = num_iter
        self.fit_intercept = fit_intercept
        self.verbose = verbose

    def __add_intercept(self, X):
        """Return ``X`` with a leading column of ones."""
        intercept = np.ones((X.shape[0], 1))
        return np.concatenate((intercept, X), axis=1)

    def __sigmoid(self, z):
        """Logistic function 1 / (1 + exp(-z)), evaluated without overflow.

        exp is only ever taken of a non-positive number, so a large negative
        ``z`` cannot overflow; each sign of ``z`` uses the algebraically
        equivalent form that stays finite.
        """
        z = np.asarray(z, dtype=float)
        e = np.exp(-np.abs(z))
        return np.where(z >= 0, 1 / (1 + e), e / (1 + e))

    def __loss(self, h, y):
        """Mean binary cross-entropy of probabilities ``h`` against labels ``y``.

        ``h`` is clipped away from exactly 0 and 1 so that log(0) can never
        turn the reported loss into inf or nan once the model saturates.
        """
        eps = np.finfo(float).eps
        h = np.clip(h, eps, 1 - eps)
        return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()

    def fit(self, X, y):
        """Learn ``theta`` from features ``X`` (n_samples, n_features) and 0/1 labels ``y``."""
        # Accept lists or non-float arrays as well as float ndarrays.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        if self.fit_intercept:
            X = self.__add_intercept(X)

        # weights initialization
        self.theta = np.zeros(X.shape[1])

        for i in range(self.num_iter):
            h = self.__sigmoid(np.dot(X, self.theta))
            # Gradient of the mean cross-entropy with respect to theta.
            gradient = np.dot(X.T, (h - y)) / y.size
            self.theta -= self.lr * gradient

            if self.verbose and i % 10000 == 0:
                # Report the loss at the freshly updated weights.
                h = self.__sigmoid(np.dot(X, self.theta))
                print(f'loss: {self.__loss(h, y)} \t')

    def predict_prob(self, X):
        """Return the predicted probability of class 1 for each row of ``X``."""
        X = np.asarray(X, dtype=float)
        if self.fit_intercept:
            X = self.__add_intercept(X)

        return self.__sigmoid(np.dot(X, self.theta))

    def predict(self, X, threshold=0.5):
        """Return a boolean array: True where the probability is >= ``threshold``."""
        return self.predict_prob(X) >= threshold

In [None]:
model = LR(lr=0.1, verbose=True, num_iter=300000)
%time model.fit(X, y)
preds = model.predict(X)
# accuracy
(preds == y).mean()

In [None]:
# Learned weights; with fit_intercept left at its default of True the first
# entry is the intercept, followed by one weight per feature column.
model.theta

In [None]:
# Predicted probability of class 1 for every row of X.
model.predict_prob(X)

In [None]:
# Scatter both classes, then overlay the contour where the model's predicted
# probability equals 0.5 (the decision boundary).
fig, ax = plt.subplots(figsize=(10, 6))
for label, colour in ((0, 'b'), (1, 'r')):
    members = X[y == label]
    ax.scatter(members[:, 0], members[:, 1], color=colour, label=str(label))
ax.legend()

# Evaluate the model on a grid spanning the observed range of both features
# (np.linspace uses its default number of points along each axis).
x1_min, x1_max = X[:, 0].min(), X[:, 0].max()
x2_min, x2_max = X[:, 1].min(), X[:, 1].max()
xx1, xx2 = np.meshgrid(np.linspace(x1_min, x1_max), np.linspace(x2_min, x2_max))
grid = np.column_stack((xx1.ravel(), xx2.ravel()))
probs = model.predict_prob(grid).reshape(xx1.shape)
ax.contour(xx1, xx2, probs, [0.5], linewidths=1, colors='black');

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# scikit-learn's logistic regression for comparison. The 'saga' solver
# shuffles the data while optimising, so random_state is fixed to make the
# fitted coefficients reproducible across runs.
model1 = LogisticRegression(solver='saga', max_iter=1000, random_state=42)

In [None]:
# Fit the scikit-learn model on the same X and y; %time reports how long the call takes.
%time model1.fit(X, y)

In [None]:
# Accuracy of the scikit-learn model on the data it was fitted on.
preds = model1.predict(X)
np.mean(preds == y)

In [None]:
# Intercept and coefficients learned by the scikit-learn model, for comparison
# with the hand-written model's theta.
model1.intercept_, model1.coef_ 