In [31]:
import pandas as pd
import numpy as np
from collections import defaultdict

In [103]:
class KNNClassifier():
    def __init__(self,k):
        self.k = k
        
    def euclidean_distance(self,X1,X2):
        if len(X1) != len(X2):
            return -1
        else:
            ss = 0
            for i in range(len(X1)):
                ss+= (X1[i] - X2[i]) ** 2
            distance = ss ** (0.5)
        return distance
    
    def vote(self,index,values):
        count = defaultdict(int)
        for i in index:
            count[values[i]]+=1
        predProba = {}
        pred = 0
        maxi = max(count.values())
        for key,val in count.items():
            predProba[key] = val/self.k
            if val == maxi:
                pred = key
        return {'pred':pred,'predProba':predProba}
    
    def smallestKDistanceIndex(self,distance):
        distIndex = defaultdict(list)
        for i in range(len(distance)):
            distIndex[distance[i]].append(i)
        final = []
        keys = sorted(distIndex.keys())
        for key in keys:
            final.extend(distIndex[key])
        print(final[:self.k])
        return final[:self.k]
                
    def predict(self,X_train,Y_train,X_test):
        ans = {}
        for testIndex in range(len(X_test)):
            distance = [0] * len(X_train)
            for trainIndex in range(len(X_train)):
                distance[trainIndex] = self.euclidean_distance(X_test[testIndex],X_train[trainIndex])
            print(distance)
            minimumDistanceIndex = self.smallestKDistanceIndex(distance)
            ans[testIndex] = self.vote(minimumDistanceIndex,Y_train)
        
        return ans
        
    

In [235]:
class linearRegression:
    def __init__(self,lr):
        self.learning_rate = lr
        self.params = None
    
    def fit(self,X_train,Y_train,n_epochs):
        n_features = X_train.shape[1]
        n_training = X_train.shape[0]
        W = np.zeros((n_features,1))
        B = 0
        
        for i in range(n_epochs):
            Y_dash = np.dot(X_train,W) + B
            difference = np.subtract(Y_dash,Y_train)
            SSE = np.sum(np.square(difference))
            MSE = SSE/n_training
            dW = np.dot(X_train.T,difference)/n_training
            dB = np.sum(difference)/n_training
            W = W - self.learning_rate * dW
            B = B - self.learning_rate * dB
        
        self.params = (W,B)
            
    def predict(self,X_test):
        W,B = self.params
        y_dash = np.dot(X_test,W) +B
        
        return y_dash

In [236]:
lr = linearRegression(0.1)

In [237]:
X_train = np.array([[1],[2],[3]])
Y_train = np.array([[1],[3],[-1],[2]])
X_test = np.array([[1,-1,2],[2,3,4]])

In [238]:
lr.fit(Y_train,Y_train,1000)

In [239]:
lr.predict(X_train)

array([[ 1.],
       [ 2.],
       [ 3.]])

In [265]:
class LogisticRegression:
    def __init__(self,lr):
        self.learning_rate = lr
        self.params = None
        
    def fit(self,X_train,Y_train,n_epochs):
        n_features = X_train.shape[1]
        n_training = X_train.shape[0]
        W = np.zeros((n_features,1))
        B = 0
        for i in range(n_epochs):
            logit = np.dot(X_train,W) + B
            Y_dash = 1/(1+np.exp(-1 * logit))
            loss = np.sum((-Y_train * np.log(Y_dash)) - (1-Y_train) * np.log(1-Y_dash))
            mean_loss = loss/n_training
            dW = np.dot(X_train.T,(Y_dash-Y_train))/n_training
            dB = np.sum(Y_dash-Y_train)/n_training
            W = W - self.learning_rate * dW
            B = B - self.learning_rate * dB
        
        self.params = (W,B)
        
    def predict(self,X_test):
        W,B = self.params
        logit = np.dot(X_test,W) + B
        Y_dash = 1/(1+np.exp(-1 * logit))
        return Y_dash

In [266]:
X = np.random.randn(10,4)
Y = np.random.randint(low=2,size=(10,1))
logistic = LogisticRegression(0.1)

In [267]:
logistic.fit(X,Y,1000)

In [269]:
logistic.predict(X)

array([[  2.07014501e-01],
       [  9.39531045e-01],
       [  5.92793667e-01],
       [  6.96543105e-01],
       [  6.62364511e-01],
       [  9.90736886e-01],
       [  6.59826267e-01],
       [  4.59648365e-02],
       [  2.40951124e-01],
       [  1.83664170e-04]])

In [None]:
#Split n points into k clusters
# Choose k centroids in random
# Calculate distance from each point to each centroid
# Calculate the average of points in a cluster to put a new centroid
# Number of epochs
# (10,5)-> (100,5)

In [343]:
import random

In [376]:
class KMeansClustering:
    def __init__(self,k):
        self.k = k
        self.centroid = None
        self.cluster = None
        
    def fit(self,X_train,n_epochs):
        n_features = X_train.shape[1]
        n_training = X_train.shape[0]
        
        indices = np.random.choice(replace=False,size=self.k,a=n_training)
        centroid = X_train[indices]
        print(centroid)
        clusters = defaultdict(list)
        for epoch in range(n_epochs):
            clusters = defaultdict(list)
            for t in range(len(X_train)):
                distance = [0] * len(centroid)
                for c in range(len(centroid)):
                    distance[c] = np.linalg.norm(X_train[t] - centroid[c])
                cl = distance.index(min(distance))
                clusters[cl].append(X_train[t])
            for key,val in clusters.items():
                centroid[key] = np.average(val,axis=0)
        
        self.centroid = centroid
        self.cluster = clusters
        

In [377]:
kmeans = KMeansClustering(3)

In [378]:
x= np.array([[1,2],[2,4],[4,6],[6,7],[8,10],[11,14],[12,16]])

In [379]:
np.average(x,axis=0)

array([ 6.28571429,  8.42857143])

In [380]:
kmeans.fit(x,100)

[[12 16]
 [11 14]
 [ 8 10]]


In [381]:
kmeans.cluster

defaultdict(list,
            {0: [array([11, 14]), array([12, 16])],
             1: [array([6, 7]), array([ 8, 10])],
             2: [array([1, 2]), array([2, 4]), array([4, 6])]})

In [383]:
kmeans.centroid

array([[11, 15],
       [ 7,  8],
       [ 2,  4]])