In [8]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import time
import math

In [9]:
def setting_data(data=None):
    """Build a design matrix by prepending an intercept column of ones.

    Parameters
    ----------
    data : 2-D array-like
        Raw feature values, one row per observation.

    Returns
    -------
    numpy.ndarray
        Float array with one more column than ``data``: column 0 is all
        ones and the remaining columns hold ``data``.
    """
    n_rows, n_cols = np.shape(data)[0], np.shape(data)[1]
    design = np.empty((n_rows, n_cols + 1))
    design[:, 0] = 1.0   # intercept term
    design[:, 1:] = data
    return design

In [10]:
def predict_values(feature_matrix=None, weights=None):
    """Return the linear-model output, i.e. ``feature_matrix`` dotted with ``weights``.

    Parameters
    ----------
    feature_matrix : array-like
        Left operand of the dot product (one row per observation).
    weights : array-like
        Right operand of the dot product (one entry per feature column).

    Returns
    -------
    Whatever ``numpy.dot`` yields for the two operands.
    """
    predictions = np.dot(feature_matrix, weights)
    return predictions

In [11]:
def polynomial_features_generation(data=None, degree=0):
    """Append element-wise powers 2..degree of ``data`` as extra columns.

    The previous implementation broadcast ``data`` against the exponent
    vector, which is only correct for a single feature column: with k > 1
    columns it either raised or (when k == degree - 1) silently raised each
    column to a *different* power. Here every power is applied to the whole
    matrix, so any number of columns is supported and the single-column
    result is unchanged.

    Parameters
    ----------
    data : array-like, shape (n, k) or (n,)
        Input features. A 1-D vector is treated as one feature column.
    degree : int, default 0
        Highest power to generate. Values below 2 add no columns.

    Returns
    -------
    numpy.ndarray, shape (n, k * max(degree, 1))
        ``[data, data**2, ..., data**degree]`` stacked left to right.
    """
    base = np.asarray(data)
    if base.ndim == 1:
        # Treat a flat vector as a single feature column.
        base = base.reshape(-1, 1)
    # One block of columns per exponent keeps every feature on the same power.
    higher_orders = [np.power(base, exponent) for exponent in range(2, degree + 1)]
    return np.hstack([base] + higher_orders)

In [12]:
def standardization(data=None):
    """Z-score every column: subtract the column mean, divide by the column std.

    Columns with zero standard deviation (constant features) are only
    centred, so they come out as all zeros instead of NaN from a 0/0
    division.

    Parameters
    ----------
    data : array-like
        Numeric values, one column per feature. The input is not modified.

    Returns
    -------
    numpy.ndarray
        Float array of the same shape as ``data``.
    """
    # np.array always copies, so the caller's data is never touched.
    values = np.array(data, dtype=float)
    means = values.mean(axis=0)
    stds = values.std(axis=0)
    # Dividing a constant column by 1 leaves its centred zeros intact.
    safe_stds = np.where(stds == 0, 1.0, stds)
    return (values - means) / safe_stds

In [13]:
def min_max_scaling(data=None):
    """Rescale every column linearly so its minimum maps to 0 and its maximum to 1.

    Columns whose minimum equals their maximum (constant features) map to
    all zeros instead of NaN from a 0/0 division.

    Parameters
    ----------
    data : array-like
        Numeric values, one column per feature. The input is not modified.

    Returns
    -------
    numpy.ndarray
        Float array of the same shape as ``data``.
    """
    # np.array always copies, so the caller's data is never touched.
    values = np.array(data, dtype=float)
    mins = values.min(axis=0)
    ranges = values.max(axis=0) - mins
    # A zero range means a constant column; divide by 1 so it stays at 0.
    safe_ranges = np.where(ranges == 0, 1.0, ranges)
    return (values - mins) / safe_ranges

In [29]:
def normalize(feature_matrix=None):
    """Scale each column to unit Euclidean (L2) norm.

    Columns whose norm is zero (all-zero features) are returned unchanged
    instead of becoming NaN from a 0/0 division.

    Parameters
    ----------
    feature_matrix : array-like
        Numeric values, one column per feature.

    Returns
    -------
    tuple
        ``(normalized_features, norms)`` where ``norms`` holds the true
        per-column L2 norms (zeros included), so callers can still map
        weights between the normalized and the original scale.
    """
    norms = np.sqrt(np.sum(np.square(feature_matrix), axis=0))
    # Only the divisor is patched; the reported norms stay untouched.
    safe_norms = np.where(norms == 0, 1.0, norms)
    normalized_features = feature_matrix / safe_norms
    return (normalized_features, norms)

In [14]:
def root_mean_squared_error(y=None,y_bar=None,no_of_observations=0):
    """Root-mean-squared error between two sets of values.

    Computes ``sqrt(sum((y - y_bar)**2) / no_of_observations)``.

    Parameters
    ----------
    y, y_bar : array-like
        The two value sets to compare; they must broadcast against each
        other.
    no_of_observations : int, default 0
        Divisor for the mean. When left at 0 it is inferred from the length
        of ``y``'s first axis, so the default call no longer divides by
        zero.

    Returns
    -------
    numpy.float64
    """
    residuals = np.subtract(y, y_bar)
    if not no_of_observations:
        y_shape = np.shape(y)
        # A scalar target counts as one observation.
        no_of_observations = y_shape[0] if y_shape else 1
    return np.sqrt(np.divide(np.sum(np.square(residuals)), no_of_observations))

In [15]:
def gradient_descent(x=None,y=None,tolerance_gradient=0.01, tolerance_cost=0.0000001,
                     alpha = 0.001, max_iter=10000,l2_regularization=0):
    """Fit linear-regression weights by batch gradient descent with optional L2 shrinkage.

    With ``X`` being ``x`` plus a leading intercept column and
    ``g = X.T @ (X @ w - y)``, every iteration applies::

        w[0]  <- w[0]                                      - alpha * g[0]
        w[1:] <- w[1:] * (1 - 2*alpha*l2_regularization)   - alpha * g[1:]

    i.e. the intercept is never shrunk. Weights start from uniform random
    values in [0, 1).

    Parameters
    ----------
    x : numpy.ndarray, shape (n, d)
        Feature values without an intercept column.
    y : array-like, shape (n,) or (n, 1)
        Targets. A 1-D target is reshaped to a column; left 1-D it would
        broadcast against the (n, 1) predictions into an (n, n) error matrix.
    tolerance_gradient : float
        Stop once the Euclidean norm of ``g`` is at or below this value.
    tolerance_cost : float
        Stop once the squared-error sum changes by less than this value
        between consecutive iterations.
    alpha : float
        Step size.
    max_iter : int
        Hard cap on the number of iterations.
    l2_regularization : float
        Shrinkage strength applied to every weight except the intercept.

    Returns
    -------
    tuple
        ``(iterations, weights, gradients, costs)``: the iteration count, the
        final (d+1, 1) weight array, an array with the gradient norm of each
        iteration, and a list with the squared-error sum of each iteration.
        Both histories are measured before that iteration's weight update.
    """
    feature_matrix = setting_data(data=x)

    # Force a flat target into a column so errors stay (n, 1).
    y = np.asarray(y)
    if y.ndim == 1:
        y = y.reshape(-1, 1)

    weights = np.random.rand(feature_matrix.shape[1], 1)
    # Multiplicative shrink applied to the non-intercept weights each step.
    shrink = 1 - 2 * alpha * l2_regularization

    i = 0
    # inf (rather than an arbitrary number) guarantees the cost-change test
    # can never fire on the very first iteration.
    prev_cost = np.inf
    costs = []
    gradients = []

    converged = False
    while not converged:
        i += 1

        predicted = predict_values(feature_matrix=feature_matrix, weights=weights)
        errors = np.subtract(predicted, y)

        # Gradient direction of the squared-error term.
        partials = np.dot(feature_matrix.T, errors)

        # Shrink everything but the intercept, then take the gradient step.
        weights = np.subtract(np.vstack((weights[0], np.multiply(weights[1:], shrink))),
                              np.multiply(alpha, partials))

        gradient_magnitude = np.sqrt(np.sum(np.square(partials)))
        cur_cost = np.sum(np.square(errors))

        gradients.append(gradient_magnitude)
        costs.append(cur_cost)

        if (gradient_magnitude <= tolerance_gradient) or (abs(cur_cost - prev_cost) < tolerance_cost):
            if gradient_magnitude <= tolerance_gradient:
                print('[Gradient Magnitude - Tolerance] Satisfied')
            else:
                print('[Cost Tolerance - Precision] Satisfied')
            converged = True

        if i >= max_iter:
            print('Iterations Completed')
            converged = True

        prev_cost = cur_cost
    return (i, np.array(weights),np.array(gradients), costs)

In [None]:
def coordinate_descent(x=None,y=None,tolerance_gradient=0.01, 
                       tolerance_cost=0.0000001,alpha = 0.001, max_iter=10000,
                       l1_regularization=0):
    """Fit lasso (L1-regularized) linear-regression weights by cyclic coordinate descent.

    Minimizes ``sum((y - X w)**2) + l1_regularization * sum(|w[1:]|)`` where
    ``X`` is ``x`` with a leading intercept column; the intercept ``w[0]`` is
    not penalized. Each sweep updates one weight at a time to its exact
    minimizer while all other weights are held fixed::

        rho_j = X[:, j] . (y - prediction_without_feature_j)
        z_j   = X[:, j] . X[:, j]
        w_0   = rho_0 / z_0
        w_j   = sign(rho_j) * max(|rho_j| - l1_regularization / 2, 0) / z_j

    Parameters
    ----------
    x : numpy.ndarray, shape (n, d)
        Feature values without an intercept column.
    y : array-like, shape (n,) or (n, 1)
        Targets; reshaped to a column internally.
    tolerance_gradient : float
        Unused. Coordinate updates are closed-form, so no gradient norm is
        tracked; kept so the signature mirrors ``gradient_descent``.
    tolerance_cost : float
        Stop once the objective changes by less than this value between
        consecutive sweeps.
    alpha : float
        Unused (no step size is needed); kept for signature parity.
    max_iter : int
        Hard cap on the number of full sweeps.
    l1_regularization : float
        L1 penalty strength.

    Returns
    -------
    tuple
        ``(iterations, weights, costs)``: number of sweeps performed, the
        final (d+1, 1) weight array, and the objective value after each sweep.
    """
    feature_matrix = setting_data(data=x)
    y = np.asarray(y, dtype=float).reshape(-1, 1)

    n_weights = feature_matrix.shape[1]
    weights = np.random.rand(n_weights, 1)

    # z_j for every column; constant across sweeps, so computed once.
    column_sq_norms = np.sum(np.square(feature_matrix), axis=0)
    # Residuals are kept up to date incrementally instead of deleting and
    # re-inserting columns for every coordinate.
    residuals = y - predict_values(feature_matrix=feature_matrix, weights=weights)

    iteration = 0
    prev_cost = np.inf
    costs = []

    converged = False
    while not converged:
        iteration += 1
        for j in range(n_weights):
            feature_j = feature_matrix[:, [j]]
            z_j = column_sq_norms[j]

            # Residual as if feature j were absent from the model.
            partial_residuals = residuals + feature_j * weights[j, 0]

            if z_j == 0:
                # An all-zero column cannot explain anything; pin its weight.
                new_weight = 0.0
            else:
                rho_j = float(np.sum(feature_j * partial_residuals))
                if j == 0:
                    # Intercept: plain least-squares update, no penalty.
                    new_weight = rho_j / z_j
                else:
                    # Soft-thresholding drives weakly correlated weights to 0.
                    new_weight = np.sign(rho_j) * max(abs(rho_j) - l1_regularization / 2.0, 0.0) / z_j

            residuals = partial_residuals - feature_j * new_weight
            weights[j, 0] = new_weight

        cur_cost = np.sum(np.square(residuals)) + l1_regularization * np.sum(np.abs(weights[1:]))
        costs.append(cur_cost)

        if abs(cur_cost - prev_cost) < tolerance_cost:
            print('[Cost Tolerance - Precision] Satisfied')
            converged = True

        if iteration >= max_iter:
            print('Iterations Completed')
            converged = True

        prev_cost = cur_cost
    return (iteration, np.array(weights), costs)

In [16]:
# Explicit per-column dtypes handed to pandas when reading the training CSV.
dtype_dict = {
    'bathrooms': float,
    'waterfront': int,
    'sqft_above': int,
    'sqft_living15': float,
    'grade': int,
    'yr_renovated': int,
    'price': float,
    'bedrooms': float,
    'zipcode': str,
    'long': float,
    'sqft_lot15': float,
    'sqft_living': float,
    'floors': str,
    'condition': int,
    'lat': float,
    'date': str,
    'sqft_basement': int,
    'yr_built': int,
    'id': str,
    'sqft_lot': int,
    'view': int,
}
# Second name bound to the very same dict object.
data_types = dtype_dict

data = pd.read_csv('Related Datasets/kc_house_train_data.csv', dtype=data_types)

In [30]:
# Toy 10x5 matrix of uniform [0, 1) samples for trying out column delete/insert.
X = np.random.rand(10, 5)
X

array([[0.19950585, 0.39435213, 0.92667488, 0.90217904, 0.33609493],
       [0.2168195 , 0.78884719, 0.57322897, 0.33414328, 0.48069736],
       [0.62707871, 0.95453995, 0.72615349, 0.94609055, 0.84480906],
       [0.81113138, 0.83506624, 0.99519287, 0.37987991, 0.61312186],
       [0.82230393, 0.1161766 , 0.74884054, 0.61704703, 0.64625948],
       [0.30762416, 0.09954673, 0.50343005, 0.48840257, 0.46650352],
       [0.01957862, 0.86787931, 0.50425555, 0.4775349 , 0.87098522],
       [0.95503305, 0.90424297, 0.95294636, 0.42927533, 0.74250794],
       [0.72229204, 0.16221511, 0.05803657, 0.97092983, 0.25494678],
       [0.0511185 , 0.80221669, 0.27413082, 0.00728241, 0.94093357]])

In [32]:
# Copy of column index 2 of X, shaped as a single-column 2-D array.
X[:, 2].copy().reshape(-1, 1)

array([[0.92667488],
       [0.57322897],
       [0.72615349],
       [0.99519287],
       [0.74884054],
       [0.50343005],
       [0.50425555],
       [0.95294636],
       [0.05803657],
       [0.27413082]])

In [33]:
# Keep column index 2 of X as a 1-D slice.
w = X[:,2]

In [54]:
# Drop column index 2 (axis=1); the result is bound to a new name, and the
# displayed array has four columns.
new_X = np.delete(X,2,1)
new_X

array([[0.19950585, 0.39435213, 0.90217904, 0.33609493],
       [0.2168195 , 0.78884719, 0.33414328, 0.48069736],
       [0.62707871, 0.95453995, 0.94609055, 0.84480906],
       [0.81113138, 0.83506624, 0.37987991, 0.61312186],
       [0.82230393, 0.1161766 , 0.61704703, 0.64625948],
       [0.30762416, 0.09954673, 0.48840257, 0.46650352],
       [0.01957862, 0.86787931, 0.4775349 , 0.87098522],
       [0.95503305, 0.90424297, 0.42927533, 0.74250794],
       [0.72229204, 0.16221511, 0.97092983, 0.25494678],
       [0.0511185 , 0.80221669, 0.00728241, 0.94093357]])

In [44]:
# Show X with the values 1..10 inserted as a new column at index 2 (result not stored).
np.insert(X, 2, np.arange(1, 11), axis=1)

array([[1.99505847e-01, 3.94352128e-01, 1.00000000e+00, 9.26674884e-01,
        9.02179043e-01, 3.36094932e-01],
       [2.16819501e-01, 7.88847194e-01, 2.00000000e+00, 5.73228973e-01,
        3.34143281e-01, 4.80697365e-01],
       [6.27078709e-01, 9.54539950e-01, 3.00000000e+00, 7.26153493e-01,
        9.46090549e-01, 8.44809063e-01],
       [8.11131377e-01, 8.35066238e-01, 4.00000000e+00, 9.95192874e-01,
        3.79879910e-01, 6.13121857e-01],
       [8.22303933e-01, 1.16176605e-01, 5.00000000e+00, 7.48840540e-01,
        6.17047035e-01, 6.46259477e-01],
       [3.07624158e-01, 9.95467296e-02, 6.00000000e+00, 5.03430053e-01,
        4.88402565e-01, 4.66503521e-01],
       [1.95786178e-02, 8.67879310e-01, 7.00000000e+00, 5.04255548e-01,
        4.77534903e-01, 8.70985219e-01],
       [9.55033054e-01, 9.04242969e-01, 8.00000000e+00, 9.52946361e-01,
        4.29275326e-01, 7.42507945e-01],
       [7.22292042e-01, 1.62215106e-01, 9.00000000e+00, 5.80365655e-02,
        9.70929831e-01, 

In [73]:
# Random column of 11 weights (10 + 1 rows), used below to try row delete/insert.
weights = np.random.rand(11, 1)
weights.shape

(11, 1)

In [74]:
# Display the current contents of `weights`.
weights

array([[0.82059623],
       [0.62153869],
       [0.47531738],
       [0.0678297 ],
       [0.10293724],
       [0.38557779],
       [0.55093306],
       [0.82720093],
       [0.47093431],
       [0.11559756],
       [0.05300184]])

In [75]:
# Remove the row at index 1 (axis 0) and rebind `weights` to the shorter array.
# NOTE(review): because the name is overwritten, re-running this cell removes
# one more row each time.
weights = np.delete(weights,1,0)
weights

array([[0.82059623],
       [0.47531738],
       [0.0678297 ],
       [0.10293724],
       [0.38557779],
       [0.55093306],
       [0.82720093],
       [0.47093431],
       [0.11559756],
       [0.05300184]])

In [76]:
# Insert a row holding the value 1 at index 1 (axis 0).
# NOTE(review): the saved output shows 1.01e+02 at index 1, which does not
# match the value 1 inserted here — the output looks stale; re-run to confirm.
weights = np.insert(weights,1,1,0)
weights

array([[8.20596230e-01],
       [1.01000000e+02],
       [4.75317381e-01],
       [6.78296987e-02],
       [1.02937240e-01],
       [3.85577791e-01],
       [5.50933059e-01],
       [8.27200934e-01],
       [4.70934308e-01],
       [1.15597556e-01],
       [5.30018394e-02]])