In [1]:
import numpy as np
import matplotlib.pyplot as plt
from prettytable import PrettyTable

In [4]:
abalone_data = np.genfromtxt('./Datasets/Dataset.data', delimiter=' ') 
abalone_data = np.delete(abalone_data, 0, 1) #Delete sex because its NaN
abalone_data = np.insert(abalone_data, 0, 1, axis = 1) #replace with all 1 for indep

NbAttributes = np.shape(abalone_data)[1] #9 (counting indep)
NbCases = np.shape(abalone_data)[0]      #4177
lables = ['indep', 'length', 'diameter','height','whole_weight','Shucked_weight','viscera_weight','shell_weight', 'rings']

t = PrettyTable(lables)
for i in range(NbCases):
    t.add_row( [abalone_data[i][j] for j in range(NbAttributes)] )

print(t.get_string(start = 0, end = 10))

+-------+--------+----------+--------+--------------+----------------+----------------+--------------+-------+
| indep | length | diameter | height | whole_weight | Shucked_weight | viscera_weight | shell_weight | rings |
+-------+--------+----------+--------+--------------+----------------+----------------+--------------+-------+
|  1.0  | 0.455  |  0.365   | 0.095  |    0.514     |     0.2245     |     0.101      |     0.15     |  15.0 |
|  1.0  |  0.35  |  0.265   |  0.09  |    0.2255    |     0.0995     |     0.0485     |     0.07     |  7.0  |
|  1.0  |  0.53  |   0.42   | 0.135  |    0.677     |     0.2565     |     0.1415     |     0.21     |  9.0  |
|  1.0  |  0.44  |  0.365   | 0.125  |    0.516     |     0.2155     |     0.114      |    0.155     |  10.0 |
|  1.0  |  0.33  |  0.255   |  0.08  |    0.205     |     0.0895     |     0.0395     |    0.055     |  7.0  |
|  1.0  | 0.425  |   0.3    | 0.095  |    0.3515    |     0.141      |     0.0775     |     0.12     |  8.0  |
|

## Apply Gradient Descent
$$ \theta_{j}  = \theta_{j} - \alpha\frac{1}{m} \sum_{i=1}^{m} [h_\theta (X^{i}) - Y^{i}]*X_j^{i} $$

In [71]:
NbVariables = NbAttributes - 1 #because the result is not considered a variable

X = np.delete(abalone_data, NbVariables, 1) #only the parameters
Y = abalone_data.T[NbAttributes-1].reshape(NbCases, 1) # Only the result which is the nb or rings as a column

In [72]:
# Define the gradient correspondent to the cuadratic const function

# Th is the previous value of Th we had as a vector containing many thetas
# j is the index of Th we wish to upate, we will have to update all or some for stochastic gradient descent
# X is a matrix with colums being the variables used for learning
# Y is a column vecotr which gives the correct results for the parameters of X
def gradient (Th, j, X, Y):
    gradient = 0
    
    for i in range(NbCases):
        gradient += np.dot( ( np.dot( Th, X[i] ) - Y[i] ), X[i][j] )[0]
    
    return gradient

In [94]:
# Use the gradient of each parameter to update accordingly
Th = np.ones(NbVariables) # Initialize to random values like all 1's
Alph = 0.001
m = NbAttributes

iterations = 50

for k in range(iterations):
    NewTh = Th #Define a new th as placeholder because we want to update all at once
    
    for j in range(NbVariables):
        NewTh[j] -= Alph/m * gradient(Th, j, X, Y)
    
    Th = NewTh    

3.90290388889
4.63178043111
4.85791559768
4.96373303577
5.03708612177
5.0984744617
5.15297012199
5.20208673336
5.24649459114
5.28664366971
5.32290834076
5.35562259284
5.38509004576
5.41158803785
5.43537020568
5.45666857934
5.47569543879
5.49264499849
5.50769494546
5.52100784582
5.53273243143
5.54300477701
5.55194937717
5.55968013167
5.56630124697
5.571908061
5.57658779777
5.58042025775
5.58347844955
5.58582916781
5.58753352197
5.58864741997
5.58922201085
5.58930408972
5.58893646817
5.58815831325
5.5870054576
5.58551068312
5.58370398061
5.58161278723
5.57926220377
5.57667519351
5.57387276405
5.57087413371
5.56769688383
5.56435709801
5.5608694896
5.55724751823
5.55350349655
5.54964868783
