In [1]:
import numpy as np
import pandas as pd

In [2]:
""" functions definitions """
def lossFunction(w):
    return sum((w.dot(x)-y)**2 for x, y in points)/len(x)

def gradientFunction(w):
    return sum(2*(w.dot(x)-y)*x for x, y in points)/len(x)

def gradientDescent(dimensions, eta = 0.1, iterations = 100, printing = 0):
    w = np.zeros(dimensions)
    for i in range (iterations):
        fw = lossFunction(w)
        gradient = gradientFunction(w)
        w -= eta * gradient
        if(printing == 1):
            print("iteration = {}, w {}, fw: {}".format(i, w, fw))
    print("Loss = ", fw)
    return w

In [3]:
"""" data reading and processing """
# reading data
data = pd.read_csv("cubic_zirconia.csv")

# converting strings to int
clr = data['color'].unique()
dict = {}
for i in range(len(clr)):
    dict[clr[i]] = i+1
data['color'] = data['color'].apply(lambda i: dict[i])

cut = data['cut'].unique()
dict = {}
for i in range(len(cut)):
    dict[cut[i]] = i+1
data['cut'] = data['cut'].apply(lambda i: dict[i])

data.drop("clarity", axis = 1, inplace = True)
data.dropna(axis='index', inplace = True)

x = data[['carat']]
y = data['price']
d = x.shape[1]

#data normalization
x = np.array(x.apply(lambda rec:(rec-rec.mean())/rec.std()))
y = np.array((y-y.mean())/y.std())


points = []
for i in range(len(x)):
    points.append((x[i], y[i]))
        
print(f"Features = 1, Iterations = 100, Learning rate = 0.5, Optimal w = {gradientDescent(d, 0.5)}")
print(f"Features = 1, Iterations = 100, Learning rate = 0.1, Optimal w = {gradientDescent(d, 0.1)}")
print(f"Features = 1, Iterations = 100, Learning rate = 0.01, Optimal w = {gradientDescent(d, 0.01)}")
print(f"Features = 1, Iterations = 1000, Learning rate = 0.001, Optimal w = {gradientDescent(d, 0.001, iterations = 1000, printing = 1)}")

Loss =  0.14966393890862925
Features = 1, Iterations = 100, Learning rate = 0.5, Optimal w = [0.92213359]
Loss =  0.1496639389086296
Features = 1, Iterations = 100, Learning rate = 0.1, Optimal w = [0.92213359]
Loss =  0.1652379666741372
Features = 1, Iterations = 100, Learning rate = 0.01, Optimal w = [0.79983114]
iteration = 0, w [0.0018442], fw: 0.9999619337647484
iteration = 1, w [0.00368471], fw: 0.9965642721889514
iteration = 2, w [0.00552153], fw: 0.9931801871524707
iteration = 3, w [0.00735469], fw: 0.9898096244055795
iteration = 4, w [0.00918418], fw: 0.9864525299152352
iteration = 5, w [0.01101001], fw: 0.9831088498643064
iteration = 6, w [0.01283218], fw: 0.9797785306507664
iteration = 7, w [0.01465072], fw: 0.9764615188867308
iteration = 8, w [0.01646561], fw: 0.9731577613976334
iteration = 9, w [0.01827688], fw: 0.9698672052214218
iteration = 10, w [0.02008453], fw: 0.9665897976076789
iteration = 11, w [0.02188855], fw: 0.9633254860167012
iteration = 12, w [0.02368898], fw

iteration = 145, w [0.23370689], fw: 0.625478508377648
iteration = 146, w [0.23508369], fw: 0.6235772256630252
iteration = 147, w [0.23645774], fw: 0.6216835401852246
iteration = 148, w [0.23782904], fw: 0.6197974215868092
iteration = 149, w [0.23919759], fw: 0.6179188396317057
iteration = 150, w [0.24056341], fw: 0.6160477642046319
iteration = 151, w [0.2419265], fw: 0.6141841653106341
iteration = 152, w [0.24328686], fw: 0.6123280130746078
iteration = 153, w [0.24464451], fw: 0.6104792777408659
iteration = 154, w [0.24599943], fw: 0.6086379296725913
iteration = 155, w [0.24735165], fw: 0.6068039393513812
iteration = 156, w [0.24870116], fw: 0.6049772773767893
iteration = 157, w [0.25004798], fw: 0.6031579144658721
iteration = 158, w [0.2513921], fw: 0.6013458214526843
iteration = 159, w [0.25273353], fw: 0.5995409692878035
iteration = 160, w [0.25407228], fw: 0.5977433290379126
iteration = 161, w [0.25540835], fw: 0.5959528718852847
iteration = 162, w [0.25674175], fw: 0.594169569127

iteration = 292, w [0.40921062], fw: 0.41379934169804905
iteration = 293, w [0.41023643], fw: 0.41274389676662937
iteration = 294, w [0.41126019], fw: 0.4116926692327707
iteration = 295, w [0.41228189], fw: 0.41064564224438893
iteration = 296, w [0.41330156], fw: 0.4096027990167459
iteration = 297, w [0.41431918], fw: 0.40856412283216886
iteration = 298, w [0.41533477], fw: 0.40752959703979647
iteration = 299, w [0.41634833], fw: 0.4064992050552704
iteration = 300, w [0.41735986], fw: 0.405472930360546
iteration = 301, w [0.41836937], fw: 0.40445075650354234
iteration = 302, w [0.41937686], fw: 0.4034326670979445
iteration = 303, w [0.42038234], fw: 0.40241864582290005
iteration = 304, w [0.4213858], fw: 0.40140867642278805
iteration = 305, w [0.42238726], fw: 0.40040274270690945
iteration = 306, w [0.42338672], fw: 0.3994008285493079
iteration = 307, w [0.42438417], fw: 0.3984029178884277
iteration = 308, w [0.42537963], fw: 0.3974089947269036
iteration = 309, w [0.4263731], fw: 0.396

iteration = 438, w [0.53920657], fw: 0.2968797122509077
iteration = 439, w [0.5399724], fw: 0.29629146039159704
iteration = 440, w [0.54073669], fw: 0.2957055590973339
iteration = 441, w [0.54149946], fw: 0.29512199897560426
iteration = 442, w [0.54226069], fw: 0.29454077067144463
iteration = 443, w [0.54302041], fw: 0.29396186486726544
iteration = 444, w [0.54377861], fw: 0.29338527228270356
iteration = 445, w [0.54453529], fw: 0.2928109836744944
iteration = 446, w [0.54529046], fw: 0.2922389898362962
iteration = 447, w [0.54604412], fw: 0.2916692815985566
iteration = 448, w [0.54679627], fw: 0.291101849828359
iteration = 449, w [0.54754691], fw: 0.2905366854292845
iteration = 450, w [0.54829606], fw: 0.2899737793412747
iteration = 451, w [0.5490437], fw: 0.2894131225404423
iteration = 452, w [0.54978985], fw: 0.2888547060389967
iteration = 453, w [0.55053451], fw: 0.2882985208850288
iteration = 454, w [0.55127768], fw: 0.2877445581624186
iteration = 455, w [0.55201937], fw: 0.2871928

iteration = 585, w [0.63682789], fw: 0.23138673615737984
iteration = 586, w [0.63739848], fw: 0.23106018427820452
iteration = 587, w [0.63796793], fw: 0.23073493725071348
iteration = 588, w [0.63853624], fw: 0.23041098986092545
iteration = 589, w [0.63910341], fw: 0.2300883369156748
iteration = 590, w [0.63966945], fw: 0.22976697324256115
iteration = 591, w [0.64023436], fw: 0.2294468936898541
iteration = 592, w [0.64079813], fw: 0.22912809312639987
iteration = 593, w [0.64136078], fw: 0.22881056644155182
iteration = 594, w [0.64192231], fw: 0.22849430854508695
iteration = 595, w [0.64248271], fw: 0.22817931436711472
iteration = 596, w [0.64304199], fw: 0.2278655788580088
iteration = 597, w [0.64360015], fw: 0.22755309698832674
iteration = 598, w [0.6441572], fw: 0.22724186374871058
iteration = 599, w [0.64471313], fw: 0.22693187414982632
iteration = 600, w [0.64526795], fw: 0.22662312322226372
iteration = 601, w [0.64582166], fw: 0.22631560601649597
iteration = 602, w [0.64637426], fw

iteration = 730, w [0.70870935], fw: 0.19539484480744565
iteration = 731, w [0.70913619], fw: 0.1952121110567608
iteration = 732, w [0.70956216], fw: 0.195030107482377
iteration = 733, w [0.70998729], fw: 0.19484883116661775
iteration = 734, w [0.71041157], fw: 0.19466827920346966
iteration = 735, w [0.710835], fw: 0.19448844869852994
iteration = 736, w [0.71125758], fw: 0.19430933676896145
iteration = 737, w [0.71167931], fw: 0.1941309405434474
iteration = 738, w [0.71210021], fw: 0.19395325716213668
iteration = 739, w [0.71252026], fw: 0.1937762837766194
iteration = 740, w [0.71293947], fw: 0.19360001754985523
iteration = 741, w [0.71335784], fw: 0.19342445565615013
iteration = 742, w [0.71377537], fw: 0.19324959528109634
iteration = 743, w [0.71419208], fw: 0.19307543362152438
iteration = 744, w [0.71460794], fw: 0.1929019678854794
iteration = 745, w [0.71502298], fw: 0.19272919529215626
iteration = 746, w [0.71543718], fw: 0.19255711307186213
iteration = 747, w [0.71585056], fw: 0.

iteration = 876, w [0.76279991], fw: 0.17515204314017235
iteration = 877, w [0.76311856], fw: 0.17505019654884532
iteration = 878, w [0.76343658], fw: 0.1749487569210234
iteration = 879, w [0.76375396], fw: 0.17484772263053758
iteration = 880, w [0.76407071], fw: 0.17474709205772165
iteration = 881, w [0.76438682], fw: 0.17464686358938486
iteration = 882, w [0.7647023], fw: 0.1745470356187742
iteration = 883, w [0.76501715], fw: 0.17444760654556934
iteration = 884, w [0.76533137], fw: 0.1743485747758264
iteration = 885, w [0.76564497], fw: 0.1742499387219894
iteration = 886, w [0.76595793], fw: 0.17415169680283046
iteration = 887, w [0.76627027], fw: 0.17405384744345767
iteration = 888, w [0.76658199], fw: 0.17395638907525227
iteration = 889, w [0.76689308], fw: 0.1738593201358782
iteration = 890, w [0.76720355], fw: 0.17376263906923464
iteration = 891, w [0.7675134], fw: 0.17366634432544192
iteration = 892, w [0.76782262], fw: 0.1735704343608108
iteration = 893, w [0.76813123], fw: 0.

In [4]:
x = data[['carat', 'color', 'cut']]
y = data['price']
d = x.shape[1]

# data normalization
x = np.array(x.apply(lambda rec:(rec-rec.mean())/rec.std()))
y = np.array((y-y.mean())/y.std())

points = []
for i in range(len(x)):
    points.append((x[i], y[i]))
    
print(f"Features = 3, Iterations = 100, Learning rate = 0.5, Optimal w = {gradientDescent(d, 0.5)}")
print(f"Features = 3, Iterations = 100, Learning rate = 0.1, Optimal w = {gradientDescent(d, 0.1)}")
print(f"Features = 3, Iterations = 100, Learning rate = 0.01, Optimal w = {gradientDescent(d, 0.01)}")
print(f"Features = 3, Iterations = 100, Learning rate = 0.001, Optimal w = {gradientDescent(d, 0.001)}")

Loss =  0.1357514311226255
Features = 3, Iterations = 100, Learning rate = 0.5, Optimal w = [ 0.95692962 -0.09814202 -0.07098246]
Loss =  0.13575143112264043
Features = 3, Iterations = 100, Learning rate = 0.1, Optimal w = [ 0.95692952 -0.09814195 -0.07098242]
Loss =  0.16227679394573538
Features = 3, Iterations = 100, Learning rate = 0.01, Optimal w = [ 0.79825459 -0.01541505 -0.02470005]
Loss =  0.7183541539480602
Features = 3, Iterations = 100, Learning rate = 0.001, Optimal w = [0.16661795 0.02155038 0.00870329]


In [5]:
x = data[['carat', 'color', 'cut', 'depth', 'table']]
y = data['price']
d = x.shape[1]

# data normalization
x = np.array(x.apply(lambda rec:(rec-rec.mean())/rec.std()))
y = np.array((y-y.mean())/y.std())

points = []
for i in range(len(x)):
    points.append((x[i], y[i]))
    
print(f"Features = 5, Iterations = 100, Learning rate = 0.5, Optimal w = {gradientDescent(d, 0.5)}")
print(f"Features = 5, Iterations = 100, Learning rate = 0.1, Optimal w = {gradientDescent(d, 0.1)}")
print(f"Features = 5, Iterations = 100, Learning rate = 0.01, Optimal w = {gradientDescent(d, 0.01)}")
print(f"Features = 5, Iterations = 100, Learning rate = 0.001, Optimal w = {gradientDescent(d, 0.001)}")

Loss =  0.1348142549157066
Features = 5, Iterations = 100, Learning rate = 0.5, Optimal w = [ 0.96085669 -0.09752813 -0.05064569 -0.03063951 -0.03227716]
Loss =  0.1348142564020076
Features = 5, Iterations = 100, Learning rate = 0.1, Optimal w = [ 0.96085174 -0.09752807 -0.05068108 -0.03060971 -0.03223804]
Loss =  0.16273014176623032
Features = 5, Iterations = 100, Learning rate = 0.01, Optimal w = [ 0.79541529 -0.01479002 -0.03256153 -0.01275281  0.01406768]
Loss =  0.7156895760027296
Features = 5, Iterations = 100, Learning rate = 0.001, Optimal w = [ 0.16626825  0.02151172  0.00783853 -0.00072655  0.01961347]


In [6]:
x = data[['carat', 'color', 'cut', 'depth', 'x', 'y', 'z']]
y = data['price']
d = x.shape[1]

# data normalization
x = np.array(x.apply(lambda rec:(rec-rec.mean())/rec.std()))
y = np.array((y-y.mean())/y.std())

points = []
for i in range(len(x)):
    points.append((x[i], y[i]))
    
print(f"Features = 7, Iterations = 100, Learning rate = 0.5, Optimal w = {gradientDescent(d, 0.5)}")
print(f"Features = 7, Iterations = 100, Learning rate = 0.1, Optimal w = {gradientDescent(d, 0.1)}")
print(f"Features = 7, Iterations = 100, Learning rate = 0.01, Optimal w = {gradientDescent(d, 0.01)}")
print(f"Features = 7, Iterations = 100, Learning rate = 0.001, Optimal w = {gradientDescent(d, 0.001)}")

Loss =  2.048077891295911e+94
Features = 7, Iterations = 100, Learning rate = 0.5, Optimal w = [-1.05370694e+47 -3.35698914e+46 -2.03541884e+46 -5.30759922e+45
 -1.06094716e+47 -1.04240174e+47 -1.06079607e+47]
Loss =  0.14240287733075724
Features = 7, Iterations = 100, Learning rate = 0.1, Optimal w = [ 0.78200635 -0.09022727 -0.06575634 -0.01608713  0.10031636 -0.00295004
  0.06184821]
Loss =  0.17809895519569366
Features = 7, Iterations = 100, Learning rate = 0.01, Optimal w = [ 0.30734046 -0.05567691 -0.04965099 -0.01346959  0.22099967  0.1882748
  0.2103628 ]
Loss =  0.36666310913884687
Features = 7, Iterations = 100, Learning rate = 0.001, Optimal w = [ 0.13038446  0.01343434  0.0038394  -0.00188877  0.12269669  0.11749237
  0.12091656]
