In [1]:
import numpy as np
import pandas as pd
from numpy.linalg import pinv
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import random
from pprint import pprint

In [2]:
word_labels = ["CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE", "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT", "label"]
train_df = pd.read_csv("HousingData/housing_train.txt", delim_whitespace=True, names = word_labels, header=None) 
test_df = pd.read_csv("HousingData/housing_test.txt", delim_whitespace=True, names = word_labels, header=None) 


In [3]:
theta_len = len(train_df.columns)
theta_len

14

In [4]:

x = train_df.iloc[:, :-1]
y = train_df.iloc[:, -1]

x = (x - x.mean()) / x.std()
x = np.c_[np.ones(x.shape[0]), x] 

In [5]:
alpha = 0.0001 
iterations = 2000 
m = y.size
np.random.seed(123) 
theta = np.random.rand(theta_len) 


def gradient_descent(x, y, theta, iterations, alpha):
    costs = []
    thetas = [theta]
    for i in range(iterations):
        hypothesis = np.dot(x, theta)
        error = hypothesis - y
        cost = 1/2 * np.dot(error.T, error)
        costs.append(cost)
        theta = theta - (alpha * np.dot(x.T, error))
        thetas.append(theta)
        print(theta)
    return thetas, costs


thetas, costs = gradient_descent(x, y, theta, iterations, alpha)
theta = thetas[-1]



[ 1.65646207  0.0791535   0.39620484  0.30185796  0.7656712   0.19461225
  1.25569793  0.48444725  0.61971407  0.16638693  0.08973621  0.48009052
  0.58904475 -0.27837564]
[ 2.57488726 -0.07552635  0.51589148  0.12192174  0.80758965  0.0335837
  1.48566218  0.34440579  0.69838096  0.00692586 -0.09482764  0.27905412
  0.69956255 -0.54920004]
[ 3.45354464 -0.19160802  0.59934033 -0.00718418  0.84558808 -0.07847963
  1.68100287  0.2481166   0.73382963 -0.10395825 -0.2288433   0.1141968
  0.78076764 -0.76976177]
[ 4.29415616 -0.27917013  0.65639976 -0.09916989  0.87999041 -0.15512741
  1.84940168  0.18343456  0.73841854 -0.17925781 -0.32577275 -0.02316305
  0.84047155 -0.95256065]
[ 5.0983692  -0.34562563  0.69428983 -0.16408959  0.9110875  -0.20627621
  1.99657948  0.14147518  0.72117632 -0.22851381 -0.39550329 -0.13945512
  0.88440762 -1.10681473]
[ 5.86775981 -0.3964293   0.71830139 -0.20931386  0.93914244 -0.23917686
  2.12681313  0.1157461   0.68868932 -0.25873263 -0.44529718 -0.23945

[22.86709096 -0.75464147  0.87760954 -0.25791125  0.81887563 -1.6246199
  2.88611407  0.07111489 -2.98751434  1.71873801 -1.14537771 -2.02309239
  0.86276574 -4.07142365]
[22.86709592 -0.75506572  0.87862975 -0.25756527  0.8187348  -1.62737695
  2.88473646  0.07165316 -2.99014886  1.72250695 -1.14833217 -2.02390449
  0.86239132 -4.07205314]
[22.86710067 -0.75548814  0.87964367 -0.25721738  0.8185946  -1.63011611
  2.88336946  0.07219029 -2.99276182  1.7262644  -1.15128234 -2.0247115
  0.86201985 -4.07267595]
[22.86710521 -0.75590873  0.88065136 -0.2568676   0.81845502 -1.63283748
  2.88201299  0.07272625 -2.9953534   1.73001042 -1.15422821 -2.02551347
  0.8616513  -4.07329216]
[22.86710955 -0.75632749  0.88165285 -0.25651596  0.81831607 -1.63554118
  2.88066695  0.07326101 -2.99792379  1.73374505 -1.15716977 -2.02631043
  0.86128564 -4.07390185]
[22.86711371 -0.75674442  0.88264816 -0.2561625   0.81817773 -1.63822731
  2.87933123  0.07379454 -3.00047315  1.73746835 -1.160107   -2.02710

[22.86720554 -0.81672296  1.00772952 -0.16261799  0.79577624 -1.95322913
  2.72478717  0.15478993 -3.26497382  2.37972781 -1.72981977 -2.12957533
  0.82256881 -4.13138983]
[22.86720554 -0.81686552  1.00798664 -0.16227118  0.79571057 -1.95373954
  2.72449475  0.15497345 -3.26532994  2.381694   -1.73173775 -2.12979127
  0.82251571 -4.13147262]
[22.86720554 -0.81700749  1.00824254 -0.16192511  0.79564508 -1.95424608
  2.72420386  0.15515605 -3.26568299  2.38365536 -1.73365194 -2.13000612
  0.82246306 -4.13155489]
[22.86720554 -0.81714888  1.00849721 -0.16157977  0.79557977 -1.95474878
  2.72391448  0.15533773 -3.26603299  2.38561188 -1.73556233 -2.13021987
  0.82241086 -4.13163666]
[22.86720554 -0.81728967  1.00875068 -0.16123517  0.79551464 -1.95524765
  2.72362661  0.15551849 -3.26637996  2.38756359 -1.73746894 -2.13043253
  0.82235911 -4.13171792]
[22.86720554 -0.81742988  1.00900294 -0.16089131  0.79544968 -1.95574273
  2.72334024  0.15569835 -3.26672393  2.38951051 -1.73937177 -2.130

[22.86720554 -0.83677707  1.04267269 -0.10687266  0.7855053  -2.00382794
  2.68665617  0.17839027 -3.29772877  2.69487343 -2.04806464 -2.15715445
  0.81766473 -4.14174494]
[22.86720554 -0.83684507  1.04278855 -0.10666102  0.78546667 -2.00391254
  2.68653677  0.17846024 -3.29777564  2.69609208 -2.04933298 -2.15723511
  0.81765785 -4.14177697]
[22.86720554 -0.83691284  1.04290404 -0.10644993  0.78542814 -2.0039962
  2.68641781  0.17852991 -3.29782192  2.69730794 -2.05059867 -2.1573154
  0.81765107 -4.14180888]
[22.86720554 -0.8369804   1.04301916 -0.10623939  0.7853897  -2.00407891
  2.68629929  0.17859926 -3.29786759  2.69852101 -2.05186171 -2.15739533
  0.81764438 -4.14184068]
[22.86720554 -0.83704774  1.04313391 -0.10602938  0.78535136 -2.00416069
  2.68618121  0.17866831 -3.29791268  2.6997313  -2.0531221  -2.15747489
  0.81763777 -4.14187237]
[22.86720554 -0.83711487  1.04324829 -0.10581992  0.78531312 -2.00424154
  2.68606357  0.17873706 -3.29795717  2.70093882 -2.05437985 -2.15755

[22.86720554 -0.84773766  1.0614125  -0.07132063  0.77895864 -2.00836916
  2.66815752  0.18847308 -3.29957258  2.90551146 -2.27060282 -2.16857732
  0.8174725  -4.14683369]
[22.86720554 -0.84777402  1.06147523 -0.07119901  0.77893596 -2.00835371
  2.66809842  0.18850232 -3.29956068  2.90625569 -2.2713993  -2.16860961
  0.81747483 -4.14685053]
[22.86720554 -0.84781029  1.0615378  -0.0710777   0.77891333 -2.00833812
  2.66803949  0.18853145 -3.29954871  2.90699826 -2.27219407 -2.16864179
  0.81747717 -4.14686732]
[22.86720554 -0.84784646  1.06160022 -0.07095668  0.77889075 -2.00832237
  2.66798072  0.18856048 -3.29953667  2.90773919 -2.27298714 -2.16867384
  0.81747952 -4.14688407]
[22.86720554 -0.84788254  1.06166248 -0.07083595  0.77886823 -2.00830647
  2.66792212  0.18858941 -3.29952455  2.90847847 -2.27377851 -2.16870577
  0.81748189 -4.14690078]
[22.86720554 -0.84791853  1.06172458 -0.07071553  0.77884576 -2.00829042
  2.66786368  0.18861824 -3.29951235  2.90921612 -2.27456819 -2.168

[22.86720554 -0.85508065  1.0742074  -0.04655268  0.77428631 -2.00164791
  2.65645478  0.19383546 -3.2952616   3.06071436 -2.43781335 -2.17437749
  0.81827451 -4.15025485]
[22.86720554 -0.8550992   1.07424005 -0.04648984  0.77427432 -2.00162287
  2.65642571  0.19384775 -3.29524663  3.06111704 -2.43824956 -2.17439048
  0.81827727 -4.15026355]
[22.86720554 -0.85511771  1.07427264 -0.04642715  0.77426236 -2.00159785
  2.65639671  0.19386    -3.29523167  3.06151883 -2.43868483 -2.17440343
  0.81828002 -4.15027223]
[22.86720554 -0.85513618  1.07430515 -0.04636459  0.77425042 -2.00157285
  2.65636778  0.19387222 -3.29521673  3.06191974 -2.43911916 -2.17441634
  0.81828277 -4.1502809 ]
[22.86720554 -0.8551546   1.07433759 -0.04630218  0.77423851 -2.00154788
  2.65633892  0.19388441 -3.29520181  3.06231978 -2.43955255 -2.17442921
  0.81828551 -4.15028954]
[22.86720554 -0.85517299  1.07436995 -0.04623991  0.77422663 -2.00152294
  2.65631012  0.19389657 -3.29518691  3.06271895 -2.439985   -2.174

[22.86720554 -0.85934178  1.08174598 -0.03212506  0.77151868 -1.99514112
  2.6498225   0.19653183 -3.29146822  3.15412265 -2.53921789 -2.17719607
  0.81897358 -4.15225995]
[22.86720554 -0.85935079  1.08176198 -0.0320946   0.77151281 -1.99512606
  2.64980856  0.1965373  -3.29145961  3.15432161 -2.53943426 -2.17720174
  0.81897517 -4.1522642 ]
[22.86720554 -0.85935977  1.08177794 -0.0320642   0.77150695 -1.99511103
  2.64979465  0.19654276 -3.29145101  3.15452014 -2.53965015 -2.1772074
  0.81897676 -4.15226844]
[22.86720554 -0.85936874  1.08179387 -0.03203387  0.7715011  -1.99509603
  2.64978077  0.19654821 -3.29144243  3.15471824 -2.53986558 -2.17721304
  0.81897834 -4.15227267]
[22.86720554 -0.85937769  1.08180977 -0.03200361  0.77149527 -1.99508105
  2.64976692  0.19655365 -3.29143387  3.15491591 -2.54008054 -2.17721866
  0.81897993 -4.15227689]
[22.86720554 -0.85938661  1.08182563 -0.03197342  0.77148945 -1.99506611
  2.6497531   0.19655907 -3.29142533  3.15511315 -2.54029503 -2.1772

[22.86720554 -0.8610712   1.08482228 -0.02627824  0.77039024 -1.99218285
  2.64714942  0.19757126 -3.28978526  3.19240474 -2.58086649 -2.17826934
  0.81928492 -4.15307598]
[22.86720554 -0.86107642  1.08483157 -0.02626061  0.77038684 -1.99217375
  2.64714137  0.19757437 -3.28978011  3.19252043 -2.5809924  -2.17827254
  0.81928588 -4.15307844]
[22.86720554 -0.86108162  1.08484085 -0.02624301  0.77038344 -1.99216468
  2.64713333  0.19757746 -3.28977497  3.19263587 -2.58111804 -2.17827573
  0.81928683 -4.1530809 ]
[22.86720554 -0.86108682  1.0848501  -0.02622546  0.77038004 -1.99215562
  2.64712531  0.19758056 -3.28976984  3.19275106 -2.5812434  -2.17827892
  0.81928778 -4.15308335]
[22.86720554 -0.861092    1.08485933 -0.02620794  0.77037666 -1.99214658
  2.64711731  0.19758364 -3.28976472  3.19286599 -2.58136849 -2.17828209
  0.81928873 -4.1530858 ]
[22.86720554 -0.86109717  1.08486854 -0.02619046  0.77037328 -1.99213756
  2.64710932  0.19758672 -3.28975961  3.19298068 -2.58149331 -2.178

[22.86720554 -0.86184871  1.08620722 -0.02365141  0.76988247 -1.99081935
  2.64594965  0.19803248 -3.28901425  3.20965275 -2.59964056 -2.17874418
  0.81942778 -4.153443  ]
[22.86720554 -0.86185224  1.0862135  -0.02363951  0.76988017 -1.99081314
  2.64594422  0.19803457 -3.28901074  3.20973094 -2.59972568 -2.17874632
  0.81942843 -4.15344467]
[22.86720554 -0.86185575  1.08621976 -0.02362763  0.76987788 -1.99080694
  2.6459388   0.19803665 -3.28900724  3.20980896 -2.59981061 -2.17874846
  0.81942908 -4.15344633]
[22.86720554 -0.86185926  1.08622601 -0.02361578  0.76987558 -1.99080075
  2.64593339  0.19803872 -3.28900375  3.20988681 -2.59989536 -2.17875059
  0.81942973 -4.15344798]
[22.86720554 -0.86186276  1.08623225 -0.02360396  0.7698733  -1.99079458
  2.64592799  0.19804079 -3.28900026  3.2099645  -2.59997992 -2.17875272
  0.81943037 -4.15344963]
[22.86720554 -0.86186625  1.08623847 -0.02359216  0.76987102 -1.99078842
  2.6459226   0.19804286 -3.28899679  3.21004201 -2.6000643  -2.178

In [6]:
theta

array([22.86720554, -0.86195773,  1.08640147, -0.02328317,  0.76981126,
       -1.99062703,  2.6457815 ,  0.19809694, -3.28890567,  3.21207235,
       -2.60227456, -2.17881049,  0.81944791, -4.15349447])

In [7]:
y_pred_train = theta.T.dot(x.T)

In [8]:
y_pred_train = y_pred_train.T 
mse = np.mean((y - y_pred_train)**2)
mse

22.081399384924232

In [9]:
x_test = test_df.iloc[:, :-1]
y_test = test_df.iloc[:, -1]

x_test = (x_test - x_test.mean()) / x_test.std()
x_test = np.c_[np.ones(x_test.shape[0]), x_test] 

In [10]:
y_pred = theta.T.dot(x_test.T)

In [11]:
y_pred = y_pred.T 
mse_test = np.mean((y_test - y_pred)**2)
mse_test

25.218500989338214