## Testing Places365 CNN (AlexNet)

In [1]:
import torch
from torch.autograd import Variable as V
import torchvision.models as models
from torchvision import transforms as trn
from torch.nn import functional as F
import os
from PIL import Image
import numpy as np
import torch.nn as nn
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error
import pandas as pd

In [2]:
# PlacesCNN for scene classification
#
# by Bolei Zhou
# last modified by Bolei Zhou, Dec.27, 2017 with latest pytorch and torchvision 
# (upgrade your torchvision please if there is trn.Resize error)



# the architecture to use
arch = 'alexnet'

# load the pre-trained weights
model_file = '%s_places365.pth.tar' % arch
if not os.access(model_file, os.W_OK):
    weight_url = 'http://places2.csail.mit.edu/models_places365/' + model_file
    os.system('wget ' + weight_url)

model = models.__dict__[arch](num_classes=365)
checkpoint = torch.load(model_file, map_location=lambda storage, loc: storage)
state_dict = {str.replace(k,'module.',''): v for k,v in checkpoint['state_dict'].items()}
model.load_state_dict(state_dict)
model.eval()


# load the image transformer
centre_crop = trn.Compose([
        trn.Resize((256,256)),
        trn.CenterCrop(224),
        trn.ToTensor(),
        trn.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# load the class label
file_name = 'categories_places365.txt'
if not os.access(file_name, os.W_OK):
    synset_url = 'https://raw.githubusercontent.com/csailvision/places365/master/categories_places365.txt'
    os.system('wget ' + synset_url)
classes = list()
with open(file_name) as class_file:
    for line in class_file:
        classes.append(line.strip().split(' ')[0][3:])
classes = tuple(classes)

# load the test image
img_name = '1_in.jpg'
if not os.access(img_name, os.W_OK):
    img_url = 'http://places.csail.mit.edu/demo/' + img_name
    os.system('wget ' + img_url)

img = Image.open(img_name)
input_img = V(centre_crop(img).unsqueeze(0))

# forward pass
logit = model.forward(input_img)
h_x = F.softmax(logit, 1).data.squeeze()
probs, idx = h_x.sort(0, True)

print('{} prediction on {}'.format(arch,img_name))
# output the prediction
for i in range(0, 5):
    print('{:.3f} -> {}'.format(probs[i], classes[idx[i]]))

alexnet prediction on 1_in.jpg
0.329 -> mezzanine
0.191 -> living_room
0.158 -> entrance_hall
0.123 -> television_room
0.090 -> artists_loft


## Viewing Model architecture

In [3]:
model

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

## Amending the classifier layer to include only untill fc4 layer

In [4]:
new_classifier = nn.Sequential(*list(model.classifier.children())[:-1])
model.classifier = new_classifier

## Extracting 4096 features by passing the image through our new model

Each image (5) of each house (2000), are passed through the CNN to isolate values (4096) from FC4 layer.
These values are then stored in the form of .npy files in a separate folder for each house.

In [5]:
# Get the list of all files and directories
# in the root directory
path = r"C:\Users\Vaddi\CPS 595\Houses"
dir_list = os.listdir(path)

dir_list = [x.lower() for x in dir_list]

In [6]:
type(dir_list)

list

In [7]:
#applying natural sort on directory list
from natsort import os_sorted
dir_list = os_sorted(dir_list)

In [8]:
#validating list of files
dir_df = pd.DataFrame(dir_list, columns = ['file_names'])
dir_df_grp = dir_df.copy()
temp = dir_df['file_names'].str.split('_', expand = True)
dir_df_grp['sno'] = temp[0]
dir_df_grp['room_name'] = temp[1]

dir_df_grp = dir_df_grp[['sno','room_name']]

dir_df_count = dir_df_grp.groupby('sno').count()

#to check if each house has exactly 5 pictures
dir_df_count[dir_df_count['room_name'] != 5]


Unnamed: 0_level_0,room_name
sno,Unnamed: 1_level_1


In [9]:
dir_df

Unnamed: 0,file_names
0,1_bath.jpg
1,1_bed.jpg
2,1_in.jpg
3,1_kit.jpg
4,1_out.jpg
...,...
14995,3000_bath.jpg
14996,3000_bed.jpg
14997,3000_in.jpg
14998,3000_kit.jpg


In [10]:
15000//5+1

3001

In [11]:

#for each house
for x in range(1, (len(dir_df)//5+1), 1):
    print('at house ' + str(x))
    house_pics = dir_df[dir_df['file_names'].str.startswith(str(x)+'_')].copy()
    house_pics = house_pics.reset_index(drop=True)
    feats = []
    
    #for each house's pictures (5)
    for i in range(5):
        fname = house_pics['file_names'][i]
        img = Image.open(path+"\\"+fname)
        input_img = V(centre_crop(img).unsqueeze(0))
        feat = model.forward(input_img)
        feat = feat.tolist()[0]
        feats.extend(feat)

    path2 = 'house_feats' + '\\' + str(x)
    os.mkdir(path2)

    np.save(path2 + '\\'+ str(x), feats)
    print(str(x) + ' house done')




at house 1
1 house done
at house 2
2 house done
at house 3
3 house done
at house 4
4 house done
at house 5
5 house done
at house 6
6 house done
at house 7
7 house done
at house 8
8 house done
at house 9
9 house done
at house 10
10 house done
at house 11
11 house done
at house 12
12 house done
at house 13
13 house done
at house 14
14 house done
at house 15
15 house done
at house 16
16 house done
at house 17
17 house done
at house 18
18 house done
at house 19
19 house done
at house 20
20 house done
at house 21
21 house done
at house 22
22 house done
at house 23
23 house done
at house 24
24 house done
at house 25
25 house done
at house 26
26 house done
at house 27
27 house done
at house 28
28 house done
at house 29
29 house done
at house 30
30 house done
at house 31
31 house done
at house 32
32 house done
at house 33
33 house done
at house 34
34 house done
at house 35
35 house done
at house 36
36 house done
at house 37
37 house done
at house 38
38 house done
at house 39
39 house done
at h

302 house done
at house 303
303 house done
at house 304
304 house done
at house 305
305 house done
at house 306
306 house done
at house 307
307 house done
at house 308
308 house done
at house 309
309 house done
at house 310
310 house done
at house 311
311 house done
at house 312
312 house done
at house 313
313 house done
at house 314
314 house done
at house 315
315 house done
at house 316
316 house done
at house 317
317 house done
at house 318
318 house done
at house 319
319 house done
at house 320
320 house done
at house 321
321 house done
at house 322
322 house done
at house 323
323 house done
at house 324
324 house done
at house 325
325 house done
at house 326
326 house done
at house 327
327 house done
at house 328
328 house done
at house 329
329 house done
at house 330
330 house done
at house 331
331 house done
at house 332
332 house done
at house 333
333 house done
at house 334
334 house done
at house 335
335 house done
at house 336
336 house done
at house 337
337 house done
at ho

596 house done
at house 597
597 house done
at house 598
598 house done
at house 599
599 house done
at house 600
600 house done
at house 601
601 house done
at house 602
602 house done
at house 603
603 house done
at house 604
604 house done
at house 605
605 house done
at house 606
606 house done
at house 607
607 house done
at house 608
608 house done
at house 609
609 house done
at house 610
610 house done
at house 611
611 house done
at house 612
612 house done
at house 613
613 house done
at house 614
614 house done
at house 615
615 house done
at house 616
616 house done
at house 617
617 house done
at house 618
618 house done
at house 619
619 house done
at house 620
620 house done
at house 621
621 house done
at house 622
622 house done
at house 623
623 house done
at house 624
624 house done
at house 625
625 house done
at house 626
626 house done
at house 627
627 house done
at house 628
628 house done
at house 629
629 house done
at house 630
630 house done
at house 631
631 house done
at ho

889 house done
at house 890
890 house done
at house 891
891 house done
at house 892
892 house done
at house 893
893 house done
at house 894
894 house done
at house 895
895 house done
at house 896
896 house done
at house 897
897 house done
at house 898
898 house done
at house 899
899 house done
at house 900
900 house done
at house 901
901 house done
at house 902
902 house done
at house 903
903 house done
at house 904
904 house done
at house 905
905 house done
at house 906
906 house done
at house 907
907 house done
at house 908
908 house done
at house 909
909 house done
at house 910
910 house done
at house 911
911 house done
at house 912
912 house done
at house 913
913 house done
at house 914
914 house done
at house 915
915 house done
at house 916
916 house done
at house 917
917 house done
at house 918
918 house done
at house 919
919 house done
at house 920
920 house done
at house 921
921 house done
at house 922
922 house done
at house 923
923 house done
at house 924
924 house done
at ho

1170 house done
at house 1171
1171 house done
at house 1172
1172 house done
at house 1173
1173 house done
at house 1174
1174 house done
at house 1175
1175 house done
at house 1176
1176 house done
at house 1177
1177 house done
at house 1178
1178 house done
at house 1179
1179 house done
at house 1180
1180 house done
at house 1181
1181 house done
at house 1182
1182 house done
at house 1183
1183 house done
at house 1184
1184 house done
at house 1185
1185 house done
at house 1186
1186 house done
at house 1187
1187 house done
at house 1188
1188 house done
at house 1189
1189 house done
at house 1190
1190 house done
at house 1191
1191 house done
at house 1192
1192 house done
at house 1193
1193 house done
at house 1194
1194 house done
at house 1195
1195 house done
at house 1196
1196 house done
at house 1197
1197 house done
at house 1198
1198 house done
at house 1199
1199 house done
at house 1200
1200 house done
at house 1201
1201 house done
at house 1202
1202 house done
at house 1203
1203 house

1444 house done
at house 1445
1445 house done
at house 1446
1446 house done
at house 1447
1447 house done
at house 1448
1448 house done
at house 1449
1449 house done
at house 1450
1450 house done
at house 1451
1451 house done
at house 1452
1452 house done
at house 1453
1453 house done
at house 1454
1454 house done
at house 1455
1455 house done
at house 1456
1456 house done
at house 1457
1457 house done
at house 1458
1458 house done
at house 1459
1459 house done
at house 1460
1460 house done
at house 1461
1461 house done
at house 1462
1462 house done
at house 1463
1463 house done
at house 1464
1464 house done
at house 1465
1465 house done
at house 1466
1466 house done
at house 1467
1467 house done
at house 1468
1468 house done
at house 1469
1469 house done
at house 1470
1470 house done
at house 1471
1471 house done
at house 1472
1472 house done
at house 1473
1473 house done
at house 1474
1474 house done
at house 1475
1475 house done
at house 1476
1476 house done
at house 1477
1477 house

1719 house done
at house 1720
1720 house done
at house 1721
1721 house done
at house 1722
1722 house done
at house 1723
1723 house done
at house 1724
1724 house done
at house 1725
1725 house done
at house 1726
1726 house done
at house 1727
1727 house done
at house 1728
1728 house done
at house 1729
1729 house done
at house 1730
1730 house done
at house 1731
1731 house done
at house 1732
1732 house done
at house 1733
1733 house done
at house 1734
1734 house done
at house 1735
1735 house done
at house 1736
1736 house done
at house 1737
1737 house done
at house 1738
1738 house done
at house 1739
1739 house done
at house 1740
1740 house done
at house 1741
1741 house done
at house 1742
1742 house done
at house 1743
1743 house done
at house 1744
1744 house done
at house 1745
1745 house done
at house 1746
1746 house done
at house 1747
1747 house done
at house 1748
1748 house done
at house 1749
1749 house done
at house 1750
1750 house done
at house 1751
1751 house done
at house 1752
1752 house

1995 house done
at house 1996
1996 house done
at house 1997
1997 house done
at house 1998
1998 house done
at house 1999
1999 house done
at house 2000
2000 house done
at house 2001
2001 house done
at house 2002
2002 house done
at house 2003
2003 house done
at house 2004
2004 house done
at house 2005
2005 house done
at house 2006
2006 house done
at house 2007
2007 house done
at house 2008
2008 house done
at house 2009
2009 house done
at house 2010
2010 house done
at house 2011
2011 house done
at house 2012
2012 house done
at house 2013
2013 house done
at house 2014
2014 house done
at house 2015
2015 house done
at house 2016
2016 house done
at house 2017
2017 house done
at house 2018
2018 house done
at house 2019
2019 house done
at house 2020
2020 house done
at house 2021
2021 house done
at house 2022
2022 house done
at house 2023
2023 house done
at house 2024
2024 house done
at house 2025
2025 house done
at house 2026
2026 house done
at house 2027
2027 house done
at house 2028
2028 house

2269 house done
at house 2270
2270 house done
at house 2271
2271 house done
at house 2272
2272 house done
at house 2273
2273 house done
at house 2274
2274 house done
at house 2275
2275 house done
at house 2276
2276 house done
at house 2277
2277 house done
at house 2278
2278 house done
at house 2279
2279 house done
at house 2280
2280 house done
at house 2281
2281 house done
at house 2282
2282 house done
at house 2283
2283 house done
at house 2284
2284 house done
at house 2285
2285 house done
at house 2286
2286 house done
at house 2287
2287 house done
at house 2288
2288 house done
at house 2289
2289 house done
at house 2290
2290 house done
at house 2291
2291 house done
at house 2292
2292 house done
at house 2293
2293 house done
at house 2294
2294 house done
at house 2295
2295 house done
at house 2296
2296 house done
at house 2297
2297 house done
at house 2298
2298 house done
at house 2299
2299 house done
at house 2300
2300 house done
at house 2301
2301 house done
at house 2302
2302 house

2543 house done
at house 2544
2544 house done
at house 2545
2545 house done
at house 2546
2546 house done
at house 2547
2547 house done
at house 2548
2548 house done
at house 2549
2549 house done
at house 2550
2550 house done
at house 2551
2551 house done
at house 2552
2552 house done
at house 2553
2553 house done
at house 2554
2554 house done
at house 2555
2555 house done
at house 2556
2556 house done
at house 2557
2557 house done
at house 2558
2558 house done
at house 2559
2559 house done
at house 2560
2560 house done
at house 2561
2561 house done
at house 2562
2562 house done
at house 2563
2563 house done
at house 2564
2564 house done
at house 2565
2565 house done
at house 2566
2566 house done
at house 2567
2567 house done
at house 2568
2568 house done
at house 2569
2569 house done
at house 2570
2570 house done
at house 2571
2571 house done
at house 2572
2572 house done
at house 2573
2573 house done
at house 2574
2574 house done
at house 2575
2575 house done
at house 2576
2576 house

2818 house done
at house 2819
2819 house done
at house 2820
2820 house done
at house 2821
2821 house done
at house 2822
2822 house done
at house 2823
2823 house done
at house 2824
2824 house done
at house 2825
2825 house done
at house 2826
2826 house done
at house 2827
2827 house done
at house 2828
2828 house done
at house 2829
2829 house done
at house 2830
2830 house done
at house 2831
2831 house done
at house 2832
2832 house done
at house 2833
2833 house done
at house 2834
2834 house done
at house 2835
2835 house done
at house 2836
2836 house done
at house 2837
2837 house done
at house 2838
2838 house done
at house 2839
2839 house done
at house 2840
2840 house done
at house 2841
2841 house done
at house 2842
2842 house done
at house 2843
2843 house done
at house 2844
2844 house done
at house 2845
2845 house done
at house 2846
2846 house done
at house 2847
2847 house done
at house 2848
2848 house done
at house 2849
2849 house done
at house 2850
2850 house done
at house 2851
2851 house

In [12]:
sample_feat = np.load('house_feats\\1540\\'+ '1540'+ '.npy')
# sample_feat.tolist()
# sample = []
# sample.append([sample_feat])
# sample
sample_feat

array([0.        , 0.        , 0.        , ..., 0.        , 0.        ,
       2.48142385])

# Concatening all .npy files vertically and saving

In [13]:
img_data = []
for i in range(1,3001):
    feat = np.load('house_feats\\' + str(i) + '\\'+ str(i) + '.npy')
    img_data.append(feat.tolist())


In [14]:
print(str(len(img_data)),'x',str(len(img_data[0])))

3000 x 20480


In [15]:
np.save('img_data', np.asarray(img_data))

# Importing data from Excel (Tabular)

In [18]:
tab_df = pd.read_excel('housing_data.xlsx')

In [19]:
null_data = tab_df[tab_df.isnull().any(axis=1)]
null_data

Unnamed: 0,NO,Address,Zip,PRICE,bed,bath,size,build year,lot area,HOA,Elementary,Middle,Highschool,Season (Winter or not)


In [20]:
tab_df

Unnamed: 0,NO,Address,Zip,PRICE,bed,bath,size,build year,lot area,HOA,Elementary,Middle,Highschool,Season (Winter or not)
0,1,"5316 Hidden Creek Cir, Mason, OH 45040",45040,420000,4,4.0,2019.0,1992,0.38,185.0,5,6,3,1
1,2,"6309 Tarton Fields Ln, Mason, OH 45040",45040,630000,4,5.0,4526.0,1997,0.51,495.0,5,6,3,1
2,3,"756 Tradewind Dr, Mason, OH 45040",45040,259755,3,2.0,1431.0,1974,0.40,0.0,5,6,3,1
3,4,"9639 Fox Run Dr, Mason, OH 45040",45040,396000,4,3.0,2523.0,1989,0.36,0.0,5,6,3,1
4,5,"5837 Lisa Ct, Mason, OH 45040",45040,385000,4,4.0,3322.0,1993,0.43,115.0,5,6,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2995,2996,"306 Loggerhead Dr, Columbia, SC 29229",29229,230000,3,2.0,1800.0,1998,0.23,12.0,6,6,6,1
2996,2997,"145 Aster Cir, Columbia, SC 29201",29201,115000,3,1.5,1000.0,1971,0.13,0.0,4,5,7,1
2997,2998,"204 Terrapin Trce E, Columbia, SC 29229",29229,285000,3,2.0,1864.0,2000,65.70,12.0,6,6,6,1
2998,2999,"1014 Laurens St, Columbia, SC 29201",29201,388000,3,3.0,2622.0,1910,0.12,0.0,5,5,7,1


In [21]:
tab_df.nunique()

NO                        3000
Address                   2982
Zip                        439
PRICE                     1122
bed                         10
bath                        18
size                      1635
build year                 156
lot area                   346
HOA                        237
Elementary                  11
Middle                      10
Highschool                  11
Season (Winter or not)       3
dtype: int64

## Applying transformations on the tabular dataset

In [22]:
#Dropping 'no' and 'address'
tab_df = tab_df.drop(['NO', 'Address '], axis = 1)

In [23]:
tab_df

Unnamed: 0,Zip,PRICE,bed,bath,size,build year,lot area,HOA,Elementary,Middle,Highschool,Season (Winter or not)
0,45040,420000,4,4.0,2019.0,1992,0.38,185.0,5,6,3,1
1,45040,630000,4,5.0,4526.0,1997,0.51,495.0,5,6,3,1
2,45040,259755,3,2.0,1431.0,1974,0.40,0.0,5,6,3,1
3,45040,396000,4,3.0,2523.0,1989,0.36,0.0,5,6,3,1
4,45040,385000,4,4.0,3322.0,1993,0.43,115.0,5,6,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...
2995,29229,230000,3,2.0,1800.0,1998,0.23,12.0,6,6,6,1
2996,29201,115000,3,1.5,1000.0,1971,0.13,0.0,4,5,7,1
2997,29229,285000,3,2.0,1864.0,2000,65.70,12.0,6,6,6,1
2998,29201,388000,3,3.0,2622.0,1910,0.12,0.0,5,5,7,1


In [24]:

minmax_df = tab_df[tab_df.columns[[0,2,3,4,5,6,7,8,9,10]]]

mmscaler = MinMaxScaler()
mmscaler2 = MinMaxScaler()

minmax_df = mmscaler.fit_transform(minmax_df)

tab_df[tab_df.columns[[0,2,3,4,5,6,7,8,9,10]]] = minmax_df

tab_df

Unnamed: 0,Zip,PRICE,bed,bath,size,build year,lot area,HOA,Elementary,Middle,Highschool,Season (Winter or not)
0,0.439636,420000,0.100000,0.003293,0.144035,0.054281,0.000025,0.062712,0.5,0.555556,0.3,1
1,0.439636,630000,0.100000,0.004391,0.323114,0.054544,0.000033,0.167797,0.5,0.555556,0.3,1
2,0.439636,259755,0.066667,0.001098,0.102034,0.053331,0.000026,0.000000,0.5,0.555556,0.3,1
3,0.439636,396000,0.100000,0.002195,0.180037,0.054122,0.000024,0.000000,0.5,0.555556,0.3,1
4,0.439636,385000,0.100000,0.003293,0.237111,0.054333,0.000028,0.038983,0.5,0.555556,0.3,1
...,...,...,...,...,...,...,...,...,...,...,...,...
2995,0.274910,230000,0.066667,0.001098,0.128392,0.054597,0.000015,0.004068,0.6,0.555556,0.6,1
2996,0.274619,115000,0.066667,0.000549,0.071247,0.053173,0.000009,0.000000,0.4,0.444444,0.7,1
2997,0.274910,285000,0.066667,0.001098,0.132963,0.054703,0.004315,0.004068,0.6,0.555556,0.6,1
2998,0.274619,388000,0.066667,0.002195,0.187109,0.049955,0.000008,0.000000,0.5,0.444444,0.7,1


In [25]:
#target
y = tab_df['PRICE']/10000

#saving target
np.save('target.npy', np.asarray(y))

#calculating normalized price 3 ways
nrm_price = (y.max() + y.min()) / 2
nrm_price_avg = y.mean()
nrm_price_med = y.median()

In [26]:
tab_df = tab_df.drop('PRICE', 1)
tab_df

  tab_df = tab_df.drop('PRICE', 1)


Unnamed: 0,Zip,bed,bath,size,build year,lot area,HOA,Elementary,Middle,Highschool,Season (Winter or not)
0,0.439636,0.100000,0.003293,0.144035,0.054281,0.000025,0.062712,0.5,0.555556,0.3,1
1,0.439636,0.100000,0.004391,0.323114,0.054544,0.000033,0.167797,0.5,0.555556,0.3,1
2,0.439636,0.066667,0.001098,0.102034,0.053331,0.000026,0.000000,0.5,0.555556,0.3,1
3,0.439636,0.100000,0.002195,0.180037,0.054122,0.000024,0.000000,0.5,0.555556,0.3,1
4,0.439636,0.100000,0.003293,0.237111,0.054333,0.000028,0.038983,0.5,0.555556,0.3,1
...,...,...,...,...,...,...,...,...,...,...,...
2995,0.274910,0.066667,0.001098,0.128392,0.054597,0.000015,0.004068,0.6,0.555556,0.6,1
2996,0.274619,0.066667,0.000549,0.071247,0.053173,0.000009,0.000000,0.4,0.444444,0.7,1
2997,0.274910,0.066667,0.001098,0.132963,0.054703,0.004315,0.004068,0.6,0.555556,0.6,1
2998,0.274619,0.066667,0.002195,0.187109,0.049955,0.000008,0.000000,0.5,0.444444,0.7,1


In [27]:
img_data = np.load(r'img_data.npy')

#minmax scaling images
# img_data_scaled  = mmscaler2.fit_transform(img_data)

# img_data_scaled = img_data_scaled.tolist()

# print(int(len(img_data_scaled[0])/5))

# l = [1,2,3,4,5,6]
# r = []
# for i in range(0, 7, 2):
#     r.append(sum(l[i:i+2]))
# r
    

# Computing l2 norm of each image (4096) for every house (20480 total for 5 images) 

In [28]:
def l2_convert(row):
    l2_imgs = []
    jump = int(len(row)/5)
    for i in range(0, len(row), jump):
        img = row[i:i+jump]
        img_norm = np.linalg.norm(img)
        img_normalized = img/img_norm
        l2_imgs.extend(img_normalized)
    return l2_imgs

#computing and appending l2 norm of each image for each house(2000)
img_data_l2 = list(map(l2_convert, img_data))

# print(str(len(img_data_l2)), 'x', str(len(img_data_l2[0])))
img_data_l2 = np.asarray(img_data_l2)
img_data_l2.shape

(3000, 20480)

In [29]:
tab_data = np.asarray(tab_df)

def tab_l2_convert(row):
    row_norm = np.linalg.norm(row)
    return row/row_norm 

tab_data_l2 = list(map(tab_l2_convert, tab_data))
tab_data_l2 = np.asarray(tab_data_l2)

In [30]:
print(img_data_l2.shape)
print(tab_data_l2.shape)

(3000, 20480)
(3000, 11)


# Concatenating tabular feat-dims (11) with image feat-dims (20480) for each house

In [31]:
final_data = np.concatenate((tab_data_l2, img_data_l2), axis = 1)

# shape of final dataset
print(final_data.shape)

(3000, 20491)


In [32]:
type(final_data)

numpy.ndarray

In [33]:
#saving final dataset
np.save('final_data',final_data)

# Loading saved final data 

In [34]:
final_data = np.load(r'final_data.npy')

In [35]:
#Train test split
final_data_train, final_data_test, y_train, y_test = train_test_split(final_data, y, test_size = 0.1, random_state = 1)

In [36]:
final_data_train.shape

(2700, 20491)

# Constructing linear multi-kernel for SVR and normalizing ground truth values

In [37]:
# building training kernel
mlk_train = np.dot(final_data_train, final_data_train.transpose())

In [38]:
# building testing kernel
mlk_test = np.dot(final_data_test, final_data_train.transpose())

In [39]:
#converting to ndarrays

y_train = np.asarray(y_train)
y_test = np.asarray(y_test)
y_train

array([ 9.19  , 21.3579, 11.2   , ..., 28.5   , 49.99  , 43.5   ])

In [40]:
pd.DataFrame(mlk_train).to_csv('mlk_train.csv',index = False, header = False)
pd.DataFrame(mlk_test).to_csv('mlk_test.csv',index = False, header = False)

In [41]:
# Normalizing y_train
y_train_nrm = np.asarray(y_train - nrm_price)
y_train_nrm_avg = np.asarray(y_train - nrm_price_avg)
y_train_nrm_med = np.asarray(y_train - nrm_price_med)


In [42]:
#reshaping for compatibility
y_train = y_train.reshape(2700)
y_train_nrm = y_train_nrm.reshape(2700)
y_train_nrm_avg = y_train_nrm_avg.reshape(2700)
y_train_nrm_med = y_train_nrm_med.reshape(2700)

y_test = y_test.reshape(300)

In [43]:
# viewing shape

mlk_train.shape

(2700, 2700)

In [44]:
# viewing shape

mlk_test.shape

(300, 2700)

In [45]:
#viewing shape

y_train_nrm.shape

(2700,)

In [46]:
#viewing shape

y_test.shape

(300,)

In [47]:
y_train_nrm

array([-224.9764, -212.8085, -222.9664, ..., -205.6664, -184.1764,
       -190.6664])

In [48]:
#saving as exel file for review
pd.DataFrame(mlk_train).to_csv('mlk_train.csv', index = False, header = False)
pd.DataFrame(mlk_test).to_csv('mlk_test.csv',index = False, header = False)

# Creating SVR model and predicting

In [49]:
#initiating SVR Objects for each normalized target
svr = SVR(kernel= 'precomputed')
svr_nrm = SVR(kernel= 'precomputed')
svr_nrm_avg = SVR(kernel= 'precomputed')
svr_nrm_med = SVR(kernel= 'precomputed')

#fitting the SVR models
svr.fit(mlk_train, y_train) 
svr_nrm.fit(mlk_train, y_train_nrm) 
svr_nrm_avg.fit(mlk_train, y_train_nrm_avg)
svr_nrm_med.fit(mlk_train, y_train_nrm_med)

#predictions
y_pred = svr.predict(mlk_test)
y_pred_nrm = svr_nrm.predict(mlk_test)
y_pred_nrm_avg = svr_nrm_avg.predict(mlk_test)
y_pred_nrm_med = svr_nrm_med.predict(mlk_test)

# Evaluating Predictions using Mean Absolute Error with and without normalized result

In [50]:
#printing error
error0 = mean_absolute_error(y_test, np.absolute(y_pred))
print('Error w/o normalization:', error0)

error1 = mean_absolute_error(y_test, np.absolute(y_pred_nrm + nrm_price))
print('Error ( (min+max)/2 ):  ', error1)

error2 = mean_absolute_error(y_test, np.absolute(y_pred_nrm_avg + nrm_price_avg))
print('Error (nrm_avg):\t', error2)

error3 = mean_absolute_error(y_test, np.absolute(y_pred_nrm_med + nrm_price_med))
print('Error (nrm_med):\t', error3)

Error w/o normalization: 17.00212260497358
Error ( (min+max)/2 ):   17.0021226049736
Error (nrm_avg):	 17.00212260497358
Error (nrm_med):	 17.002122604973575


In [51]:
y_test_df = pd.DataFrame(y_test)
y_pred_df = pd.DataFrame(y_pred)
y_pred_nrm_df = pd.DataFrame(y_pred_nrm + nrm_price)
y_pred_avg_df = pd.DataFrame(y_pred_nrm_avg + nrm_price_avg)
y_pred_mid_df = pd.DataFrame(y_pred_nrm_med + nrm_price_med)

In [52]:
y_test_df.to_csv('y_test.csv', index = False, header = False)
y_pred_df.to_csv('y_pred.csv', index = False, header = False)
y_pred_nrm_df.to_csv('y_pred_nrm.csv', index = False, header = False)
y_pred_avg_df.to_csv('y_pred_nrm_avg.csv', index = False, header = False)
y_pred_mid_df.to_csv('y_pred_nrm_med.csv', index = False, header = False)