## CNN_Features

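Extract CNN features from bike images with a pretrained VGG-16, compress them with PCA, and fit a linear regression on the compressed features to predict price.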

In [1]:
import tensorflow as tf

In [17]:
import yaml

from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score

import csv

In [3]:
'''
Feature extraction using VGG16
'''
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
import numpy as np
import os



Using TensorFlow backend.


In [7]:
# Extract one flattened VGG-16 convolutional feature vector per row of the bike
# index CSV and collect them in `cnn_feats` (one row per image, in CSV order).
data_path = "../datasets/bikes_im/"
output_path = "../datasets/bikes_cnn/"
csv_path = "../datasets/bikes_filtered.csv"

# Length of one flattened feature vector: the network output for a 224x224
# input is (1, 7, 7, 512), i.e. 512 feature maps of size 7x7.
FEATURE_DIM = 512 * 7 * 7

# exist_ok avoids the separate check-then-create step.
os.makedirs(output_path, exist_ok=True)

# Convolutional base only (include_top=False), initialised with ImageNet weights.
model = VGG16(weights='imagenet', include_top=False)

# Read the index up front with csv.reader -- the same parser the label-loading
# cell uses on this file -- so the feature rows and the labels agree on row
# boundaries, and so the buffer can be sized from the data instead of a
# hard-coded row count (too small a count raises IndexError, too large a count
# leaves all-zero rows that would silently be fed into PCA).
with open(csv_path, "r", newline="") as csv_file:
    rows = list(csv.reader(csv_file))

# NOTE(review): np.zeros defaults to float64, which makes this buffer about
# 8 * len(rows) * FEATURE_DIM bytes. model.predict presumably returns float32,
# so dtype=np.float32 would halve the footprint -- kept as float64 here so the
# downstream PCA/regression numerics are unchanged; confirm before switching.
cnn_feats = np.zeros((len(rows), FEATURE_DIM))

for i, row in enumerate(rows):
    # Each row is expected to hold exactly three fields; only the image index
    # is needed here. A malformed row raises ValueError on unpacking.
    index, name, msrp = row
    img_path = os.path.join(data_path, index + '.jpg')
    img = image.load_img(img_path, target_size=(224, 224))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)  # add the batch axis expected by predict()
    x = preprocess_input(x)
    features = model.predict(x)

    # Flatten the feature maps into a single vector
    # https://datascience.stackexchange.com/questions/16444/feature-extraction-for-a-pretrained-model-in-keras
    # (reshape raises if the output size is not FEATURE_DIM)
    cnn_feats[i, :] = features.reshape(FEATURE_DIM)

    # Lightweight progress indicator: one line per 100 images.
    if i % 100 == 0:
        print(i)

0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600
4700
4800
4900
5000
5100
5200
5300
5400
5500
5600
5700
5800
5900
6000
6100
6200
6300
6400
6500
6600
6700
6800
6900
7000
7100
7200
7300
7400
7500
7600
7700
7800
7900
8000
8100
8200
8300
8400
8500
8600
8700
8800
8900
9000
9100
9200
9300
9400
9500
9600
9700
9800
9900
10000
10100
10200
10300
10400
10500
10600
10700
10800
10900
11000
11100
11200
11300
11400
11500
11600
11700
11800
11900
12000
12100
12200
12300
12400
12500
12600
12700
12800
12900
13000
13100
13200
13300
13400
13500
13600
13700
13800
13900
14000
14100
14200
14300
14400
14500
14600
14700
14800
14900
15000
15100
15200
15300
15400
15500
15600
15700
15800
15900
16000
16100
16200
16300
16400
16500
16600
16700
16800
16900
17000
17100
17200
17300
17400
17500
17600
17700
17800
17900
18000
18100
18200
18300
18400
18

In [8]:
# Sanity check: (number of rows, flattened feature length) of the feature matrix.
cnn_feats.shape

(21843, 25088)

In [9]:
# Memory footprint of the feature matrix, in bytes.
cnn_feats.nbytes

4383977472

In [10]:
from sklearn.decomposition import PCA

# Compress every flattened VGG-16 feature vector to 256 principal components.
#
# random_state is pinned for reproducibility: with the default
# svd_solver='auto', scikit-learn may choose the randomized SVD solver for a
# matrix this large, and without a fixed seed the components (and therefore the
# saved arrays and the regression metrics) would differ from run to run.
pca = PCA(n_components=256, random_state=0)
pca.fit(cnn_feats)
# fit() and transform() are deliberately kept as separate calls: with the
# randomized solver, fit_transform() can return a slightly different projection
# than transform() applied to the same data.
cnn_feats_compressed = pca.transform(cnn_feats)


In [12]:
# Shape of the fitted PCA basis: (n_components, n_features).
pca.components_.shape

(256, 25088)

In [13]:
# Persist the PCA basis and the projected features. The names carry no
# directory, so the files land in the current working directory; np.save
# appends the ".npy" extension.
# NOTE(review): pca.mean_ is not saved. PCA.transform centres its input with
# that mean, so the components alone may not be enough to project new images
# later -- confirm what the downstream consumers need.
for npy_stem, array in (
    ("bikes_cnn_pca_components", pca.components_),
    ("bikes_cnn_pca_features", cnn_feats_compressed),
):
    np.save(npy_stem, array)

In [15]:
# Read the bike index CSV into three parallel lists of strings, one entry per
# row: column 0 -> im_inds, column 1 -> im_names, column 2 -> im_prices.
im_inds = []
im_names = []
im_prices = []
# newline='' is what the csv module documentation asks for when opening a file
# for csv.reader, so that newlines inside quoted fields are interpreted by the
# parser rather than by the file object.
with open('../datasets/bikes_filtered.csv', 'r', newline='') as f:
    datareader = csv.reader(f)
    for line in datareader:
        # A row with fewer than three fields raises IndexError here.
        im_inds.append(line[0])
        im_names.append(line[1])
        im_prices.append(line[2])

In [18]:
# Fit an ordinary least-squares model that predicts price from the
# PCA-compressed CNN features, then report its fit.
# NOTE: the model is scored on the same rows it was fitted on, so the metrics
# printed below are training-set (in-sample) figures, not held-out performance.
train_data = cnn_feats_compressed
# Prices were read from the CSV as strings; int() raises ValueError on any
# entry that is not an integer literal.
im_prices_int = [int(price) for price in im_prices]
train_labels = im_prices_int


regr = linear_model.LinearRegression()

regr.fit(train_data, train_labels)

label_pred = regr.predict(train_data)

# One line per row: actual price followed by the predicted price.
for a, b in zip(train_labels, label_pred):
    print(a, b)

print('Coefficients: \n', regr.coef_)
# The mean squared error; arguments follow scikit-learn's (y_true, y_pred)
# order, consistent with the r2_score call below.
print("Mean squared error: %.2f"
      % mean_squared_error(train_labels, label_pred))
# R^2 (coefficient of determination): 1 is perfect prediction
print('Variance score: %.2f' % r2_score(train_labels, label_pred))





1300 1731.65169688
70 1414.80310317
70 1414.80310317
80 -98.5952589479
80 488.187384161
81 -570.852167072
90 75.6511212509
90 -943.946441615
100 1321.86364593
100 395.978747152
100 546.091535502
100 141.851912109
100 1741.46262065
100 533.714637649
100 1741.46262065
100 533.714637649
105 506.668068569
109 -267.183972079
110 1225.99495371
110 1225.99495371
110 753.26753676
110 1070.128933
110 -190.699721731
110 -98.5952589479
110 270.246476057
110 -166.746046686
110 643.632620315
110 312.845610221
110 -218.291521528
110 157.151951878
110 -19.7901919227
110 -604.234768344
110 -93.3876972969
110 -218.291521528
110 157.151951878
110 -19.7901919227
110 -1148.44471773
110 -1148.44471773
110 228.869630056
110 228.869630056
110 1490.36361242
115 282.844138919
115 -133.374411502
115 489.841812364
115 -236.822207422
115 18.7772770694
115 1240.48554733
115 1240.48554733
115 59.2437352255
115 -615.040835996
115 -835.906026873
115 636.808729894
116 504.498562078
120 755.486704976
120 551.949334017


250 -39.0960983816
250 1127.5950392
250 891.710141643
250 648.907451186
250 19.9133030935
250 660.430010729
250 -338.091590766
250 -330.457064558
250 525.433132823
250 110.864867526
250 304.663440425
250 547.838957644
250 -112.225624742
250 265.671573511
250 906.116662423
250 679.167552409
250 -260.10560496
250 685.908735147
250 717.303272885
250 352.104751544
250 366.878338518
250 -25.1281749498
250 808.06076131
250 -995.899607271
250 1102.24904795
250 -157.28118822
250 463.973969198
250 663.829399987
250 420.072824719
250 653.410981811
250 190.725016509
250 127.459949621
250 190.725016509
250 723.023049207
250 -35.9078445659
250 -35.9078445659
250 453.982422318
250 9.0727236547
250 200.65975397
250 433.750662807
250 9.0727236547
250 357.776359372
250 900.307428944
250 130.601449838
250 -210.924744225
250 1256.98434244
250 493.85697474
250 -40.7068084886
250 455.509092213
250 1049.69846102
250 74.6604400031
250 386.15096914
250 461.40232939
250 -109.324979495
250 628.338421217
250 316

330 1159.22629947
330 491.016363463
330 359.178457151
330 1327.32823471
330 -43.6781884616
330 585.112461548
330 663.337547929
330 572.984053884
330 -43.6781884616
330 169.770410477
330 518.551745737
330 601.147545139
330 -484.983122032
330 -490.449200118
330 -54.9546753186
330 563.48284637
330 518.551745737
330 601.147545139
330 -484.983122032
330 -490.449200118
330 -54.9546753186
330 957.626008569
330 602.486681936
330 -189.381245362
330 1107.92246701
330 291.046024302
330 949.26132232
330 387.650858027
330 986.280305344
330 1258.69875932
330 533.034560671
330 612.032532824
330 118.479415364
330 753.265437805
330 211.523921067
330 1071.84883067
330 269.506242043
330 1078.18401556
330 -894.080343738
330 270.395531501
330 565.348345281
330 239.579996241
330 986.280305344
330 1258.69875932
330 859.223432366
330 166.702829068
330 1700.81517785
330 373.63592554
330 820.883874563
330 731.774041739
330 228.575572982
330 -46.9630834509
330 146.199897698
330 144.48560568
330 638.250255331
330

400 398.926446458
400 157.478274915
400 430.196561311
400 600.060780611
400 520.56271694
400 430.196561311
400 600.060780611
400 493.452519253
400 676.719009067
400 791.506274905
400 185.105254798
400 1827.65117926
400 407.536302601
400 19.7916853385
400 19.7916853385
400 547.589279054
400 172.321626074
400 754.446430218
400 2166.37663089
400 524.744870141
400 756.436803704
402 -140.273732414
402 725.909830229
403 215.722410129
405 -21.5354819812
405 165.625847125
405 165.625847125
405 165.625847125
405 378.000604963
405 661.927134278
405 311.368585932
405 772.845055796
405 -177.340276538
405 536.038592209
409 1593.47525492
409 333.541725353
409 899.152251589
409 899.152251589
409 899.152251589
409 899.152251589
409 957.478501247
409 312.712883984
410 1117.14341799
410 87.431918161
410 561.941148444
410 115.250429636
410 115.250429636
410 947.568881689
410 -16.441790687
410 750.858645736
410 139.526114554
410 -331.404131914
410 611.432407601
410 -61.4141321888
410 108.635909827
410 758

490 1043.45354228
490 1556.69307592
490 152.520051668
490 561.287585963
490 -183.854969357
490 1423.55728398
490 25.0950856429
490 927.149470766
490 656.015636769
490 -484.958702216
490 636.772193342
490 671.545778337
490 877.145164726
490 250.191173355
490 -432.307024722
490 783.6736912
490 1052.49751392
490 1210.38700476
490 957.744183419
490 -218.464630435
490 1046.87486201
490 547.192184814
490 417.215936872
490 417.215936872
490 451.487964888
490 451.487964888
490 451.487964888
490 957.744183419
490 -218.464630435
490 893.982922397
490 506.572955636
490 506.572955636
490 -63.9312101056
490 -83.4814880004
490 -83.4814880004
490 230.086259228
490 824.956089956
490 79.0975103969
490 810.302813467
490 1299.92624619
490 1299.92624619
490 1299.92624619
490 22.3799593398
490 795.003211191
490 778.436940411
490 572.255898831
490 343.475133883
490 440.64797933
490 486.703190531
490 1150.22125983
490 1122.60736334
490 1122.60736334
490 -254.569864953
490 167.817802761
490 561.287585963
490 

570 732.770869852
570 1976.73758974
570 291.97039438
570 316.433675349
570 776.681638299
570 735.196577015
570 735.196577015
570 902.86634799
570 902.86634799
570 579.107489715
570 1156.90570564
570 887.725272228
570 340.816243671
570 1351.9671946
570 739.968417018
570 340.816243671
570 714.287155981
570 1595.01573569
570 904.29714383
570 719.083290321
570 506.761746168
570 777.773406967
570 1437.70310505
570 1037.50751456
570 151.880967383
570 492.864309683
570 121.242382247
570 1511.05781538
570 588.180518986
570 406.730766152
570 442.344353907
570 984.346588856
570 214.80550347
570 388.1999225
570 814.13523959
570 814.13523959
570 890.678463919
570 945.552895783
570 945.552895783
570 1477.91933129
570 1513.43086385
570 1477.91933129
570 1024.92601956
570 84.0049265585
570 79.8071210258
570 377.86263376
571 -390.900797938
571 355.931693082
575 724.454827189
575 281.792627861
575 450.133574117
575 -323.105512503
575 1314.86503896
575 783.430113423
575 795.674638698
575 2592.74024843
5

625 1543.02632567
625 1543.02632567
625 171.041222911
625 683.671116174
625 612.302606073
625 439.650953782
625 462.826578327
625 239.358543534
625 890.859307417
625 159.920049269
625 554.999723362
625 330.358853582
625 707.915572299
629 363.502603182
629 871.128030521
629 1609.96629875
629 1354.90235402
629 -81.229551425
629 502.564694241
629 909.713537708
629 712.350596845
629 1202.57825259
629 1219.72912465
629 400.803016918
630 666.404279329
630 808.178531493
630 1146.43911689
630 1146.43911689
630 1531.02955767
630 738.570259259
630 1720.52796517
630 -66.5183627146
630 -66.5183627146
630 1720.52796517
630 -66.5183627146
630 -66.5183627146
630 94.4064538699
630 693.381052437
630 -53.9731825401
630 -62.9576805916
630 335.868002613
630 1030.06740382
630 684.540418508
630 731.080315541
630 946.086978086
630 946.086978086
630 1229.26622901
630 1395.31809344
630 1914.24877042
630 1943.37548639
630 1943.37548639
630 1992.95744988
630 1292.76554025
630 1992.95744988
630 1292.76554025
630 

739 393.330270019
739 1488.29737078
740 137.031655663
740 1768.85191573
740 467.222822054
740 573.62842871
740 997.726699999
740 459.028299937
740 679.14982325
740 1816.96107419
740 860.531638586
740 679.14982325
740 1816.96107419
740 129.145009712
740 430.256788057
740 -192.064461133
740 -192.064461133
740 466.153093952
740 522.749969919
740 1127.88138065
740 1708.3737905
740 465.472492247
740 28.1795187885
740 914.16898338
740 1178.0553806
740 58.0346982082
740 1423.50475211
740 73.0807903005
740 448.304048139
740 1056.42292719
740 714.287094634
740 779.959348326
740 1018.62156082
740 1374.04527924
740 882.742198948
740 882.742198948
740 1187.15436846
740 2121.75878684
740 1187.15436846
740 1187.15436846
742 165.359027282
742 208.286645838
745 679.535234883
745 1118.83637231
745 679.535234883
749 3056.83827478
749 1516.27139547
749 1540.24054252
749 1264.57492542
749 728.049419816
749 1275.86916094
749 969.662521273
749 3083.63351712
749 1044.17944647
749 395.415746487
749 660.258543

900 1318.43010612
900 853.226037783
900 989.416661686
900 83.4039543142
900 83.4039543142
900 1900.65895951
900 1943.84079173
900 2087.61640746
900 1538.47430143
900 2076.60813219
900 1342.1736278
900 1714.66634029
900 1693.54098523
900 1693.54098523
900 2291.74721994
900 1707.36870595
900 1697.75887501
900 200.752495309
900 611.701737767
900 470.117353579
900 521.72731751
900 752.596279726
900 752.596279726
900 559.327476661
900 796.254040738
900 762.540692105
900 514.464686161
900 523.484122418
900 900.687405097
900 2137.23353178
900 626.680238465
900 1373.12614027
900 587.910835653
900 2219.87476242
900 1337.79764848
900 1877.06071072
900 1924.68926925
900 1432.54746228
900 1511.74954662
900 1223.38995373
900 1761.47597982
900 1370.53704545
900 2237.03064102
900 1479.27397719
900 1368.43987207
900 1349.02807971
900 1349.02807971
900 -198.290620334
900 508.57021492
900 508.57021492
900 1327.28833963
900 1634.56062037
900 3022.73627509
900 376.137695574
900 882.271879419
900 520.60395

1100 835.252991362
1100 1885.23958155
1100 1577.23144733
1100 868.429086703
1100 1381.96983873
1100 1982.13427975
1100 2191.66193646
1100 1824.07100493
1100 82.7466936433
1100 1648.69999546
1100 1136.97187502
1100 1629.58994244
1100 500.031767026
1100 1974.03144623
1100 1982.83516781
1100 1954.45116811
1100 2597.28078844
1100 1260.29880794
1100 1113.60900547
1100 1352.81912783
1100 2297.68184944
1100 1025.52565726
1100 2060.86565492
1100 1379.74689259
1100 1629.58994244
1100 1255.68472258
1100 634.479759524
1100 1255.68472258
1100 815.106718353
1100 -46.1716401393
1100 2121.71562797
1100 1674.79723452
1100 1128.60750799
1100 1128.60750799
1100 1383.69820239
1100 2755.24271206
1100 1887.04749725
1100 1535.20120886
1100 3049.6899197
1100 810.789348867
1100 3049.6899197
1100 829.295121194
1100 1474.13536379
1100 569.127522736
1100 738.976347316
1100 410.728813266
1102 2038.4739824
1112 -258.117999358
1115 693.687596691
1115 1343.24684045
1115 2221.20737408
1116 1595.11394448
1120 388.6017

1400 704.553005796
1400 2920.57702526
1400 2920.57702526
1400 2547.4057454
1400 2336.86283363
1400 1442.80684087
1400 1658.73231446
1400 1137.14473951
1400 1342.12125227
1400 491.41581587
1400 798.141583327
1400 2418.10973118
1400 2478.1435562
1400 1251.78691534
1400 1635.26372488
1400 1428.74324343
1400 992.7102939
1400 1748.15103948
1400 702.374875661
1400 1088.2626148
1400 1806.99807334
1400 1753.48572844
1400 1806.99807334
1400 1871.06223628
1400 1572.28161992
1400 2409.33724233
1400 2409.33724233
1400 1070.15867532
1400 1357.61696275
1400 2070.37516405
1400 2070.37516405
1400 1471.48041703
1400 2005.8881011
1400 678.689579404
1400 2482.67301769
1400 2055.8559039
1400 1306.61843363
1400 678.689579404
1400 2482.67301769
1400 2668.84475991
1400 2695.64462095
1400 759.976523397
1400 2018.40244637
1400 429.291126248
1400 1778.9740578
1400 3831.4425231
1400 759.976523397
1400 2018.40244637
1400 429.291126248
1400 1819.67641962
1400 2459.77011808
1400 2281.13119795
1400 1363.33803461
140

1800 2593.86186677
1800 3504.79042211
1800 3140.25141823
1800 2498.71231999
1800 2382.8548439
1800 2117.57368886
1800 2057.42050761
1800 2316.28520806
1800 3789.9898902
1800 3263.27274513
1800 1576.07250565
1800 1973.29392041
1800 1885.62700222
1800 3142.57151305
1800 2348.64639459
1800 1462.88194478
1800 1655.87293724
1800 2834.21952307
1800 3811.73460828
1800 1999.2244796
1800 3179.81842579
1800 2952.57977465
1800 3934.93140354
1800 2297.24245719
1800 3195.212662
1800 1245.20745046
1800 1884.66181338
1800 2499.73926787
1800 1302.27985694
1800 1561.05012397
1800 2143.77059898
1800 1909.59026995
1800 1698.66317944
1800 1302.27985694
1800 1561.05012397
1800 2123.39656584
1800 2138.30763216
1800 1608.57637024
1800 2807.81711082
1800 897.934814571
1800 1017.8185864
1800 1789.89542776
1800 2321.7479176
1800 811.767264633
1800 2678.13555771
1800 2628.7747994
1800 1359.64516327
1800 962.091529081
1800 2606.43439253
1800 2740.62920075
1800 962.091529081
1800 3063.69720098
1800 3653.52369709
1

2300 3802.0574741
2300 2317.16101679
2300 2791.8780682
2300 2921.24907023
2300 3323.53120622
2300 1381.88492907
2300 1918.86960332
2300 1001.93682415
2300 1901.46116096
2300 3157.80213318
2300 701.925828814
2300 2692.66182665
2300 2692.66182665
2300 3210.62285487
2300 3210.62285487
2300 1660.66624021
2300 2902.05208102
2300 2667.9721388
2300 3260.64304668
2300 3544.12981794
2300 2941.42970188
2300 1704.29402026
2300 1704.29402026
2300 3145.64718765
2300 1822.77884725
2300 1822.77884725
2300 2568.62826025
2300 3299.70888118
2300 2311.66398734
2300 2312.20844084
2300 3030.13303991
2300 2394.49615281
2300 3297.53195886
2300 3502.86663375
2300 4237.25875076
2300 1919.11984625
2300 1345.23781951
2300 4370.54374474
2300 2715.39510207
2300 2587.88650365
2300 2762.96184821
2300 3483.76595854
2300 3004.88704149
2300 1867.08064054
2300 2470.37726429
2300 1972.15702232
2300 2744.63603125
2300 873.204259621
2300 1802.63645582
2300 1802.63645582
2300 3513.52297145
2300 2511.19374073
2300 1979.15654

2900 3204.60575897
2900 3988.74930228
2900 3456.80544858
2900 3627.07574001
2900 3477.02203645
2900 2747.89317061
2900 4462.25789442
2900 3326.13418731
2900 4399.08809266
2900 3182.40257681
2900 2126.13076347
2900 2532.39468953
2900 3919.08703054
2900 2674.41262012
2900 2758.38711706
2900 1388.30713846
2900 2909.02386382
2900 3018.92450425
2900 3668.23211678
2900 1682.28726621
2900 1800.58182613
2900 4342.73520249
2900 3042.90953289
2900 3328.62788368
2900 3454.2482704
2900 3118.5485345
2900 2764.26670177
2900 3973.56671732
2900 3973.56671732
2900 2448.61471676
2900 2634.11832658
2900 3423.61501378
2900 3093.84693659
2900 4296.5254073
2900 2816.50024965
2900 3490.10337847
2900 2114.8491462
2900 3369.61779781
2900 3128.48902862
2900 2133.18247161
2900 2280.47838745
2900 2898.23444804
2900 2246.02874739
2900 2902.76994914
2900 2729.23430522
2900 3446.92018104
2900 3583.54664346
2900 3400.88597186
2900 2934.86422346
2900 4853.29359457
2900 1404.64801371
2918 3230.59330608
2918 3230.593306

3629 3755.6547306
3630 3354.56073516
3630 3014.72579707
3630 3383.0621744
3630 2667.78362018
3630 3014.72579707
3630 3960.32490227
3630 2179.54898178
3630 1686.14312973
3640 3682.45298337
3640 4062.41527297
3640 3557.80503825
3647 3842.36754141
3649 3439.01373961
3650 2095.80698328
3650 3785.5287683
3650 4177.70834949
3650 2509.64478754
3650 3819.62297678
3650 3819.62297678
3650 2748.07544163
3650 4149.45675981
3650 5465.42289537
3655 4899.07570946
3669 2566.03158128
3670 2719.35194046
3670 3215.90188091
3670 3696.24777878
3670 4811.53201198
3675 3818.16548682
3675 3818.16548682
3675 3475.70524016
3675 3047.25482611
3675 3047.25482611
3675 3350.33145609
3675 3649.98847045
3675 3916.34535412
3680 2850.819966
3680 3425.35442467
3685 4329.96561841
3690 5073.7356344
3690 5073.7356344
3695 3497.44479527
3695 3495.86729199
3695 3901.58383431
3695 3218.47110448
3699 3813.12177812
3699 3813.12177812
3699 4233.84469047
3699 5423.03783003
3699 2738.05645096
3699 3277.56998648
3699 3265.14666415


5115 7152.57584037
5149 4092.20860681
5149 3105.80738243
5149 4440.49205745
5149 4440.49205745
5150 5065.51693242
5150 5985.3188746
5150 5985.3188746
5150 4424.38812578
5150 4798.66662183
5165 2458.67084329
5169 4875.81192566
5169 2623.10261822
5175 2093.14344155
5177 4842.36213178
5180 2231.21742507
5190 6065.96590425
5199 3027.63726466
5199 5536.02114462
5200 3249.50917076
5200 5413.18232905
5200 4864.1493889
5200 4864.1493889
5200 3956.25875587
5200 3163.80790071
5200 2577.69624116
5200 3534.54756892
5200 3534.54756892
5200 3603.7927769
5200 3972.33214921
5200 3375.83516194
5200 3375.83516194
5200 3224.12970384
5200 3215.47614827
5200 6548.95304358
5200 4915.05632081
5200 3688.95477526
5200 4478.75224983
5200 3461.29298636
5200 4768.29169125
5200 4807.84761941
5220 4681.87609466
5230 3724.81330334
5245 4133.5864254
5249 5141.63015872
5250 3238.80910263
5250 4061.13967928
5250 3987.3155943
5250 3494.76761504
5250 4364.85140913
5250 4972.61407891
5250 3575.59363893
5250 3688.17733142


Running on the entire dataset (the model is fitted and scored on the same rows, so these are training-set metrics), we get
Mean squared error: 1122962.53
Variance score: 0.65