In [2]:
import argparse
import pandas as pd
import os
from tqdm import tqdm as tqdm
import urllib.request
import numpy as np
import sys
import tensorflow as tf
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.models import Model
from numpy import save

from bert_serving.client import BertClient

# Use the below command to start the bert as a service
##### bert-serving-start  -model_dir="D:\UB\2nd_sem\CSE_676_DL\Project\DL_work\Bert_model\uncased_L-12_H-768_A-12"

### we will need to cahnge the path of the model to the local path of the model in your system.

In [3]:
bc = BertClient()

In [4]:
df = pd.read_csv("cleaned_simple.csv")
df.shape

(10541, 9)

In [5]:
vgg_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
x = vgg_model.output
x = Flatten()(x) 
x = Dense(768, activation='relu')(x)
model = Model(inputs=vgg_model.input, outputs=x)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [6]:
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0     

In [7]:
def get_image_features(img_path):
#     print("img_path-",img_path)
    image = tf.io.read_file(img_path)
    image = tf.image.decode_jpeg(image, channels=3)
    # Scaling to [0,1] in tf.float32 data type
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)

    # # Expanding the dimensions of tensor
    # image = tf.expand_dims(image, 0)

    # Resizing the tensor
    image = tf.image.resize(image, [224, 224])
    # # Expanding the dimensions of tensor
    image = tf.expand_dims(image, 0)

    features = model.predict(image,steps=1)
    return features

In [8]:
df = df.dropna(subset=['clean_title'])
df.shape

(9301, 9)

In [9]:
df.head()

Unnamed: 0.1,Unnamed: 0,clean_title,image_url,title,2_way_label,3_way_label,6_way_label,id,num_comments
0,1,i was able to borrow super smash bros for two ...,https://preview.redd.it/m0smpvvap8z21.jpg?widt...,I was able to borrow Super smash bros for two ...,1.0,0.0,0.0,bqmnjz,6.0
1,2,corners lowlife,https://preview.redd.it/xfaq4xrhwbc31.png?widt...,corners - lowlife (1997),0.0,2.0,1.0,chf6dr,0.0
2,3,you think youre so awesome in your nonexploded...,http://i.imgur.com/CnfS9wT.jpg,You think youre so awesome in your non-explode...,0.0,2.0,4.0,cfz6agn,
4,6,jeremy clarkson immortalised with hungry hippo...,https://external-preview.redd.it/OKJe3CSlqcVxj...,Jeremy Clarkson immortalised with Hungry Hippo...,1.0,0.0,0.0,2zv8ld,1.0
5,7,the new fiji water spokesman,http://i.imgur.com/iiUsomD.jpg,The new Fiji water spokesman,0.0,2.0,4.0,co3t7t7,


In [None]:
base_img_path = r"D:\UB\2nd_sem\CSE_676_DL\Project\DL_work\Code\Images"


img_path_list = []
label_2 = []
label_3 = []
label_6 = []

img_feature_np = np.zeros((1, 768))
bert_embedding_np = np.zeros((1, 768))


for index, row in df.iterrows():
#     if index > 30:
#         break
    print("Index-", index)
#     print('row["clean_title"]:',row["clean_title"])
    if row["clean_title"] != "" and row["clean_title"] != " nan":
        try:
            img_path = os.path.join(base_img_path, row["id"] + ".jpg")
#             img_path = base_img_path + row["id"] + ".jpg"

            img_features = get_image_features(str(img_path))

            bert_title_embeddings = bc.encode([row["clean_title"]])

        #     print(len(img_features[0]))
        #     print(len(bert_title_embeddings[0]))
            label_2.append(row["2_way_label"])
            label_3.append(row["3_way_label"])
            label_6.append(row["6_way_label"])

            img_path_list.append(img_path)
            img_feature_np = np.append(img_feature_np, img_features,axis = 0)
            bert_embedding_np = np.append(bert_embedding_np, bert_title_embeddings,axis = 0)
            if index%30==0:
                feature_df = pd.DataFrame(list(zip(img_path_list, label_2,label_3, label_6)),columns=['img_path', '2_way_label', '3_way_label',"6_way_label"])
                feature_df.to_csv("data_without_features.csv")
                new_img_features = img_feature_np[1:]
                new_bert_embeddings = bert_embedding_np[1:]
                save('new_img_features.npy', new_img_features)
                save('new_bert_embeddings.npy', new_bert_embeddings)
        except:
            print("Got error at Index-",index)
            pass


Index- 0
Index- 1
Index- 2
Index- 4
Index- 5
Index- 6
Index- 7
Index- 8
Index- 9
Index- 10
Index- 11
Index- 12
Index- 13
Index- 14
Index- 15
Index- 16
Index- 17
Index- 19
Index- 20
Index- 21
Index- 22
Index- 23
Index- 24
Index- 25
Index- 26
Index- 27
Index- 28
Index- 31
Index- 32
Index- 33
Index- 34
Index- 35
Index- 36
Index- 37
Index- 38
Index- 39
Index- 40
Index- 41
Index- 42
Index- 43
Index- 44
Index- 45
Index- 46
Index- 47
Index- 48
Index- 49
Index- 50
Index- 51
Index- 52
Index- 53
Index- 54
Index- 55
Index- 56
Index- 57
Index- 59
Index- 60
Index- 61


here is what you can do:
- or, start a new server with a larger "max_seq_len"
  '- or, start a new server with a larger "max_seq_len"' % self.length_limit)


Index- 62
Index- 63
Index- 64
Index- 65
Index- 66
Index- 67
Index- 68
Index- 70
Index- 71
Index- 72
Index- 73
Index- 74
Index- 75
Index- 76
Index- 77
Index- 78
Index- 79
Index- 81
Index- 82
Index- 83
Index- 84
Index- 85
Index- 86
Index- 88
Index- 89
Index- 90
Index- 91
Index- 93
Index- 94
Index- 95
Index- 96
Index- 97
Index- 98
Index- 101
Index- 102
Index- 103
Index- 104
Index- 105
Index- 107
Index- 108
Index- 109
Index- 110
Index- 111
Index- 112
Index- 113
Index- 114
Index- 115
Index- 116
Index- 117
Index- 118
Index- 120
Index- 121
Index- 122
Index- 124
Index- 125
Index- 126
Index- 127
Index- 128
Index- 129
Index- 130
Index- 131
Index- 132
Index- 133
Index- 134
Index- 136
Index- 137
Index- 139
Index- 140
Index- 141
Index- 142
Index- 143
Index- 144
Index- 145
Index- 146
Index- 147
Index- 148
Index- 149
Index- 150
Index- 151
Index- 152
Index- 153
Index- 154
Index- 155
Index- 156
Index- 157
Index- 158
Index- 159
Index- 160
Index- 161
Index- 162
Index- 163
Index- 164
Index- 165
Index- 166

Index- 913
Index- 914
Index- 915
Index- 916
Index- 917
Index- 918
Index- 919
Index- 921
Index- 922
Index- 923
Index- 924
Index- 925
Index- 926
Index- 927
Index- 928
Index- 929
Index- 930
Index- 931
Index- 932
Index- 933
Index- 934
Index- 935
Index- 936
Index- 937
Index- 938
Index- 939
Index- 940
Index- 941
Index- 942
Index- 943
Index- 944
Index- 945
Index- 946
Index- 947
Index- 948
Index- 949
Index- 950
Index- 951
Index- 952
Index- 953
Index- 954
Index- 955
Index- 956
Index- 958
Index- 959
Index- 960
Index- 962
Index- 963
Index- 964
Index- 965
Index- 966
Index- 968
Index- 970
Index- 971
Index- 972
Index- 973
Index- 974
Index- 975
Index- 976
Index- 977
Index- 978
Index- 980
Index- 981
Index- 982
Index- 985
Index- 986
Index- 987
Index- 988
Index- 989
Index- 990
Index- 991
Index- 992
Index- 993
Index- 994
Index- 995
Index- 996
Index- 997
Index- 998
Index- 999
Index- 1000
Index- 1001
Index- 1002
Index- 1003
Index- 1004
Index- 1005
Index- 1007
Index- 1008
Index- 1009
Index- 1010
Index- 1011

Index- 1676
Index- 1677
Index- 1678
Index- 1679
Index- 1680
Index- 1681
Index- 1682
Index- 1683
Index- 1684
Index- 1685
Index- 1687
Index- 1688
Index- 1689
Index- 1690
Index- 1691
Index- 1692
Index- 1693
Index- 1694
Index- 1695
Index- 1697
Index- 1698
Index- 1700
Index- 1701
Index- 1702
Index- 1703
Index- 1706
Index- 1707
Index- 1708
Index- 1709
Index- 1710
Index- 1712
Index- 1713
Index- 1715
Index- 1717
Index- 1718
Index- 1719
Index- 1720
Index- 1721
Index- 1722
Index- 1723
Index- 1726
Index- 1728
Index- 1729
Index- 1730
Index- 1731
Index- 1732
Index- 1733
Index- 1734
Index- 1735
Index- 1736
Index- 1737
Index- 1738
Index- 1739
Index- 1740
Index- 1743
Index- 1744
Index- 1745
Index- 1746
Index- 1747
Index- 1748
Index- 1750
Index- 1751
Index- 1752
Index- 1754
Index- 1755
Index- 1756
Index- 1757
Index- 1758
Index- 1760
Index- 1763
Index- 1764
Index- 1765
Index- 1766
Index- 1767
Index- 1768
Index- 1769
Index- 1770
Index- 1771
Index- 1772
Index- 1773
Index- 1775
Index- 1776
Index- 1777
Inde

Index- 2455
Index- 2456
Index- 2457
Index- 2458
Index- 2459
Index- 2460
Index- 2461
Index- 2462
Index- 2463
Index- 2464
Index- 2465
Index- 2466
Index- 2467
Index- 2468
Index- 2470
Index- 2471
Index- 2473
Index- 2475
Index- 2476
Index- 2477
Index- 2479
Index- 2481
Index- 2482
Index- 2483
Index- 2485
Index- 2486
Index- 2487
Index- 2488
Index- 2489
Index- 2490
Index- 2491
Index- 2493
Index- 2494
Index- 2495
Index- 2496
Index- 2497
Index- 2498
Index- 2499
Index- 2500
Index- 2501
Index- 2502
Index- 2503
Index- 2504
Index- 2505
Index- 2506
Index- 2507
Index- 2509
Index- 2510
Index- 2511
Index- 2514
Index- 2515
Index- 2516
Index- 2517
Index- 2518
Index- 2519
Index- 2520
Index- 2521
Index- 2522
Index- 2523
Index- 2524
Index- 2525
Index- 2527
Index- 2528
Index- 2529
Index- 2530
Index- 2531
Index- 2532
Index- 2533
Index- 2534
Index- 2536
Index- 2537
Index- 2538
Index- 2540
Index- 2541
Index- 2542
Index- 2543
Index- 2544
Index- 2546
Index- 2548
Index- 2549
Index- 2550
Index- 2551
Index- 2552
Inde

Index- 3213
Index- 3214
Index- 3215
Index- 3216
Index- 3217
Index- 3218
Index- 3219
Index- 3220
Index- 3221
Index- 3222
Index- 3223
Index- 3224
Index- 3225
Index- 3226
Index- 3227
Index- 3228
Index- 3229
Index- 3230
Index- 3231
Index- 3232
Index- 3233
Index- 3234
Index- 3235
Index- 3236
Index- 3237
Index- 3238
Index- 3239
Index- 3240
Index- 3241
Index- 3242
Index- 3243
Index- 3244
Index- 3245
Index- 3247
Index- 3249
Index- 3251
Index- 3252
Index- 3253
Index- 3255
Index- 3256
Index- 3257
Index- 3258
Index- 3259
Index- 3261
Index- 3263
Index- 3264
Index- 3265
Index- 3266
Index- 3267
Index- 3268
Index- 3269
Index- 3270
Index- 3271
Index- 3272
Index- 3273
Index- 3276
Index- 3278
Index- 3279
Index- 3280
Index- 3281
Index- 3282
Index- 3283
Index- 3285
Index- 3286
Index- 3287
Index- 3288
Index- 3289
Index- 3291
Index- 3292
Index- 3293
Index- 3294
Index- 3295
Index- 3296
Index- 3297
Index- 3298
Index- 3299
Index- 3300
Index- 3301
Index- 3302
Index- 3305
Index- 3306
Index- 3307
Index- 3308
Inde

Index- 3995
Index- 3996
Index- 3997
Index- 3998
Index- 4000
Index- 4001
Index- 4002
Index- 4003
Index- 4004
Index- 4005
Index- 4007
Index- 4008
Index- 4010
Index- 4011
Index- 4012
Index- 4013
Index- 4014
Index- 4015
Index- 4016
Index- 4017
Index- 4018
Index- 4019
Index- 4022
Index- 4023
Index- 4024
Index- 4025
Index- 4026
Index- 4027
Index- 4029
Index- 4031
Index- 4032
Index- 4033
Index- 4034
Index- 4035
Index- 4036
Index- 4037
Index- 4038
Index- 4039
Index- 4040
Index- 4041
Index- 4042
Index- 4043
Index- 4044
Index- 4045
Index- 4046
Index- 4047
Index- 4048
Index- 4049
Index- 4050
Index- 4051
Index- 4052
Index- 4053
Index- 4054
Index- 4055
Index- 4056
Index- 4057
Index- 4058
Index- 4059
Index- 4061
Index- 4062
Index- 4063
Index- 4064
Index- 4066
Index- 4067
Index- 4068
Index- 4069
Index- 4070
Index- 4071
Index- 4072
Index- 4074
Index- 4075
Index- 4076
Index- 4078
Index- 4079
Index- 4080
Index- 4081
Index- 4082
Index- 4083
Index- 4085
Index- 4086
Index- 4087
Index- 4088
Index- 4089
Inde

Index- 4761
Index- 4762
Index- 4763
Index- 4765
Index- 4766
Index- 4767
Index- 4768
Index- 4769
Index- 4771
Index- 4772
Index- 4773
Index- 4775
Index- 4776
Index- 4777
Index- 4778
Index- 4779
Index- 4780
Index- 4781
Index- 4782
Index- 4783
Index- 4784
Index- 4785
Index- 4786
Index- 4787
Index- 4788
Index- 4789
Index- 4790
Index- 4791
Index- 4792
Index- 4794
Index- 4795
Index- 4796
Index- 4797
Index- 4798
Index- 4799
Index- 4800
Index- 4801
Index- 4802
Index- 4803
Index- 4804
Index- 4805
Index- 4806
Index- 4807
Index- 4808
Index- 4809
Index- 4810
Index- 4811
Index- 4815
Index- 4816
Index- 4817
Index- 4818
Index- 4820
Index- 4821
Index- 4822
Index- 4823
Index- 4824
Index- 4825
Index- 4826
Index- 4827
Index- 4828
Index- 4831
Index- 4832
Index- 4833
Index- 4834
Index- 4835
Index- 4836
Index- 4837
Index- 4838
Index- 4840
Index- 4841
Index- 4842
Index- 4844
Index- 4845
Index- 4846
Index- 4847
Index- 4848
Index- 4849
Index- 4850
Index- 4851
Index- 4852
Index- 4853
Index- 4854
Index- 4855
Inde

Index- 5526
Index- 5527
Index- 5528
Index- 5529
Index- 5530
Index- 5531
Index- 5532
Index- 5533
Index- 5535
Index- 5536
Index- 5538
Index- 5540
Index- 5541
Index- 5543
Index- 5544
Index- 5545
Index- 5546
Index- 5547
Index- 5548
Index- 5549
Index- 5550
Index- 5551
Index- 5552
Index- 5553
Index- 5556
Index- 5557
Index- 5558
Index- 5559
Index- 5560
Index- 5561
Index- 5562
Index- 5563
Index- 5564
Index- 5565
Index- 5566
Index- 5567
Index- 5568
Index- 5569
Index- 5570
Index- 5571
Index- 5573
Index- 5574
Index- 5575
Index- 5576
Index- 5577
Index- 5579
Index- 5580
Index- 5581
Index- 5582
Index- 5583
Index- 5584
Index- 5585
Index- 5586
Index- 5587
Index- 5589
Index- 5590
Index- 5591
Index- 5592
Index- 5593
Index- 5594
Index- 5595
Index- 5596
Index- 5597
Index- 5598
Index- 5601
Index- 5602
Index- 5603
Index- 5605
Index- 5606
Index- 5607
Index- 5608
Index- 5609
Index- 5610
Index- 5611
Index- 5612
Index- 5613
Index- 5614
Index- 5615
Index- 5616
Index- 5617
Index- 5618
Index- 5619
Index- 5620
Inde

Index- 6298
Index- 6299
Index- 6300
Index- 6301
Index- 6302
Index- 6303
Index- 6304
Index- 6305
Index- 6306
Index- 6307
Index- 6308
Index- 6309
Index- 6310
Index- 6313
Index- 6314
Index- 6315
Index- 6316
Index- 6317
Index- 6318
Index- 6319
Index- 6320
Index- 6322
Index- 6323
Index- 6324
Index- 6326
Index- 6327
Index- 6328
Index- 6329
Index- 6330
Index- 6331
Index- 6332
Index- 6333
Index- 6334
Index- 6335
Index- 6336
Index- 6337
Index- 6338
Index- 6339
Index- 6340
Index- 6341
Index- 6343
Index- 6344
Index- 6345
Index- 6346
Index- 6347
Index- 6348
Index- 6349
Index- 6350
Index- 6351
Index- 6352
Index- 6353
Index- 6354
Index- 6355
Index- 6356
Index- 6357
Index- 6358
Index- 6359
Index- 6360
Index- 6361
Index- 6362
Index- 6363
Index- 6364
Got error at Index- 6364
Index- 6365
Index- 6366
Index- 6367
Index- 6368
Index- 6369
Index- 6370
Index- 6371
Index- 6372
Index- 6373
Index- 6374
Index- 6375
Index- 6376
Index- 6377
Index- 6379
Index- 6380
Index- 6381
Index- 6382
Index- 6385
Index- 6386
Ind

Index- 7064
Index- 7065
Index- 7066
Index- 7067
Index- 7068
Index- 7069
Index- 7070
Index- 7071
Index- 7072
Index- 7073
Index- 7074
Index- 7075
Index- 7076
Index- 7077
Index- 7078
Index- 7079
Index- 7080
Index- 7081
Index- 7082
Index- 7083
Index- 7084
Index- 7085
Index- 7086
Index- 7087
Index- 7088
Index- 7089
Index- 7090
Index- 7091
Index- 7092
Index- 7093
Index- 7094
Index- 7095
Index- 7096
Index- 7097
Index- 7098
Index- 7099
Index- 7100
Index- 7101
Index- 7102
Index- 7103
Index- 7104
Index- 7105
Index- 7107
Index- 7108
Index- 7109
Index- 7110
Index- 7111
Index- 7112
Index- 7113
Index- 7114
Index- 7115
Index- 7116
Index- 7117
Index- 7118
Index- 7119
Index- 7122
Index- 7123
Index- 7124
Index- 7125
Index- 7127
Index- 7128
Index- 7129
Index- 7130
Index- 7132
Index- 7133
Index- 7135
Index- 7136
Index- 7137
Index- 7138
Index- 7139
Index- 7141
Index- 7142
Index- 7143
Index- 7144
Index- 7145
Index- 7146
Index- 7147
Index- 7148
Index- 7149
Index- 7150
Index- 7151
Index- 7152
Index- 7153
Inde

Index- 7836
Index- 7837
Index- 7838
Index- 7839
Index- 7840
Index- 7841
Index- 7843
Index- 7844
Index- 7845
Index- 7846
Index- 7847
Index- 7848
Index- 7849
Index- 7850
Index- 7851
Index- 7852
Index- 7854
Index- 7855
Index- 7857
Index- 7858
Index- 7859
Index- 7860
Index- 7861
Index- 7862
Index- 7863
Index- 7864
Index- 7865
Index- 7867
Index- 7869
Index- 7870
Index- 7871
Index- 7872
Index- 7873
Index- 7874
Index- 7875
Index- 7876
Index- 7877
Index- 7878
Index- 7880
Index- 7882
Index- 7884
Index- 7885
Index- 7886
Index- 7887
Index- 7888
Index- 7889
Index- 7890
Index- 7891
Index- 7892
Index- 7893
Index- 7894
Index- 7896
Index- 7897
Index- 7898
Index- 7899
Index- 7900
Index- 7901
Index- 7902
Index- 7903
Index- 7904
Index- 7905
Index- 7907
Index- 7908
Index- 7910
Index- 7911
Index- 7912
Index- 7913
Index- 7915
Index- 7916
Index- 7917
Index- 7918
Index- 7919
Index- 7921
Index- 7922
Index- 7924
Index- 7925
Index- 7926
Index- 7927
Index- 7928
Index- 7929
Index- 7930
Index- 7933
Index- 7934
Inde

Index- 8624
Index- 8625
Index- 8626
Index- 8627
Index- 8628
Index- 8629
Index- 8630
Index- 8631
Index- 8632
Index- 8634
Index- 8635
Index- 8636
Index- 8638
Index- 8639
Index- 8640
Index- 8641
Index- 8642
Index- 8645
Index- 8646
Index- 8647
Index- 8648
Index- 8649
Index- 8650
Index- 8651
Index- 8652
Index- 8653
Index- 8654
Index- 8656
Index- 8657
Index- 8658
Index- 8659
Index- 8660
Index- 8661
Index- 8662
Index- 8663
Index- 8664
Index- 8665
Index- 8666
Index- 8667
Index- 8669
Index- 8670
Index- 8671
Index- 8672
Index- 8673
Index- 8674
Index- 8675
Index- 8676
Index- 8677
Index- 8678
Index- 8680
Index- 8681
Index- 8682
Index- 8683
Index- 8684
Index- 8685
Index- 8686
Index- 8687
Index- 8688
Index- 8689
Index- 8690
Index- 8691
Index- 8692
Index- 8693
Index- 8694
Index- 8695
Index- 8696
Index- 8697
Index- 8698
Index- 8699
Index- 8700
Index- 8701
Index- 8702
Index- 8704
Index- 8705
Index- 8706
Index- 8708
Index- 8709
Index- 8711
Index- 8712
Index- 8713
Index- 8714
Index- 8715
Index- 8716
Inde

Index- 9402
Index- 9403
Index- 9404
Index- 9405
Index- 9406
Index- 9407
Got error at Index- 9407
Index- 9408
Index- 9409
Index- 9410
Index- 9411
Index- 9412
Index- 9413
Index- 9414
Index- 9415
Index- 9416
Index- 9417
Index- 9418
Index- 9419
Index- 9420
Index- 9421
Got error at Index- 9421
Index- 9422
Index- 9423
