### 指定GPU来跑代码

In [1]:
import os
from tensorflow.python.client import device_lib
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

### 获取数据

#### 获取所有文本数据、图片数据和标签数据

##### 所有文本数据、图片数据、图片id数据

In [2]:
from PIL import Image
import numpy as np
import os

image_data={} #所有图片数据
text_data={} #所有文本数据

for i in range(1,5130):
    img_path='data/'+str(i)+'.jpg'
    text_path='data/'+str(i)+'.txt'
    if os.path.exists(img_path) and os.path.exists(text_path):
        #读图片数据
        img=Image.open(img_path).resize((224,224))
        img_array=np.array(img)
        image_data[i]=img_array
        #读文本数据
        with open(text_path,'r',encoding='utf-8',errors='replace') as file:
            text_str=file.read()
            text_data[i]=text_str


In [3]:
#按key序排序
sorted_image_data=dict(sorted(image_data.items(), key=lambda x: x[0]))
sorted_text_data=dict(sorted(text_data.items(), key=lambda x: x[0]))

In [4]:
print(sorted_image_data[1])
print(sorted_image_data[1])

[[[116  65  22]
  [118  63  21]
  [124  64  21]
  ...
  [213 141  66]
  [220 147  70]
  [211 138  61]]

 [[117  66  23]
  [119  64  21]
  [124  64  21]
  ...
  [218 145  70]
  [226 152  75]
  [219 144  67]]

 [[118  67  23]
  [119  65  21]
  [124  64  21]
  ...
  [225 149  74]
  [237 160  84]
  [231 154  78]]

 ...

 [[  7   2   5]
  [  9   3   1]
  [ 20  11   2]
  ...
  [100  72  62]
  [114  90  81]
  [104  84  73]]

 [[  6   2   4]
  [  9   3   1]
  [ 19  11   2]
  ...
  [ 91  63  53]
  [ 88  65  54]
  [ 73  54  43]]

 [[  5   0   3]
  [ 10   3   1]
  [ 19  10   1]
  ...
  [ 84  56  46]
  [ 87  63  53]
  [ 63  44  33]]]
[[[116  65  22]
  [118  63  21]
  [124  64  21]
  ...
  [213 141  66]
  [220 147  70]
  [211 138  61]]

 [[117  66  23]
  [119  64  21]
  [124  64  21]
  ...
  [218 145  70]
  [226 152  75]
  [219 144  67]]

 [[118  67  23]
  [119  65  21]
  [124  64  21]
  ...
  [225 149  74]
  [237 160  84]
  [231 154  78]]

 ...

 [[  7   2   5]
  [  9   3   1]
  [ 20  11   2]
  ..

##### 根据guid获取到对应的label

In [5]:
label_data=dict()
with open('train.txt','r',encoding='utf-8') as label_file:
    next(label_file) #跳过第一行（标题行）
    for line in label_file:
        key, value = line.strip().split(',')
        label_data[int(key)]=value

#将label_data按照guid排序
sorted_label_data = dict(sorted(label_data.items(), key=lambda x: x[0]))

In [6]:
test_guid=list()
with open('test_without_label.txt','r',encoding='utf-8') as test_id_file:
    next(test_id_file)
    for line in test_id_file:
        key,value=line.strip().split(',')
        test_guid.append(int(key))

##### 将文本数据进行word2vec向量化

In [7]:
#文本数据的预处理
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
sorted_pure_text_data=dict() #存储每个句子分词后的结果
#文本数据的预处理
for key in sorted_text_data:
    # print(sorted_text_data[key])
    #正则表达式匹配非字母、数字和空格的字符并删除
    pattern = r'[^a-zA-Z0-9\s]'
    cleaned_text = re.sub(pattern, '', sorted_text_data[key])
    #去除停止词，并都转化为小写字母
    stop_words=set(stopwords.words('english'))
    tokens=cleaned_text.split() #分词
    filtered_tokens = [word.lower() for word in tokens if word.lower() not in stop_words]  # 去除停止词
    # tokens=sorted_text_data[key].split() #分词
    # filtered_tokens = [word.lower() for word in tokens]  # 去除停止词
    sorted_pure_text_data[key]=filtered_tokens
    # print(sorted_pure_text_data[key])


In [8]:
list(sorted_pure_text_data.values())[0]

['feel', 'today', 'legday', 'jelly', 'aching', 'gym']

In [9]:
#word2vec对文本数据进行词向量化
from gensim.models import Word2Vec
w2v_model=Word2Vec(list(sorted_pure_text_data.values()),min_count=5,vector_size=50)
word_vec_matrix=w2v_model.wv #词向量矩阵
#向词向量矩阵中添加<pad>和<unk>对应的词向量
#每个词对应的词向量的维度
embedding_dim=w2v_model.vector_size
unk_vector=np.random.uniform(-1,1,embedding_dim)
pad_vector=np.zeros(embedding_dim)
#添加新的词向量
word_vec_matrix.add_vectors(keys=['<unk>','<pad>'], weights=[unk_vector, pad_vector])
#生成的词向量矩阵的词典中含有的词的List
w2v_word_vocab=word_vec_matrix.index_to_key

In [10]:
#得到词到id，id到词的映射词典
word2index=dict()
for i in range(len(w2v_word_vocab)):
    word2index[w2v_word_vocab[i]]=i
index2word=dict()
for i in range(len(word2index.items())):
    index2word[list(word2index.values())[i]]=list(word2index.keys())[i]

### 划分训练集，验证集和测试集

In [11]:
import random
val_idx=list()
train_idx=list()

val_idx=random.sample(list(sorted_label_data.keys()),int(len(sorted_label_data.keys())*0.2))
for idx in list(sorted_label_data.keys()):
    if idx not in val_idx:
        train_idx.append(idx)

In [12]:
#构造验证集
val_text=list()
val_image=list()
val_label_target=list()
val_guid=sorted(val_idx)
for val_idx in val_guid:
    val_text.append(sorted_pure_text_data[val_idx]) #验证集文本数据
    val_image.append(sorted_image_data[val_idx]) #对应的验证集图像数据
    val_label_target.append(sorted_label_data[val_idx])

#构造训练集
train_text=list()
train_image=list()
train_label=list()
train_guid=sorted(train_idx)
for train_idx in train_guid:
    train_text.append(sorted_pure_text_data[train_idx])
    train_image.append(sorted_image_data[train_idx])
    train_label.append(sorted_label_data[train_idx])

#构造测试集
test_text=list()
test_image=list()
for test_idx in test_guid:
    test_text.append(sorted_pure_text_data[test_idx])
    test_image.append(sorted_image_data[test_idx])

### 对于文本数据，构建LSTM模型

#### 对文本数据的预处理

In [13]:
#将词转换成索引编码
train_text2id=list()
for sentence in train_text:
    sentence_id_list=list()
    for word in sentence:
        if word not in w2v_word_vocab:
            word='<unk>'
        text2id=word2index[word]
        sentence_id_list.append(text2id)
    train_text2id.append(sentence_id_list)

val_text2id=list()
for sentence in val_text:
    sentence_id_list=list()
    for word in sentence:
        if word not in w2v_word_vocab:
            word='<unk>'
        text2id=word2index[word]
        sentence_id_list.append(text2id)
    val_text2id.append(sentence_id_list)

test_text2id=list()
for sentence in test_text:
    sentence_id_list=list()
    for word in sentence:
        if word not in w2v_word_vocab:
            word='<unk>'
        text2id=word2index[word]
        sentence_id_list.append(text2id)
    test_text2id.append(sentence_id_list)

In [14]:
#获取句子的最大长度
max_length=-1
for i in range(len(list(sorted_pure_text_data.values()))):
    if len(list(sorted_pure_text_data.values())[i])>=max_length:
        max_length=len(list(sorted_pure_text_data.values())[i])
print(max_length)

22


In [15]:
#将句子的长度都补到最长
from tensorflow.keras.preprocessing.sequence import pad_sequences
train_text_pad=pad_sequences(train_text2id,maxlen=max_length,padding='post',value=word2index['<pad>'])
val_text_pad=pad_sequences(val_text2id,maxlen=max_length,padding='post',value=word2index['<pad>'])
test_text_pad=pad_sequences(test_text2id,maxlen=max_length,padding='post',value=word2index['<pad>'])

In [16]:
print(train_text_pad[0])

[  57    9 1527 1527  415  847 1528 1528 1528 1528 1528 1528 1528 1528
 1528 1528 1528 1528 1528 1528 1528 1528]


In [17]:
#处理标签label
from keras.utils import to_categorical
# 将情感标签转换为数字标签
labels=['positive','negative','neutral']
label_to_index = {'positive': 0, 'negative': 1, 'neutral': 2}
index_to_label = {0: 'positive', 1: 'negative', 2: 'neutral'}

train_label_idx = np.array([label_to_index[label] for label in train_label])
val_label_target_idx = np.array([label_to_index[label] for label in val_label_target])
#将标签进行独热编码
train_label_one_hot = to_categorical(train_label_idx)
val_label_one_hot = to_categorical(val_label_target_idx)

#### 构造对于文本数据用于情感分类的LSTM框架

In [18]:
#设计模型结构
from keras.layers import Embedding, LSTM, Dense, Dropout,Input

text_input=Input(shape=(max_length,))
text_embed=Embedding(input_dim=len(w2v_word_vocab),output_dim=embedding_dim,weights=[word_vec_matrix.vectors],trainable=False)(text_input)
text_features=LSTM(units=128)(text_embed)

2023-07-14 22:20:38.483824: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-07-14 22:20:39.129944: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22310 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:e1:00.0, compute capability: 8.6


### 构造用于对图片数据进行建模的VGGNet_19神经网络

In [19]:
from keras.applications.vgg19 import VGG19
from keras.layers import Input, Flatten, Dense, Concatenate
from keras.models import Model
#对图像数据进行预处理
#归一化
# train_image=train_image.reshape(train_image.shape[0],224,224,3)
train_image=np.array(train_image)/255.0
val_image=np.array(val_image)/255.0
test_image=np.array(test_image)/255.0
#构建模型
base_model = VGG19(include_top=False, weights='imagenet', input_shape=(224, 224, 3))
image_input = Input(shape=(224, 224,3))
image_features = base_model(image_input)
image_features = Flatten()(image_features)

### 构建多模态融合模型

In [20]:
#融合文本和图像特征
combined_features = Concatenate()([text_features, image_features])
predictions = Dense(3, activation='softmax')(combined_features)
#构建多模态融合模型
model = Model(inputs=[text_input, image_input], outputs=predictions)
#编译模型
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 22)]         0           []                               
                                                                                                  
 input_3 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 embedding (Embedding)          (None, 22, 50)       76450       ['input_1[0][0]']                
                                                                                                  
 vgg19 (Functional)             (None, 7, 7, 512)    20024384    ['input_3[0][0]']            

### 在训练集上训练，并在验证集上进行验证

In [21]:
model.fit(x=[train_text_pad,train_image],y=train_label_one_hot,epochs=10,batch_size=32,validation_data=([val_text_pad,val_image],val_label_one_hot))

Epoch 1/10


2023-07-14 22:20:49.617429: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8101
2023-07-14 22:20:51.972256: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f69a07d90d0>

In [22]:
# 使用模型进行预测
lstm_vgg_predictions = model.predict([test_text_pad, test_image])

# 将预测结果转换为标签
predicted_labels_1 = np.argmax(lstm_vgg_predictions, axis=1)
predicted_labels_1 = [index_to_label[label] for label in predicted_labels_1]

# 打印预测结果
for i, label in enumerate(predicted_labels_1):
    print(f"Sample {i+1}: Predicted Label = {label}")

Sample 1: Predicted Label = positive
Sample 2: Predicted Label = positive
Sample 3: Predicted Label = positive
Sample 4: Predicted Label = positive
Sample 5: Predicted Label = positive
Sample 6: Predicted Label = positive
Sample 7: Predicted Label = positive
Sample 8: Predicted Label = positive
Sample 9: Predicted Label = positive
Sample 10: Predicted Label = positive
Sample 11: Predicted Label = positive
Sample 12: Predicted Label = positive
Sample 13: Predicted Label = positive
Sample 14: Predicted Label = positive
Sample 15: Predicted Label = positive
Sample 16: Predicted Label = positive
Sample 17: Predicted Label = positive
Sample 18: Predicted Label = positive
Sample 19: Predicted Label = positive
Sample 20: Predicted Label = positive
Sample 21: Predicted Label = positive
Sample 22: Predicted Label = positive
Sample 23: Predicted Label = positive
Sample 24: Predicted Label = positive
Sample 25: Predicted Label = positive
Sample 26: Predicted Label = positive
Sample 27: Predicted 

In [23]:
file_path_1='test_lstm_vgg.txt'
with open(file_path_1,'w') as file:
    file.write('guid,tag'+'\n')
    for i in range(len(predicted_labels_1)):
        file.write(str(test_guid[i])+','+str(predicted_labels_1[i])+'\n')

### 将用于图片训练的rggNet改为resnet50试试，把lstm改成双向lstm


In [24]:
from tensorflow.keras.applications import ResNet50
from keras.layers import Input, Flatten, Dense, Concatenate, Bidirectional
from keras.models import Model

#构建模型
image_input_resnet = Input(shape=(224, 224,3))
resnet_model  = ResNet50(include_top=False, weights='imagenet', input_tensor=image_input_resnet)
image_features_resnet = resnet_model.output
image_features_resnet = Flatten()(image_features_resnet)
# image_features_resnet = Dropout(rate=0.1)(image_features_resnet)

text_input_bilstm=Input(shape=(max_length,))
text_embed_bilstm=Embedding(input_dim=len(w2v_word_vocab),output_dim=embedding_dim,weights=[word_vec_matrix.vectors],trainable=False)(text_input_bilstm)
lstm_layer=LSTM(units=128)
bidirectional_lstm = Bidirectional(lstm_layer)
text_features_bilstm = bidirectional_lstm(text_embed_bilstm)
# text_features_bilstm = Dropout(rate=0.1)(text_features_bilstm)

In [25]:
from keras.optimizers import Adam
#构建多模态模型(lstm+resnet)
#融合文本和图像特征
combined_features_2 = Concatenate()([text_features_bilstm, image_features_resnet])
combined_features = Dropout(rate=0.1)(combined_features_2)
predictions_2 = Dense(3, activation='softmax')(combined_features_2)
#构建多模态融合模型
lstm_resnet_model = Model(inputs=[text_input_bilstm, image_input_resnet], outputs=predictions_2)
#编译模型
learning_rate = 0.0025  # 设置所需的学习率
optimizer = Adam(learning_rate=learning_rate)
lstm_resnet_model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
# lstm_resnet_model.summary()

In [26]:
lstm_resnet_model.fit(x=[train_text_pad,train_image],y=train_label_one_hot,epochs=10,batch_size=32,validation_data=([val_text_pad,val_image],val_label_one_hot))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f693845cb80>

In [27]:
# 使用模型进行预测
lstm_resnet_predictions = lstm_resnet_model.predict([test_text_pad, test_image])

# 将预测结果转换为标签
predicted_labels = np.argmax(lstm_resnet_predictions, axis=1)
predicted_labels = [index_to_label[label] for label in predicted_labels]

# 打印预测结果
for i, label in enumerate(predicted_labels):
    print(f"Sample {i+1}: Predicted Label = {label}")

Sample 1: Predicted Label = positive
Sample 2: Predicted Label = positive
Sample 3: Predicted Label = positive
Sample 4: Predicted Label = positive
Sample 5: Predicted Label = positive
Sample 6: Predicted Label = positive
Sample 7: Predicted Label = positive
Sample 8: Predicted Label = positive
Sample 9: Predicted Label = positive
Sample 10: Predicted Label = positive
Sample 11: Predicted Label = positive
Sample 12: Predicted Label = positive
Sample 13: Predicted Label = positive
Sample 14: Predicted Label = positive
Sample 15: Predicted Label = positive
Sample 16: Predicted Label = positive
Sample 17: Predicted Label = negative
Sample 18: Predicted Label = positive
Sample 19: Predicted Label = positive
Sample 20: Predicted Label = positive
Sample 21: Predicted Label = positive
Sample 22: Predicted Label = positive
Sample 23: Predicted Label = positive
Sample 24: Predicted Label = positive
Sample 25: Predicted Label = positive
Sample 26: Predicted Label = positive
Sample 27: Predicted 

In [28]:
file_path_2='test_bilstm_resnet.txt'
with open(file_path_2,'w') as file:
    file.write('guid,tag'+'\n')
    for i in range(len(predicted_labels)):
        file.write(str(test_guid[i])+','+str(predicted_labels[i])+'\n')

### 消融实验

#### 消融实验1：lstm

In [31]:
predictions_text = Dense(3, activation='softmax')(text_features)

# 构建仅文本输入的模型
text_model = Model(inputs=text_input, outputs=predictions_text)
text_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# 仅使用文本数据进行训练
text_model.fit(train_text_pad, train_label_one_hot, epochs=10, batch_size=32, validation_data=(val_text_pad, val_label_one_hot))

# 在验证集上评估模型
text_model.evaluate(val_text_pad, val_label_one_hot)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[0.8920366168022156, 0.6012499928474426]

#### 消融实验2：vggnet_19

In [35]:
predictions_images = Dense(3, activation='softmax')(image_features)
# 构建仅图像输入的模型
image_model = Model(inputs=image_input, outputs=predictions_images)
image_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# 仅使用图像数据进行训练
image_model.fit(train_image, train_label_one_hot, epochs=10, batch_size=32, validation_data=(val_image, val_label_one_hot))

# 在验证集上评估模型
image_model.evaluate(val_image, val_label_one_hot)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[0.8997756242752075, 0.6050000190734863]

#### 消融实验3：bi-lstm

In [37]:
predictions_text_bilstm = Dense(3, activation='softmax')(text_features_bilstm)

# 构建文本模型
text_model = Model(inputs=text_input_bilstm, outputs=predictions_text_bilstm)
text_model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
text_model.fit(train_text_pad, train_label_one_hot, epochs=10, batch_size=32, validation_data=(val_text_pad, val_label_one_hot))

# 在验证集上评估模型
text_model.evaluate(val_text_pad, val_label_one_hot)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[0.8765424489974976, 0.6037499904632568]

#### 消融实验4：ResNet_50

In [38]:
predictions_images_resnet = Dense(3, activation='softmax')(image_features_resnet)
# 构建图像模型
image_model = Model(inputs=image_input_resnet, outputs=predictions_images_resnet)
image_model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
image_model.fit(train_image, train_label_one_hot, epochs=10, batch_size=32, validation_data=(val_image, val_label_one_hot))

# 在验证集上评估模型
image_model.evaluate(val_image, val_label_one_hot)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[1.0930933952331543, 0.59375]