# CNN

Image Similarity using CNN feature embeddings
https://github.com/totogot/ImageSimilarity

# 模型

In [5]:
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image

class Img2Vec:
    """Extract CNN feature embeddings from images with a pretrained torchvision model.

    The network named by ``model_name`` is loaded from ``torchvision.models`` and
    every child module except the last one is kept as the embedding extractor,
    so a forward pass yields the activations that feed the final layer.
    """

    def __init__(self, model_name='resnet50', weights='DEFAULT'):
        """Load the model, pick the device and build the preprocessing pipeline.

        Args:
            model_name: Name of a model constructor in ``torchvision.models``.
            weights: Value forwarded to that constructor's ``weights`` argument.
        """
        self.architecture = model_name
        self.weights = weights
        self.transform = self.assign_transform(weights)
        self.device = self.set_device()
        self.model = self.initiate_model()
        self.embed = self.assign_layer()
        print("Model initialized")

    def assign_transform(self, weights):
        """Return the preprocessing pipeline applied to every image.

        ``weights`` is accepted for interface compatibility but is not consulted:
        the same resize / to-tensor / normalize pipeline is always returned.
        NOTE(review): there is no crop step, so non-square inputs stay non-square
        after ``Resize(224)`` - confirm this is intended for the chosen weights.
        """
        return transforms.Compose([
            transforms.Resize(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

    def set_device(self):
        """Return ``'cuda:0'`` when CUDA is available, otherwise ``'cpu'``."""
        return 'cuda:0' if torch.cuda.is_available() else 'cpu'

    def initiate_model(self):
        """Instantiate the configured architecture on the device, in eval mode."""
        model = getattr(models, self.architecture)(weights=self.weights)
        model.to(self.device)
        return model.eval()

    def assign_layer(self):
        """Return the model's child modules, minus the last one, as a Sequential."""
        return nn.Sequential(*list(self.model.children())[:-1])

    def embed_image(self, img_path):
        """Compute the embedding of a single image file.

        Args:
            img_path: Path of the image to open.

        Returns:
            The extractor output with singleton dimensions squeezed away, as a
            CPU tensor (so it can be converted to NumPy regardless of the
            device the model runs on).
        """
        # The context manager closes the file handle once the pixels are
        # decoded; convert() returns an independent in-memory image.
        with Image.open(img_path) as img:
            rgb = img.convert('RGB')
        batch = self.transform(rgb).unsqueeze(0).to(self.device)
        with torch.no_grad():
            embedding = self.embed(batch)
        # Callers turn the result into NumPy arrays, which fails for CUDA
        # tensors, so always hand back a CPU tensor.
        return embedding.squeeze().cpu()

    def embed_images(self, upper_img_path, lower_img_path):
        """Embed an upper/lower image pair.

        Args:
            upper_img_path: Path of the upper-garment image.
            lower_img_path: Path of the lower-garment image.

        Returns:
            ``(upper_embedding, lower_embedding, combined_embedding)`` where the
            combined embedding is the two vectors concatenated along dim 0.
        """
        upper_embedding = self.embed_image(upper_img_path)
        lower_embedding = self.embed_image(lower_img_path)
        combined_embedding = torch.cat((upper_embedding, lower_embedding), dim=0)

        return upper_embedding, lower_embedding, combined_embedding


# 產生 4096 維的 combined embedding 向量；因為向量太大，存進 csv 會變亂碼，所以改用 pickle 存

### 好的 Embedding

In [43]:
import pandas as pd
import os
import numpy as np
# `data` below collects the columns of the output DataFrame
import json

# Column-wise accumulator: one list per output column, one entry per outfit.
data = {
    'index': [],
    'img_pathE': [],
    'img_pathQ': [],
    'predEQ': [],
    'label': [],
    'embedE': [],
    'embedQ': []
}

# Image paths in the CSV use ':' where the local folders use '_', and are
# relative to '../new_data'; both are rewritten to reach the local copy.
CSV_PREFIX = '../new_data/cut_style'
LOCAL_PREFIX = 'C://Users//chen//資管專題//data//cut_style'

img2vec = Img2Vec('resnet50', weights='IMAGENET1K_V2')

df = pd.read_csv(r'C:\Users\chen\資管專題\good_outfits.csv')

for index in df.index:
    upper_picture_path = df.at[index, 'img_pathE'].replace(':', '_').replace(CSV_PREFIX, LOCAL_PREFIX)
    lower_picture_path = df.at[index, 'img_pathQ'].replace(':', '_').replace(CSV_PREFIX, LOCAL_PREFIX)

    upper_embedding, lower_embedding, combined_embedding = img2vec.embed_images(upper_picture_path, lower_picture_path)

    # Explicit CPU hop: np.array() on a CUDA tensor raises, and .numpy() makes
    # the tensor -> ndarray conversion unambiguous.
    upper_embedding = upper_embedding.detach().cpu().numpy()
    lower_embedding = lower_embedding.detach().cpu().numpy()
    combined_embedding = combined_embedding.detach().cpu().numpy()

    data['index'].append(df.at[index, 'index'])
    data['img_pathE'].append(df.at[index, 'img_pathE'])
    data['img_pathQ'].append(df.at[index, 'img_pathQ'])
    data['label'].append('1')  # every row of good_outfits.csv is labelled '1'
    data['predEQ'].append(combined_embedding)
    data['embedE'].append(upper_embedding)
    data['embedQ'].append(lower_embedding)

# Embeddings are stored as ndarray objects, so the table is pickled, not CSV'd.
data = pd.DataFrame(data)
data.to_pickle('data.pkl')
if len(data):
    # Only defined when at least one row was processed.
    print(f"Combined embedding shape: {combined_embedding.shape}")
print('ok')
print(data.head())

# 
# 

Model initialized
Combined embedding shape: (4096,)
ok
  index                                 img_pathE  \
0   0_1  ../new_data/cut_style:america/0_1_E_.jpg   
1   0_3  ../new_data/cut_style:america/0_3_E_.jpg   
2   0_4  ../new_data/cut_style:america/0_4_E_.jpg   
3   0_7  ../new_data/cut_style:america/0_7_E_.jpg   
4   0_8  ../new_data/cut_style:america/0_8_E_.jpg   

                                  img_pathQ  \
0  ../new_data/cut_style:america/0_1_Q_.jpg   
1  ../new_data/cut_style:america/0_3_Q_.jpg   
2  ../new_data/cut_style:america/0_4_Q_.jpg   
3  ../new_data/cut_style:america/0_7_Q_.jpg   
4  ../new_data/cut_style:america/0_8_Q_.jpg   

                                              predEQ label  \
0  [0.0, 0.0, 0.0, 0.0, 0.0011892455, 0.0, 0.0, 2...     1   
1  [0.0, 0.0, 0.0071198223, 0.14539345, 0.0, 0.0,...     1   
2  [0.0, 0.0, 0.0, 0.20410387, 0.17157467, 0.1296...     1   
3  [0.0, 0.005689638, 0.0, 0.12158056, 0.13749401...     1   
4  [0.0, 0.0, 0.27391717, 0.0, 0.

### 糟的 Embedding

In [None]:
import pandas as pd
import os
import numpy as np
# `data` below collects the columns of the output DataFrame
import json

# Column-wise accumulator: one list per output column, one entry per outfit.
data = {
    'idxE': [],
    'idxQ': [],
    'img_pathE': [],
    'img_pathQ': [],
    'predEQ': [],
    'label': [],
    'embedE': [],
    'embedQ': []
}

# Image paths in the CSV use ':' where the local folders use '_', and are
# relative to '../new_data'; both are rewritten to reach the local copy.
CSV_PREFIX = '../new_data/cut_style'
LOCAL_PREFIX = 'C://Users//chen//資管專題//data//cut_style'

img2vec = Img2Vec('resnet50', weights='IMAGENET1K_V2')

df = pd.read_csv(r'C:\Users\chen\資管專題\bad_outfits.csv')

for index in df.index:
    upper_picture_path = df.at[index, 'img_pathE'].replace(':', '_').replace(CSV_PREFIX, LOCAL_PREFIX)
    lower_picture_path = df.at[index, 'img_pathQ'].replace(':', '_').replace(CSV_PREFIX, LOCAL_PREFIX)

    upper_embedding, lower_embedding, combined_embedding = img2vec.embed_images(upper_picture_path, lower_picture_path)

    # Explicit CPU hop: np.array() on a CUDA tensor raises, and .numpy() makes
    # the tensor -> ndarray conversion unambiguous.
    upper_embedding = upper_embedding.detach().cpu().numpy()
    lower_embedding = lower_embedding.detach().cpu().numpy()
    combined_embedding = combined_embedding.detach().cpu().numpy()

    data['idxE'].append(df.at[index, 'idxE'])
    data['idxQ'].append(df.at[index, 'idxQ'])
    data['img_pathE'].append(df.at[index, 'img_pathE'])
    data['img_pathQ'].append(df.at[index, 'img_pathQ'])
    data['label'].append('0')  # every row of bad_outfits.csv is labelled '0'
    data['predEQ'].append(combined_embedding)
    data['embedE'].append(upper_embedding)
    data['embedQ'].append(lower_embedding)

# Embeddings are stored as ndarray objects, so the table is pickled, not CSV'd.
# NOTE(review): 'data.pkl' is also the file the good-outfit cell writes, so
# running both cells in sequence leaves only the bad-outfit rows on disk.
# Rename one of the outputs if both tables are needed.
data = pd.DataFrame(data)
data.to_pickle('data.pkl')
if len(data):
    # Only defined when at least one row was processed.
    print(f"Combined embedding shape: {combined_embedding.shape}")
print('ok')
print(data.head())

# 
# 

In [30]:
import pandas as pd
import os
import numpy as np
# Embed the upper (E) / lower (Q) crops of every style image and save one
# table per style.
img2vec = Img2Vec('resnet50', weights='IMAGENET1K_V2')

three_style = ['korea','japan','america']
three_style_index={'korea':'2','japan':'1','america':'0'}
three_style_each_num =1059
# Occasion constants are defined here but not used by the loop below.
ten_occation=['wedding_guest','travel','sports','shopping','school','porm','party','dating','daily_work','conference']
ten_occation_index={'wedding_guest':'12','travel':'5','sports':'6','shopping':'9','school':'11','porm':'8','party':'10','dating':'3','daily_work':'4','conference':'7'}
ten_occation_each_num = 1005


for style in three_style:
    file_path='C://Users//chen//資管專題//data//'+'cut_style_'+ style +'//'

    # Fresh accumulator for every style: reusing one dict across styles made
    # each later file also contain the rows of the styles processed before it.
    data = {
        'index': [],
        'img_path': [],
        'predEQ': [],
        'label': [],
        'embedE': [],
        'embedQ': []
    }

    for i in range(1,three_style_each_num+1):
        img_path = '../new_data/style:'+style+'/'+ str(i)+'.jpg'
        sample_id = three_style_index[style]+'_'+str(i)
        upper_picture_path = os.path.join(file_path, sample_id+'_E_.jpg')
        lower_picture_path = os.path.join(file_path, sample_id+'_Q_.jpg')

        has_upper = os.path.exists(upper_picture_path)
        has_lower = os.path.exists(lower_picture_path)
        if not has_upper and not has_lower:
            # Neither crop exists for this image number: nothing to record.
            continue

        # Every embedding is (re)assigned on each iteration, so a missing crop
        # is recorded as None instead of silently reusing the previous value.
        upper_embedding = None
        lower_embedding = None
        combined_embedding = None
        if has_upper:
            upper_embedding = img2vec.embed_image(upper_picture_path).detach().cpu().numpy()
        if has_lower:
            lower_embedding = img2vec.embed_image(lower_picture_path).detach().cpu().numpy()
        if has_upper and has_lower:
            # Same upper-then-lower concatenation Img2Vec.embed_images performs.
            combined_embedding = np.concatenate((upper_embedding, lower_embedding))

        data['index'].append(sample_id)
        data['img_path'].append(img_path)
        data['predEQ'].append(combined_embedding)
        data['embedE'].append(upper_embedding)
        data['embedQ'].append(lower_embedding)
        data['label'].append(1)

    df = pd.DataFrame(data)
    name = 'cut_style_'+ style+'.csv'
    df.to_csv(name, index=False)
    # The CSV stores each array as its string form, which NumPy abbreviates
    # with '...' for long vectors; the pickle keeps the full embeddings.
    df.to_pickle('cut_style_'+ style+'.pkl')
    print('ok')



Model initialized


KeyboardInterrupt: 

In [None]:

# Inspect the embeddings left in the kernel by the most recently run embedding
# cell (upper_embedding / lower_embedding / combined_embedding must exist).
print(f"Upper embedding shape: {upper_embedding.shape}")
print(f"Upper embedding: {upper_embedding}")
print(f"Lower embedding shape: {lower_embedding.shape}")
print(f"Lower embedding: {lower_embedding}")
print(f"Combined embedding shape: {combined_embedding.shape}")
print('ok')

In [None]:
# Occasion label indices:
# Dating: 3
# Daily Work: 4
# Travel: 5
# Sports: 6
# Conference: 7
# Prom: 8
# Shopping: 9
# Party: 10
# School: 11
# Wedding Guest: 12

# OpenAI clip api

https://medium.com/@highsunday0630/image-embedding-1-clip%E6%A8%A1%E5%9E%8B%E6%8F%90%E5%8F%96-image-embedding-%E4%B8%A6%E4%BB%A5-tensorboard-%E8%A6%96%E8%A6%BA%E5%8C%96%E6%95%88%E6%9E%9C-dc281370d7d8