### model

In [1]:
# im-project-main/models
MODEL_PATH = "lib/df1.pkl"
CLASSES_PATH = "lib/attribute-classes.txt"

In [2]:
import torch
from torchvision import transforms
from torch.autograd import Variable
from PIL import Image
import torch.nn as nn
import os


class AttributePredictionModel:
    
    def __init__(self):
        self.model = None
        self.labels = []
        
    def load(self, model_path, labels_path, eval_mode=False):
        self.model = torch.load(model_path)
        self.model.eval()  # mode: eval
        self.labels = open(labels_path, 'r').read().splitlines()
        
        if eval_mode:
            print(self.model)
    
    def predict(self, image_path, threshold=0.1):
        device = torch.device("cpu")
        img = Image.open(image_path).convert('RGB')
        
        test_transforms = transforms.Compose([
            transforms.Resize(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
        
        image_tensor = test_transforms(img).float()
        image_tensor = image_tensor.unsqueeze_(0)  # 增加 batch 維度
        inp = Variable(image_tensor).to(device)
        
        with torch.no_grad():
            output = self.model(inp)
        
        # raw output --> prob[0, 1], pred{0, 1}, att
        probabilities = torch.sigmoid(output).cpu().numpy()[0]
        predictions = (probabilities >= threshold).astype(int)
        predicted_attributes = [self.labels[i] for i in range(len(predictions)) if predictions[i] == 1]
        return probabilities, predictions, predicted_attributes

# model instance
learner = AttributePredictionModel()
learner.load(MODEL_PATH, CLASSES_PATH)

### img2vec

In [3]:
def img2vec(imgE_path, imgQ_path):
    
    # img -> prob or multi-hot
    probE, predE, attE = learner.predict(imgE_path)
    probQ, predQ, attQ = learner.predict(imgQ_path)

    probEQ = list(probE) + list(probQ)
    predEQ = list(predE) + list(predQ)

    #print("probEQ", probEQ)
    #print("predEQ", predEQ)

    #print(f"Predicted attributes (E): {attE}")
    #print(f"Predicted attributes (Q): {attQ}")
    
    return probEQ, probE, probQ, predEQ, predE, predQ

### good data (input good data, output good embeddings)

In [10]:
import csv

# input outfits file
outfits_file = '../outfits/good_outfits.csv'

# output embedding file
prob_file = 'good_embedding_probEQ.csv'
pred_file = 'good_embedding_predEQ.csv'
label = 1  ### 1 or 0

prob_rows = []
pred_rows = []

# read outfits_file
with open(outfits_file, mode='r') as file:
    reader = csv.DictReader(file)
    for row in reader:
        index = row['index']  ### good
        img_path = row['img_path']  ### good
        img_pathE = row['img_pathE']
        img_pathQ = row['img_pathQ']
        
        probEQ, probE, probQ, predEQ, predE, predQ = img2vec(img_pathE, img_pathQ)
        
        prob_rows.append([index, img_path, probEQ, label, probE, probQ])  ### good
        pred_rows.append([index, img_path, predEQ, label, predE, predQ])  ### good

# write prob_file
with open(prob_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['index', 'img_path', 'probEQ', 'label', 'probE', 'probQ'])  ### good
    writer.writerows(prob_rows)

# write pred_file
with open(pred_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['index', 'img_path', 'predEQ', 'label', 'predE', 'predQ'])  ### good
    writer.writerows(pred_rows)

print(f"機率的 embedding 儲存在 '{prob_file}' 檔案中，共 {len(prob_rows)} 筆資料。")
print(f"向量長度依序為 outfits({len(prob_rows[0][2])}), 上/下衣({len(prob_rows[0][4])})。")
print(f"向量長相 {prob_rows[0][4][:5]}... ")
print()
print(f"multi-hot 的 embedding 儲存在 '{pred_file}' 檔案中，共 {len(pred_rows)} 筆資料。")
print(f"向量長度依序為 outfits({len(pred_rows[0][2])}), 上/下衣({len(pred_rows[0][4])})。")
print(f"向量長相 {pred_rows[0][4][:5]}... ")

機率的 embedding 儲存在 'good_embedding_probEQ.csv' 檔案中，共 2094 筆資料。
向量長度依序為 outfits(196), 上/下衣(98)。
向量長相 [0.05056207 0.04765347 0.04989205 0.05843196 0.05016752]... 

multi-hot 的 embedding 儲存在 'good_embedding_predEQ.csv' 檔案中，共 2094 筆資料。
向量長度依序為 outfits(196), 上/下衣(98)。
向量長相 [0 0 0 0 0]... 


### good4 data (追加台風)

In [12]:
import csv
import shutil # copy csv
import itertools # skip row (csv)

# input outfits file
outfits_file = '../outfits/good4_outfits.csv'

# output embedding file
old_prob_file = '../embeddings/model_train/good_embedding_probEQ.csv'
old_pred_file = '../embeddings/model_train/good_embedding_predEQ.csv'
prob_file = 'good4_embedding_probEQ.csv'
pred_file = 'good4_embedding_predEQ.csv'
shutil.copyfile(old_prob_file, prob_file)
shutil.copyfile(old_pred_file, pred_file)

label = 1  ### 1 or 0

prob_rows = []
pred_rows = []

# read outfits_file
with open(outfits_file, mode='r') as file:
    reader = csv.DictReader(file)
    t_flag = False
    
    for row in reader:
        index = row['index']  ### good
        
        if index.startswith("14_"): # skip 前段
            t_flag = True
        
        if t_flag:
            img_path = row['img_path']  ### good
            img_pathE = row['img_pathE']
            img_pathQ = row['img_pathQ']

            probEQ, probE, probQ, predEQ, predE, predQ = img2vec(img_pathE, img_pathQ)

            prob_rows.append([index, img_path, probEQ, label, probE, probQ])  ### good
            pred_rows.append([index, img_path, predEQ, label, predE, predQ])  ### good

# write prob_file
with open(prob_file, mode='a+', newline='') as file:
    writer = csv.writer(file)
    writer.writerows(prob_rows)

# write pred_file
with open(pred_file, mode='a+', newline='') as file:
    writer = csv.writer(file)
    writer.writerows(pred_rows)

# counter
cntProb = 0
cntPred = 0
with open(prob_file, mode='r', newline='') as file:
    reader = csv.DictReader(file)
    row_count = sum(1 for row in reader)
    cntProb = row_count - 1  # 應該不用扣掉才對
    
with open(pred_file, mode='r', newline='') as file:
    reader = csv.DictReader(file)
    row_count = sum(1 for row in reader)
    cntPred = row_count - 1  # 應該不用扣掉才對
    
print(f"機率的 embedding 儲存在 '{prob_file}' 檔案中，共 {cntProb} 筆資料。")
print(f"向量長度依序為 outfits({len(prob_rows[0][2])}), 上/下衣({len(prob_rows[0][4])})。")
print(f"向量長相 {prob_rows[0][4][:5]}... ")
print()
print(f"multi-hot 的 embedding 儲存在 '{pred_file}' 檔案中，共 {cntPred} 筆資料。")
print(f"向量長度依序為 outfits({len(pred_rows[0][2])}), 上/下衣({len(pred_rows[0][4])})。")
print(f"向量長相 {pred_rows[0][4][:5]}... ")

機率的 embedding 儲存在 'good4_embedding_probEQ.csv' 檔案中，共 2791 筆資料。
向量長度依序為 outfits(196), 上/下衣(98)。
向量長相 [0.04968239 0.05269612 0.05309688 0.07222343 0.05555998]... 

multi-hot 的 embedding 儲存在 'good4_embedding_predEQ.csv' 檔案中，共 2791 筆資料。
向量長度依序為 outfits(196), 上/下衣(98)。
向量長相 [0 0 0 0 0]... 


### bad data (input bad data, output bad embeddings)

In [13]:
import csv

# input outfits file
outfits_file = '../outfits/bad_outfits.csv'

# output embedding file
prob_file = 'bad_embedding_probEQ.csv'
pred_file = 'bad_embedding_predEQ.csv'
label = 0  ### 1 or 0

prob_rows = []
pred_rows = []

# read outfits_file
with open(outfits_file, mode='r') as file:
    reader = csv.DictReader(file)
    for row in reader:
        idxE = row['idxE']  ### bad
        idxQ = row['idxQ']  ### bad
        img_pathE = row['img_pathE']
        img_pathQ = row['img_pathQ']
        
        probEQ, probE, probQ, predEQ, predE, predQ = img2vec(img_pathE, img_pathQ)
        
        prob_rows.append([idxE, idxQ, img_pathE, img_pathQ, probEQ, label, probE, probQ])  ### bad
        pred_rows.append([idxE, idxQ, img_pathE, img_pathQ, predEQ, label, predE, predQ])  ### bad

# write prob_file
with open(prob_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['idxE', 'idxQ', 'img_pathE', 'img_pathQ', 'probEQ', 'label', 'probE', 'probQ'])  ### bad
    writer.writerows(prob_rows)

# write pred_file
with open(pred_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['idxE', 'idxQ', 'img_pathE', 'img_pathQ', 'predEQ', 'label', 'predE', 'predQ'])  ### bad
    writer.writerows(pred_rows)

print(f"機率的 embedding 儲存在 '{prob_file}' 檔案中，共 {len(prob_rows)} 筆資料。")
print(f"向量長度依序為 outfits({len(prob_rows[0][4])}), 上/下衣({len(prob_rows[0][6])})。")
print(f"向量長相 {prob_rows[0][4][:5]}... ")
print()
print(f"multi-hot 的 embedding 儲存在 '{pred_file}' 檔案中，共 {len(pred_rows)} 筆資料。")
print(f"向量長度依序為 outfits({len(pred_rows[0][4])}), 上/下衣({len(pred_rows[0][6])})。")
print(f"向量長相 {pred_rows[0][4][:5]}... ")

機率的 embedding 儲存在 'bad_embedding_probEQ.csv' 檔案中，共 2094 筆資料。
向量長度依序為 outfits(196), 上/下衣(98)。
向量長相 [0.048503175, 0.04974859, 0.054003127, 0.06407535, 0.052572835]... 

multi-hot 的 embedding 儲存在 'bad_embedding_predEQ.csv' 檔案中，共 2094 筆資料。
向量長度依序為 outfits(196), 上/下衣(98)。
向量長相 [0, 0, 0, 0, 0]... 


### ugly data (input ugly data, output ugly embeddings)

In [4]:
import csv

# input outfits file
outfits_file = '../outfits/ugly_outfits.csv'

# output embedding file
prob_file = 'ugly_embedding_probEQ.csv'
pred_file = 'ugly_embedding_predEQ.csv'
label = 0  ### 1 or 0

prob_rows = []
pred_rows = []

# read outfits_file
with open(outfits_file, mode='r') as file:
    reader = csv.DictReader(file)
    for row in reader:
        index = row['index']  ### ugly
        img_path = row['img_path']  ### ugly
        img_pathE = row['img_pathE']
        img_pathQ = row['img_pathQ']
        
        probEQ, probE, probQ, predEQ, predE, predQ = img2vec(img_pathE, img_pathQ)
        
        prob_rows.append([index, img_path, probEQ, label, probE, probQ])  ### ugly
        pred_rows.append([index, img_path, predEQ, label, predE, predQ])  ### ugly

# write prob_file
with open(prob_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['index', 'img_path', 'probEQ', 'label', 'probE', 'probQ'])  ### ugly
    writer.writerows(prob_rows)

# write pred_file
with open(pred_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['index', 'img_path', 'predEQ', 'label', 'predE', 'predQ'])  ### ugly
    writer.writerows(pred_rows)

print(f"機率的 embedding 儲存在 '{prob_file}' 檔案中，共 {len(prob_rows)} 筆資料。")
print(f"向量長度依序為 outfits({len(prob_rows[0][2])}), 上/下衣({len(prob_rows[0][4])})。")
print(f"向量長相 {prob_rows[0][4][:5]}... ")
print()
print(f"multi-hot 的 embedding 儲存在 '{pred_file}' 檔案中，共 {len(pred_rows)} 筆資料。")
print(f"向量長度依序為 outfits({len(pred_rows[0][2])}), 上/下衣({len(pred_rows[0][4])})。")
print(f"向量長相 {pred_rows[0][4][:5]}... ")

機率的 embedding 儲存在 'ugly_embedding_probEQ.csv' 檔案中，共 1105 筆資料。
向量長度依序為 outfits(196), 上/下衣(98)。
向量長相 [0.0566553  0.053541   0.05741191 0.06693212 0.06797753]... 

multi-hot 的 embedding 儲存在 'ugly_embedding_predEQ.csv' 檔案中，共 1105 筆資料。
向量長度依序為 outfits(196), 上/下衣(98)。
向量長相 [0 0 0 0 0]... 


### end