In [28]:
import argparse
import json
import sys
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
import seaborn as sns
import os
from glob import glob
from engines.week16_engines4_mse_knn_trial import *
# from engines.models_transformer import *
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from scipy.stats import boxcox
from scipy.special import inv_boxcox,boxcox1p,inv_boxcox1p



def none_or_int(value):
    """argparse ``type`` converter: 'none' (any casing) becomes None, everything else goes through int()."""
    return None if value.lower() == 'none' else int(value)

def _str_to_bool(value):
    """argparse ``type`` converter for boolean flags.

    ``type=bool`` treats every non-empty string (including "False") as True,
    so the textual value is interpreted explicitly instead.

    Returns True for true/t/yes/y/on/1 and False for false/f/no/n/off/0 or an
    empty string (case-insensitive). Raises argparse.ArgumentTypeError for
    anything else.
    """
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ('true', 't', 'yes', 'y', 'on', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', 'off', '0', ''):
        return False
    raise argparse.ArgumentTypeError(f'expected a boolean value, got {value!r}')


# Command-line interface of the experiment. Help texts use %(default)s so the
# documented default can never drift away from the real one.
parser = argparse.ArgumentParser(description='Training script for MultiModalMOdel')

parser.add_argument('--device', type=int, default=0, help='device number for gpu accelerating (default: %(default)s)')
parser.add_argument('--batch_size', type=int, default=68, help='batch size (default: %(default)s)')
parser.add_argument('--image_embeddings_dim_out', type=none_or_int, default=128, help='Add Linear Layer, Output dimension for image embeddings (default: %(default)s, set "None" for no linear embedding proj)')
parser.add_argument('--text_embeddings_dim_out', type=none_or_int, default=128, help='Add Linear Layer, Output dimension for text embeddings (default: %(default)s, set "None" for no linear embedding proj)')
parser.add_argument('--other_features_dim_out', type=none_or_int, default=None, help='Add Linear Layer, Output dimension for other features embeddings (default: %(default)s, set "None" for no linear embedding proj)')
parser.add_argument('--header_mode', type=str, default='FFN', help='FFN, Dense, Transformer, FFN_Transformer, Dense_Transformer (default: %(default)s)')
parser.add_argument('--header_hidden_dims', type=int, nargs='+', default=[128], help='Add Linear Layer, Hidden dimensions for the header (default: %(default)s)')
parser.add_argument('--dir_path', type=str, default='this_is_experiment', help='Directory path to save the model state dict (default: %(default)s)')
parser.add_argument('--train_dataset', type=str, default='/home/sflab/SFLAB/sungheon/nsr/dataset/nsr_train_할인율0_중분류.csv', help='train dataset path (default: %(default)s)')
parser.add_argument('--test_dataset', type=str, default='/home/sflab/SFLAB/sungheon/nsr/dataset/nsr_test_할인율0_중분류.csv', help='test dataset path (default: %(default)s)')
parser.add_argument('--random_seed', type=int, default=42, help='random seed (default: %(default)s)')
parser.add_argument('--image_size', type=int, default=256, help='image size for transforming (default: %(default)s)')
parser.add_argument('--learning_rate', type=float, default=0.01, help='learning rate (default: %(default)s)')
parser.add_argument('--num_epochs', type=int, default=50, help='Num epochs for training (default: %(default)s)')
parser.add_argument('--loss_ratio', type=float, default=1, help='Scaled/Unscaled Loss ratio; 1 puts all weight on the scaled-target loss (default: %(default)s)')
# Parsed with _str_to_bool so that "--image_normalizing False" really disables it.
parser.add_argument('--image_normalizing', type=_str_to_bool, default=False, help='Decide whether to use image normalizing or not (default: %(default)s)')
parser.add_argument('--nhead', type=int, default=4, help='transformer nhead (default: %(default)s)')
parser.add_argument('--num_encoder_layers', type=int, default=6, help='transformer num encoder layers (default: %(default)s)')
parser.add_argument('--num_decoder_layers', type=int, default=6, help='transformer num decoder layers (default: %(default)s)')
parser.add_argument('--model_path', type=str, default='engines/models_transformer_knn_trial.py', help='model.py path (default: %(default)s)')
parser.add_argument('--activation_func', type=str, default='gelu', help='gelu or relu (default: %(default)s)')

# Print the usage text and stop when help was requested on the command line.
if {'--help', '-h'}.intersection(sys.argv):
    parser.print_help()
    sys.exit()

# Inside a Jupyter kernel an empty argument list is parsed, so every option
# takes its default; as a regular script (args=None) sys.argv is parsed.
args = parser.parse_args(args=[] if 'ipykernel' in sys.modules else None)

# Star-import the model definitions that match the file name given in
# --model_path; any other file name imports nothing here.
model_path = args.model_path
model_py = model_path.rpartition('/')[2]
if model_py == 'models_transformer_knn_trial.py':
    from engines.models_transformer_knn_trial import *
elif model_py == 'models_transformer_v2.py':
    from engines.models_transformer_v2 import *
elif model_py == 'models_transformer.py':
    from engines.models_transformer import *

set_random_seed(args.random_seed)

# Store the parsed arguments next to the experiment outputs
# (ensure_ascii=False keeps the Korean dataset paths readable).
os.makedirs(args.dir_path, exist_ok=True)
config_path = os.path.join(args.dir_path, 'config.json')
with open(config_path, mode='w', encoding='utf-8') as config_file:
    json.dump(vars(args), config_file, indent=4, ensure_ascii=False)

# Columns removed from the training frame before modelling; names that are
# absent from the CSV are ignored.
unused_columns = ['Unnamed: 0', '판매시작연도', '판매첫날', '상품코드', '판매일자', '상품명', '상품명2', '칼라', '칼라명', '칼라명2', '현재가', '할인율(%)', '파일경로', '이미지갯수', '외관설명', '기능설명','카테고리']
df_train = pd.read_csv(args.train_dataset).drop(columns=unused_columns, errors='ignore')

# The test frame is cut down to exactly the columns kept for training.
cols = df_train.columns.tolist()
df_test = pd.read_csv(args.test_dataset)[cols]

df_train = df_train.drop_duplicates()
df_test = df_test.drop_duplicates()

# Raw targets (the +1 shift that was tried earlier is not applied).
y_train = np.array(df_train['판매수량'].tolist())
y_test = np.array(df_test['판매수량'].tolist())

# Box-Cox is fitted on the training targets only; of its two results only the
# fitted lambda is used below.
boxcox_y_train, lambda_train = boxcox(y_train)

lowerbound = np.quantile(y_train, 0.16)
upperbound = np.quantile(y_train, 0.84)

# Affine rescaling of y**lambda: with these coefficients the 16% training
# quantile maps to +1 and the 84% training quantile maps to -1.
lower_pow = lowerbound**lambda_train
upper_pow = upperbound**lambda_train
a = 2 / (lower_pow - upper_pow)
b = a * lower_pow - 1

# The test targets reuse the lambda, a and b obtained from the training set.
y_train_boxcoxed_with_shifting = a * (y_train**lambda_train) - b
y_test_boxcoxed_with_shifting = a * (y_test**lambda_train) - b
df_train['판매수량_scaled'] = y_train_boxcoxed_with_shifting
df_test['판매수량_scaled'] = y_test_boxcoxed_with_shifting

# Seeded torch generator handed to the data pipeline.
generator = torch.Generator()
generator.manual_seed(args.random_seed)

train_dataloader, test_dataloader = datapreprocessing(
    df_train,
    df_test,
    generator,
    image_resize_shape=(args.image_size, args.image_size),
    image_normalizing=args.image_normalizing,
    batch_size=args.batch_size,
    num_workers=os.cpu_count(),
    pin_memory=True,
)

# Input widths for the embedding model.
# Text features are the columns whose name contains '설명'.
text_columns = df_train.filter(like='설명').columns
text_embedding_dim_in = len(text_columns)
# NOTE(review): the "- 3" presumably removes the image-file column and the two
# target columns (raw and scaled) - confirm against datapreprocessing.
other_features_dim_in = df_train.drop(text_columns, axis=1).shape[-1] - 3




Compose(
    Resize(size=(256, 256), interpolation=bilinear, max_size=None, antialias=warn)
    ToTensor()
)


In [38]:
# Quick overview of the training frame: row count, column count and dtypes.
df_train.info()

<class 'pandas.core.frame.DataFrame'>
Index: 108 entries, 0 to 4898
Columns: 1557 entries, 이미지파일 to 판매수량_scaled
dtypes: bool(15), float64(1537), int64(4), object(1)
memory usage: 1.3+ MB


In [29]:
# Change the working directory; presumably so the relative `engines` imports
# and the default --model_path resolve (confirm).
# NOTE(review): hard-coded absolute path tied to one machine.
os.chdir('/home/sflab/SFLAB/sungheon/nsr/public/')

In [30]:
import argparse
import json
import sys
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
import seaborn as sns
import os
from glob import glob
from engines.week16_engines4_mse_knn_trial import *
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from scipy.stats import boxcox
from scipy.special import inv_boxcox,boxcox1p,inv_boxcox1p
from sklearn.metrics.pairwise import euclidean_distances, cosine_similarity

class MULTIIMBEDDING(nn.Module):
    """Embedding extractor that concatenates image, text and tabular features.

    Each image goes through a pretrained ResNet-152, the text and remaining
    features go through an optional linear projection, every part is passed
    through the chosen activation, and the parts are concatenated per sample.

    Args:
        num_images_per_data: number of images per data point. The original
            author's note says the data loader duplicates single-image items
            so that every item has 2 images, and this should stay at 2.
        header_mode: one of {'ffn', 'dense', 'transformer', 'ffn_transformer',
            'dense_transformer'}. Only stored on the instance by this class.
        image_embeddings_dim_out: size of the per-image embedding. ``None``
            keeps the ResNet feature vector as is (``fc`` becomes Identity).
        text_embedding_dim_in: number of text-embedding columns in the
            dataframe (fixed by the data).
        text_embedding_dim_out: size of the linearly projected text embedding.
            ``None`` disables the projection.
        other_features_dim_in: number of remaining feature columns, i.e.
            excluding text-embedding, image and target columns (fixed by the
            data).
        other_features_dim_out: size of the linearly projected remaining
            features. ``None`` disables the projection.
        header_hidden_dims, nhead, num_encoder_layers, num_decoder_layers:
            accepted for signature compatibility with the other model classes;
            not used by this class.
        activation: 'relu' or 'gelu' (case-insensitive).

    Raises:
        ValueError: if ``activation`` is neither 'relu' nor 'gelu'.
    """

    def __init__(self, num_images_per_data, header_mode:str='ffn',
                 image_embeddings_dim_out=None, text_embedding_dim_in=None, 
                 text_embedding_dim_out=None, other_features_dim_in=None, 
                 other_features_dim_out=None, header_hidden_dims:list=None,
                 nhead:int=8, num_encoder_layers:int=6,num_decoder_layers:int=6,activation:str='gelu'):
        
        super().__init__()

        # Fail fast on an unsupported activation: otherwise `self.activ` would
        # never be set and the error would only surface inside forward().
        # Checked before the (expensive) pretrained backbone is loaded.
        activation_classes = {'relu': nn.ReLU, 'gelu': nn.GELU}
        activation_key = activation.lower()
        if activation_key not in activation_classes:
            raise ValueError(f"activation must be 'relu' or 'gelu', got {activation!r}")

        # Image branch: pretrained ResNet-152 with a replaced classification head.
        self.header_mode = header_mode
        self.num_images = num_images_per_data
        self.image_model_weights = torchvision.models.ResNet152_Weights.DEFAULT
        self.image_model = torchvision.models.resnet152(weights=self.image_model_weights)
        
        if image_embeddings_dim_out is not None:
            self.image_model.fc = nn.Linear(in_features=self.image_model.fc.in_features, out_features=image_embeddings_dim_out)
            self.image_output_dim = image_embeddings_dim_out
        else:
            self.image_output_dim = self.image_model.fc.in_features
            self.image_model.fc = nn.Identity()

        # Text branch
        if text_embedding_dim_out is not None:
            self.text_fc = nn.Linear(in_features=text_embedding_dim_in, out_features=text_embedding_dim_out)
            self.text_embedding_dim = text_embedding_dim_out
        else:
            self.text_fc = nn.Identity()
            self.text_embedding_dim = text_embedding_dim_in

        # Remaining (tabular) features branch
        if other_features_dim_out is not None:
            self.rest_feature_fc = nn.Linear(in_features=other_features_dim_in, out_features=other_features_dim_out)
            self.other_features_dim = other_features_dim_out
        else:
            self.rest_feature_fc = nn.Identity()
            self.other_features_dim = other_features_dim_in

        print('image_output_dim :',self.image_output_dim)
        print('text_embedding_dim :',self.text_embedding_dim)
        print('other_features_dim :',self.other_features_dim)

        # Width of the concatenated embedding returned by forward().
        self.input_dim = self.image_output_dim * self.num_images + self.text_embedding_dim + self.other_features_dim
        # Registered last so the sub-module order matches the original layout.
        self.activ = activation_classes[activation_key]()
                
    def forward(self, images, text, other_features):
        """Return the concatenated embedding of one batch.

        ``images`` is iterated over its first dimension and every element is
        passed to the image model on its own; the per-element outputs are
        stacked and flattened from dimension 1 on.
        Assumes ``images`` is (batch, num_images, C, H, W) and ``text`` /
        ``other_features`` are (batch, dim) - TODO confirm against the loader.
        """
        image_embeddings = [self.activ(self.image_model(image)) for image in images]
        image_embeddings = torch.flatten(torch.stack(image_embeddings, dim=0), start_dim=1)

        text_embeddings = self.activ(self.text_fc(text))
        other_embeddings = self.activ(self.rest_feature_fc(other_features))

        combined_embeddings = torch.cat((image_embeddings, text_embeddings, other_embeddings), dim=1)
        return combined_embeddings
  

In [31]:
# Build the embedding model from the parsed options. --activation_func is
# forwarded as well; previously it was parsed but never reached the model.
model = MULTIIMBEDDING(num_images_per_data=2,
                        header_mode= args.header_mode,
                        image_embeddings_dim_out=args.image_embeddings_dim_out,
                        text_embedding_dim_in=text_embedding_dim_in,
                        text_embedding_dim_out=args.text_embeddings_dim_out,
                        other_features_dim_in=other_features_dim_in,
                        other_features_dim_out=args.other_features_dim_out,
                        header_hidden_dims=args.header_hidden_dims,
                        nhead = args.nhead,
                        num_encoder_layers = args.num_encoder_layers,
                        num_decoder_layers = args.num_decoder_layers,
                        activation = args.activation_func)


image_output_dim : 128
text_embedding_dim : 128
other_features_dim : 18


In [32]:
def embed(model,train_dataloader, test_dataloader,device='cpu',dir_path='model_state_dict',a=None,b=None,boxcox_lambda=None):
    """Embed every batch of ``train_dataloader`` with ``model``.

    The image backbone is frozen except for its ``fc`` head, then each batch
    is passed through the model and the batch outputs are concatenated along
    dimension 0.

    Args:
        model: module exposing ``image_model`` (with an ``fc`` sub-module) and
            callable as ``model(images, text_embeddings, other_embeddings)``.
        train_dataloader: iterable of 5-element batches
            ``(images, text, other, targets_unscaled, targets_scaled)``.
        test_dataloader: accepted for interface compatibility; not used.
        device: device the model and every batch are moved to.
        dir_path: directory that is created if missing.
        a, b, boxcox_lambda: accepted for interface compatibility; not used.

    Returns:
        The concatenated embeddings of all batches, or ``None`` when the
        dataloader yields no batch.
    """
    os.makedirs(dir_path,exist_ok=True)
    model.to(device)

    # Freeze the backbone, keep only its final fc head trainable.
    for param in model.image_model.parameters():
        param.requires_grad = False

    for param in model.image_model.fc.parameters():
        param.requires_grad = True

    # The batch loop used to sit inside the parameter loop above and returned
    # on the first batch, so only one batch was embedded (and nothing at all
    # when fc had no parameters). It now runs once over the whole dataloader.
    # NOTE(review): the model is left in its current train/eval mode, as before.
    batch_embeddings = []
    for batch in tqdm(train_dataloader,desc='Training',leave=False):
        images, text_embeddings, other_embeddings, _targets_unscaled, _targets_scaled = [x.to(device) for x in batch]
        batch_embeddings.append(model(images, text_embeddings, other_embeddings))

    if not batch_embeddings:
        return None
    return torch.cat(batch_embeddings, dim=0)


In [33]:
# Use the GPU chosen with --device (default 0, i.e. 'cuda:0' as before) and
# fall back to the CPU when CUDA is not available.
embed_device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
# NOTE(review): this rebinds `a` from the Box-Cox scale factor to the embedding
# tensor; re-running this cell then passes the tensor as the `a=` argument.
a = embed(model,train_dataloader,test_dataloader,device=embed_device,dir_path=args.dir_path,a=a,b=b,boxcox_lambda=lambda_train)


                            

In [34]:
# Shape of the embedding tensor returned by embed().
a.shape

torch.Size([107, 402])

In [42]:
# Pairwise Euclidean distances between all embeddings. The tensor is converted
# to NumPy once and used for both arguments.
embeddings_np = a.cpu().detach().numpy()
train_dist = euclidean_distances(embeddings_np, embeddings_np)
# Mask self-distances with +inf. `np.inf` is used because the `np.Inf` alias
# was removed in NumPy 2.0.
np.fill_diagonal(train_dist, np.inf)

In [43]:
# Display the distance matrix (the diagonal was set to inf in the previous cell).
train_dist

array([[      inf, 1.9505497, 1.4213275, ..., 0.893373 , 1.8796357,
        1.8881046],
       [1.9505497,       inf, 1.4633942, ..., 1.93993  , 1.5065978,
        1.600001 ],
       [1.4213275, 1.4633942,       inf, ..., 1.4698195, 1.8692372,
        1.4249022],
       ...,
       [0.893373 , 1.93993  , 1.4698195, ...,       inf, 1.9120259,
        1.9614851],
       [1.8796357, 1.5065978, 1.8692372, ..., 1.9120259,       inf,
        1.8848342],
       [1.8881046, 1.600001 , 1.4249022, ..., 1.9614851, 1.8848342,
              inf]], dtype=float32)