In [73]:
import argparse
import json
import sys
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
import os
from glob import glob
import random
# from engines.models_transformer import *g
from engines.models_knn import *
from engines.week16_engines_knn_no_scaling import *
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from torch.utils.tensorboard import SummaryWriter



def none_or_int(value):
    if value.lower() == 'none':
        return None
    return int(value)

parser = argparse.ArgumentParser(description='Training script for MultiModalMOdel')

parser.add_argument('--device',type=int,default=0,help='device number for gpu accelerating (default = 0)')
parser.add_argument('--batch_size',type=int,default=64,help='batch size (default=64)')
parser.add_argument('--image_embeddings_dim_out', type=none_or_int, default=128, help='Add Linear Layer, Output dimension for image embeddings (default=128, set "None" for no linear embedding proj)')
parser.add_argument('--text_embeddings_dim_out', type=none_or_int, default=128, help='Add Linear Layer, Output dimension for text embeddings (default=128, set "None" for no linear embedding proj))')
parser.add_argument('--other_features_dim_out', type=none_or_int, default=None, help='Add Linear Layer, Output dimension for other features embeddings (default=128, set "None" for no linear embedding proj))')
parser.add_argument('--header_mode',type=str,default='FFN',help='FFN, Dense, Transformer, FFN_Transformer, Dense_Transformer')
parser.add_argument('--header_hidden_dims', type=int, nargs='+', default=[128], help='Add Linear Layer, Hidden dimensions for the header (default=128)')
parser.add_argument('--dir_path', type=str, default='/home/sflab/SFLAB/sungheon/nsr/public/results_folder/mse_save_model_unscal_2/full_3_ffn', help='Directory path to save the model state dict (default="model_state_dict")') 
parser.add_argument('--train_dataset',type=str, default='../dataset/nsr_train_할인율0_중분류.csv',help='train dataset path (default="../dataset/nsr_train_할인율0_중분류.csv")')
parser.add_argument('--test_dataset',type=str, default='../dataset/nsr_test_할인율0_중분류.csv',help='test dataset path (default="../dataset/nsr_test_할인율0_중분류.csv")')
parser.add_argument('--random_seed',type=int, default=42,help='random seed (default=42)')
parser.add_argument('--image_size',type=int,default=256,help='image size for transforming (default=256)')
parser.add_argument('--learning_rate',type=float,default=0.01,help='learning rate (default=0.0000001)')
parser.add_argument('--num_epochs',type=int,default=15,help='Num epochs for training (default=50)')
parser.add_argument('--loss_ratio',type=float,default=0,help='Scaled/Unscaled Loss ratio, default=1(totally weight on target Scaled loss (default=1)')
parser.add_argument('--image_normalizing',type=bool,default=False,help='Decide wheter use image nomralizing or not(default=False)')
parser.add_argument('--nhead',type=int,default=4,help='transformer nhead')
parser.add_argument('--num_encoder_layers',type=int,default=6,help='transformer num encoder layers')
parser.add_argument('--num_decoder_layers',type=int,default=6,help='transformer num decoder layers')
parser.add_argument('--model_path',type=str,default='engines/models_knn2.py',help='model.py path')
parser.add_argument('--activation_func',type=str,default='gelu',help='gelu or relu')
parser.add_argument('--knn_metric',type=str,default='euclidean',help='euclidean or cosine')
parser.add_argument('--k_neighbors',type=int,default=3,help='neighbors')
parser.add_argument('--knn_embedding_metadata_concat',type=bool,default=False,help='Decide whether concatenate knn_embedding and metadata')

if '--help' in sys.argv or '-h' in sys.argv:
    parser.print_help()
    sys.exit()
    
if 'ipykernel' in sys.modules:
    # Jupyter 노트북에서 실행 중인 경우
    args = parser.parse_args(args=[])
else:
    # 일반 스크립트로 실행 중인 경우
    args = parser.parse_args()

set_random_seed(args.random_seed)

os.makedirs(args.dir_path, exist_ok=True)
config_path = os.path.join(args.dir_path, 'config.json')
with open(config_path, 'w',encoding='utf-8') as f:
    json.dump(vars(args), f, indent=4,ensure_ascii=False)

df_train = pd.read_csv(args.train_dataset,index_col=0)
df_test = pd.read_csv(args.test_dataset,index_col=0)
df_train_true = df_train.copy()
df_test_true = df_test.copy()

hi_train = pd.DataFrame()
hi_train['상품코드'] = df_train['상품코드']
hi_train['칼라'] = df_train['칼라명2']
hi_train['판매수량_true'] = df_train['판매수량']


hi_test = pd.DataFrame()
hi_test['상품코드'] = df_test['상품코드']
hi_test['칼라'] = df_test['칼라명2']
hi_test['판매수량_true'] = df_test['판매수량']

y_train = np.array(df_train['판매수량'].tolist())
y_test = np.array(df_test['판매수량'].tolist())

device = torch.device(f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu')

df_train, df_test = get_nearest_neighbors(df_train,df_test,device=device,k=args.k_neighbors,dis_metric=args.knn_metric,image_normalizing=args.image_normalizing,image_resize_shape=(args.image_size,args.image_size),image_embedding_dim=args.image_embeddings_dim_out)

df_train = df_train.drop(columns=['Unnamed: 0', '판매시작연도', '판매첫날', '상품코드', '판매일자', '상품명', '상품명2', '칼라', '칼라명', '칼라명2', '현재가', '할인율(%)', '파일경로', '이미지갯수', '외관설명', '기능설명','카테고리'],errors='ignore')
df_train = df_train.drop(df_train.filter(like='closest idx').columns,axis=1,errors='ignore')

cols = df_train.columns.tolist()

df_test = df_test[cols]
hi_train = hi_train.drop_duplicates()
hi_test = hi_test.drop_duplicates()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test_sub['상품코드_칼라명'] = df_test_sub['상품코드'] + '_' + df_test_sub['칼라명2']


In [83]:
def get_k_neighbors_mean(df,neighbor_col,target_col,normalize_method):
    """
    normalize_method : 'minmax' : MinMaxScaler, 'standard' : StandardScaler
    """
    from sklearn.preprocessing import MinMaxScaler, StandardScaler
    normalizing = {'minmax':MinMaxScaler(),
                   'standard':StandardScaler()}
    scaler = normalizing[normalize_method]
    df2 = df.copy()
    df2['neighbor mean'] = df2[neighbor_col].apply(lambda x: np.mean(x))
    df2['distance'] = df2[target_col] - df2['neighbor mean']

    mean = df2['distance'].mean()
    std = df2['distance'].std()

    df2['normalized distance'] = scaler.fit_transform(df2['distance'].values.reshape(-1,1))
    return df2, scaler

In [84]:
df_train2, scaler = get_k_neighbors_mean(df_train,f"{args.k_neighbors} closest 판매수량",'판매수량','minmax')

In [85]:
df_train2.head(1)

Unnamed: 0_level_0,이미지파일,Color_BLACK,Color_BLACK DEEPESTRED,Color_BLACKBLACK,Color_BLACKCHARCOAL,Color_BLACKDGREY,Color_BLACKRED,Color_DEEPGR,Color_KHAKI,Color_MAROON,...,외관설명_벡터_766,외관설명_벡터_767,중분류_(통)긴바지,중분류_5/7부,중분류_9부,판매수량,3 closest 판매수량,neighbor mean,distance,normalized distance
상품코드_칼라명,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
NSA1I01_BLACK,['../dataset/images/EVOKE TIGHTS MEN/BLACK/BLA...,True,False,False,False,False,False,False,False,False,...,-0.146764,0.091771,0,0,1,68,"[33, 190, 219]",147.333333,-79.333333,0.240531


In [None]:
import argparse
import json
import sys
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
import seaborn as sns
import os
from glob import glob
from engines.week16_engines_knn_no_box import *
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from scipy.stats import boxcox
from scipy.special import inv_boxcox,boxcox1p,inv_boxcox1p

class MultiModalModel(nn.Module):
    def __init__(self, num_images_per_data=2,
                 header_mode:str='ffn',
                 image_embeddings_dim_out=None, text_embedding_dim_in=None, 
                 text_embedding_dim_out=None, other_features_dim_in=None, 
                 other_features_dim_out=None, header_hidden_dims:list=None,
                 knn_embedding_dim:int=3,
                 knn_embedding_metadata_concat=False,
                 nhead:int=8, num_encoder_layers:int=6,num_decoder_layers:int=6,activation:str='gelu'):
        
        super().__init__()
        """
        header_mode : {'ffn', 'dense', 'transformer','ffn_transformer','dense_transformer'} 이 중에서 선택해주세요.
        
        image_embeddings_dim_out : 원하는 이미지 emedding dimension (맘대로 가능)
        text_embedding_dim_in : dataframe의 텍스트 칼럼 dimension (데이터 프레임에 있는 텍스트 임베딩 칼럼 갯수.. 고정값)
        text_embedding_dim_out : 선형 변환을 통한 텍스트 embedding dim (맘대로 가능)
        other_features_dim_in : dataframe의 텍스트 embedding 칼럼 및 이미지, target 칼럼 제외, 나머지 feature 칼럼들 dimension (데이터 프레임에 있는 텍스트 제외한 칼럼들 갯수.. 고정값)
        other_features_dim_out : 선형 변환을 통한 output feature dim (맘대로 가능)
        header_hidden_dims : 각 모델의 ffn이나, dense 블록의 hidden dim
        nhead : transformer 계열 모델에서 num head,
        num_encoder_layers : transformer의 encoder layer 갯수
        num_decoder_layers : transformer의 decoder layer 갯수
        """
        self.knn_concat = knn_embedding_metadata_concat
        # Image
        self.header_mode = header_mode
      
        self.image_model_weights = torchvision.models.ResNet152_Weights.DEFAULT
        self.image_model = torchvision.models.resnet152(weights=self.image_model_weights)
        
        if image_embeddings_dim_out is not None:
            self.image_model.fc = nn.Linear(in_features=self.image_model.fc.in_features, out_features=image_embeddings_dim_out)
            self.image_output_dim = image_embeddings_dim_out
        else:
            self.image_output_dim = self.image_model.fc.in_features
            self.image_model.fc = nn.Identity()

        # Text
        if text_embedding_dim_out is not None:
            self.text_fc = nn.Linear(in_features=text_embedding_dim_in, out_features=text_embedding_dim_out)
            self.text_embedding_dim = text_embedding_dim_out
        else:
            self.text_fc = nn.Identity()
            self.text_embedding_dim = text_embedding_dim_in

        # 나머지 feature
        if other_features_dim_out is not None:
            if self.knn_concat is True:
                self.rest_feature_fc = nn.Linear(in_features=other_features_dim_in + knn_embedding_dim, out_features=other_features_dim_out)
                self.other_features_dim = other_features_dim_out

            else:
                self.rest_feature_fc = nn.Linear(in_features=other_features_dim_in, out_features=other_features_dim_out)
                self.other_features_dim = other_features_dim_out
        else:
            if self.knn_concat is True:

                self.rest_feature_fc = nn.Identity()
                self.other_features_dim = other_features_dim_in + knn_embedding_dim

            else:
                self.rest_feature_fc = nn.Identity()
                self.other_features_dim = other_features_dim_in

                

        print('image_output_dim :',self.image_output_dim)
        print('text_embedding_dim :',self.text_embedding_dim)
        print('other_features_dim :',self.other_features_dim)
        # 막단 layer

        if self.knn_concat is True:
            self.input_dim = self.image_output_dim * num_images_per_data + self.text_embedding_dim + self.other_features_dim

        else:
            self.input_dim = self.image_output_dim * num_images_per_data + self.text_embedding_dim + self.other_features_dim + knn_embedding_dim
        
        self.head_mlp = MLPRegression(self.input_dim, hidden_dims=header_hidden_dims, dropout_prob=0.2,activation=activation)
        
        
        if activation.lower() == 'relu':
            self.activ = nn.ReLU()
        elif activation.lower() == 'gelu':
            self.activ = nn.GELU()
            
        self.dense_block = Dense_block(self.input_dim,hidden_dims=header_hidden_dims,activation=activation)
        
        print('concated embed dim :',self.input_dim)

        if self.header_mode == 'transformer':
            if self.input_dim % nhead != 0:
        
                raise ValueError(f"concated embed dim (= {self.image_output_dim + self.text_embedding_dim + self.other_features_dim}) 은 nhead로 나누어 떨어져야만 합니다")
            
            else:
                self.transformer = nn.Transformer(d_model = self.input_dim,
                                          nhead = nhead,
                                          num_encoder_layers = num_encoder_layers,
                                          num_decoder_layers = num_decoder_layers,
                                          batch_first=True)
                self.fc = nn.Linear(self.input_dim,1)
        # if self.header_mode.lower() == 'ffn':
        #     self.header = self.head_mlp

        # elif self.header_mode.lower() == 'dense':
        #     self.header = self.dense_block

        # elif self.header_mode.lower() == 'transformer':
        
            

        

        if self.header_mode.lower() == 'ffn_transformer':
            self.bottle = nn.Sequential(*list(*self.head_mlp.children())[:-1])
            self.bottle_out_dim = list(self.bottle)[-4].out_features
            if self.bottle_out_dim % nhead != 0:
        
                raise ValueError(f"Bottle Neck Output Dim (={self.bottle_out_dim}) 은 nhead로 나누어 떨어져야만 합니다")
            
            else:
                self.transformer = nn.Transformer(d_model = self.bottle_out_dim,
                                              nhead = nhead,
                                              num_encoder_layers = num_encoder_layers,
                                              num_decoder_layers = num_decoder_layers,
                                              batch_first=True)
                self.fc = nn.Linear(self.bottle_out_dim,1)
                
        elif self.header_mode.lower() == 'dense_transformer':
            self.bottle = nn.Sequential(*list(*self.dense_block.children())[:-1])
            self.bottle_out_dim = list(self.bottle)[-1].out_features
            
            if self.bottle_out_dim % nhead != 0:
        
                raise ValueError(f"Bottle Neck Output Dim (={self.bottle_out_dim}) 은 nhead로 나누어 떨어져야만 합니다")
            
            else:
                self.transformer = nn.Transformer(d_model = self.bottle_out_dim,
                                              nhead = nhead,
                                              num_encoder_layers = num_encoder_layers,
                                              num_decoder_layers = num_decoder_layers,
                                              batch_first=True) 
                self.fc = nn.Linear(self.bottle_out_dim,1)
                
    def forward(self, images, text, other_features, knn_embedding):
        image_embeddings = [self.activ(self.image_model(image)) for image in images]
        image_embeddings = torch.stack(image_embeddings,dim=0)
        
        image_embeddings2 = torch.flatten(image_embeddings, start_dim=1)

        text_embeddings = self.text_fc(text)
        text_embeddings = self.activ(text_embeddings)

        if self.knn_concat is True:

            other_embeddings = torch.cat((other_features,knn_embedding),dim=1)
            other_embeddings = self.rest_feature_fc(other_embeddings)
            other_embeddings = self.activ(other_embeddings)
            combined_embeddings = torch.cat((image_embeddings2, text_embeddings, other_embeddings), dim=1)
            
        else:

            other_embeddings = self.rest_feature_fc(other_features)
            other_embeddings = self.activ(other_embeddings)
            combined_embeddings = torch.cat((image_embeddings2, text_embeddings, other_embeddings,knn_embedding), dim=1)

            
        

        y =self.activ(combined_embeddings)

        if self.header_mode.lower() == 'ffn':
            y = self.head_mlp(y)

        elif self.header_mode.lower() == 'dense':
            y = self.dense_block(y)

        elif self.header_mode.lower() == 'transformer':
            
            y = self.transformer(y,y)
            y = self.activ(y)
            y = self.fc(y)

        else:

            y = self.bottle(y)
            y = self.transformer(y,y)
            y = self.activ(y)
            y = self.fc(y)
           

        return nn.Sigmoid(y)

class MLPRegression(nn.Module):
    def __init__(self, input_dim, hidden_dims, dropout_prob=0.2,activation='gelu'):
        super(MLPRegression, self).__init__()
        layers = []
        in_features = input_dim

            
        for hidden_dim in hidden_dims:
            layers.append(nn.Linear(in_features, hidden_dim))
            layers.append(nn.BatchNorm1d(hidden_dim))  # Batch Normalization
            if activation.lower() == 'gelu':
                layers.append(nn.GELU())
            elif activation.lower() == 'relu':
                layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout_prob))  # Dropout
            in_features = hidden_dim
        
        layers.append(nn.Linear(in_features, 1))  # Output layer for regression

        self.mlp = nn.Sequential(*layers)

    def forward(self, x):
        return self.mlp(x)

class Dense_block(nn.Module):
    def __init__(self, input_dim,hidden_dims,activation='gelu'):
        super().__init__()
        layers = []
        in_features = input_dim
        
    
            
        if hidden_dims:
            for hidden_dim in hidden_dims:
                layers.append(nn.Linear(in_features,hidden_dim))
                if activation.lower() == 'gelu':
                    layers.append(nn.GELU())
                elif activation.lower() == 'relu':
                    layers.append(nn.ReLU())

                in_features = hidden_dim

        elif hidden_dims == 0 or "None":
            pass

        layers.append(nn.Linear(in_features,1))
        self.block = nn.Sequential(*layers)

    def forward(self,x):
        return self.block(x)

In [87]:

scaler.inverse_transform(torch.tensor(df_train2['distance'],dtype=torch.float).reshape(-1,1))

  scaler.inverse_transform(torch.tensor(df_train2['distance'],dtype=torch.float).reshape(-1,1))


array([[ -54003.89061228],
       [  82433.        ],
       [  56229.89061228],
       [  12859.22265307],
       [ -85628.33333333],
       [  -8826.11132654],
       [ -35255.11197281],
       [  -8600.2220068 ],
       [ -49937.89061228],
       [ -40902.33333333],
       [   9696.7779932 ],
       [ -55811.        ],
       [  34544.55469386],
       [  26186.66666667],
       [  29800.88802719],
       [   7212.        ],
       [  -3178.8889966 ],
       [-121770.55210876],
       [  -5211.8889966 ],
       [ -26219.55469386],
       [   2468.33333333],
       [  -3178.8889966 ],
       [  -7019.        ],
       [ -14699.22265307],
       [ -18539.33333333],
       [-150684.33333333],
       [-160623.44789124],
       [  -7019.        ],
       [  68879.66666667],
       [ -35932.77863948],
       [  91242.66666667],
       [ -26897.22136052],
       [ -43161.22136052],
       [  51486.22394562],
       [ -51519.10938772],
       [  -9955.55534013],
       [  15344.        ],
 

In [None]:
import torch
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import Dataset, DataLoader, TensorDataset
import torch.nn as nn
import torchvision
import pandas as pd
from torchvision.models import resnet18
from torchvision import transforms
from PIL import Image
import numpy as np
import ast
from tqdm import tqdm
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import os
import torch
import numpy as np
import random

def get_nearest_neighbors(df_train,df_test,device,image_embedding_csv=None,k:int=3,dis_metric:str='euclidean',image_normalizing=False,image_resize_shape:int=256,image_embedding_model=None,image_embedding_dim:int=128):
    from sklearn.metrics.pairwise import euclidean_distances, cosine_similarity
    from PIL import Image

    metric_dict = {'euclidean':euclidean_distances,
                   'cosine':cosine_similarity}

    dis_metric = metric_dict[dis_metric]

    columns_to_drop = ['판매시작연도', '현재가', '이미지갯수', '파일경로', '할인율(%)', '판매첫날', '판매일자', '상품명', '상품명2', '칼라', '칼라명', '외관설명', '기능설명', '카테고리']
    
    df_train_sub = df_train.drop(columns=columns_to_drop,errors='ignore')
    df_test_sub = df_test[df_train_sub.columns]

    df_train_sub['상품코드_칼라명'] = df_train_sub['상품코드'] + '_' + df_train_sub['칼라명2']
    df_test_sub['상품코드_칼라명'] = df_test_sub['상품코드'] + '_' + df_test_sub['칼라명2']

    df_train_sub = df_train_sub.drop('칼라명2',axis=1)
    df_test_sub = df_test_sub.drop('칼라명2',axis=1)
    
    df_train_sub = df_train_sub.drop_duplicates()
    df_test_sub = df_test_sub.drop_duplicates()
    
    df_train_sub = df_train_sub.set_index('상품코드_칼라명')
    df_test_sub = df_test_sub.set_index('상품코드_칼라명')
    
    df_train_image = df_train_sub[['이미지파일']]
    df_test_image = df_test_sub[['이미지파일']]

    df_train_sell = df_train_sub[['판매수량']]
    df_test_sell = df_test_sub[['판매수량']]
    
    df_train_sub = df_train_sub.drop(columns='판매수량')
    df_test_sub = df_test_sub.drop(columns='판매수량')
    
    df_train_index = df_train_sub.index
    df_test_index = df_test_sub.index
    
    if image_embedding_model is None:
        extractor_weight = torchvision.models.ResNet152_Weights.DEFAULT
        extractor = torchvision.models.resnet152(weights = extractor_weight)

    else:
        extractor = image_embedding_model

    if image_embedding_dim is None:
        
        extractor = nn.Sequential(*list(extractor.children())[:-1],
                                  nn.Flatten())

    else:
        
        input_dim = list(extractor.children())[-1].in_features
        extractor = nn.Sequential(*list(extractor.children())[:-1],
                                  nn.Flatten(),
                                 nn.Linear(input_dim,image_embedding_dim)
                                 )
    assert image_normalizing in [True, False], "image_normalizing must be either True or False"
    
    if image_normalizing is False:
        transform = transforms.Compose([
            transforms.Resize(image_resize_shape),
            transforms.ToTensor()
        ])

    else:
        transform = transforms.Compose([
            transforms.Resize(image_resize_shape),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

    if image_embedding_csv:
        
        df_train_sub = pd.concat([df_train_sub,image_embedding_csv],axis=1)
        df_test_sub = pd.concat([df_test_sub,image_embedding_csv],axis=1)
        
    else:
        
        train_embedding_list = []
        test_embedding_list = []
        extractor.eval()
        extractor.to(device)
        
        with torch.no_grad():
            
            for row in df_train_image['이미지파일'].apply(lambda x : ast.literal_eval(x)):
                images = [Image.open(image) for image in row]
                images = [transform(image) for image in images]
                
                if len(images) == 1:
                    images.append(images[-1])


                images = torch.stack(images)
                images = images.to(device)
                image_embed = extractor(images)
                image_embed = image_embed.flatten()
                train_embedding_list.append(image_embed.detach().cpu().numpy())

            for row in df_test_image['이미지파일'].apply(lambda x : ast.literal_eval(x)):
                images = [Image.open(image) for image in row]
                images = [transform(image) for image in images]
                
                if len(images) == 1:
                    images.append(images[0])


                images = torch.stack(images)
                images = images.to(device)
                image_embed = extractor(images)
                image_embed = image_embed.flatten()
                test_embedding_list.append(image_embed.detach().cpu().numpy())
                
    train_embedding_list = np.array(train_embedding_list)
    test_embedding_list = np.array(test_embedding_list)
    
    df_train_img_embed = pd.DataFrame(train_embedding_list,index=df_train_index,columns=[f'image_embed_{i}' for i in range(train_embedding_list.shape[-1])])
    df_test_img_embed = pd.DataFrame(test_embedding_list,index=df_test_index,columns=[f'image_embed_{i}' for i in range(test_embedding_list.shape[-1])])        
    
    df_train_sub2 = df_train_sub.drop(['이미지파일','상품코드'],axis=1)
    df_test_sub2 = df_test_sub.drop(['이미지파일','상품코드'],axis=1)
    
    df_train_embeddings = pd.concat([df_train_sub2,df_train_img_embed],axis=1)
    df_test_embeddings = pd.concat([df_test_sub2,df_test_img_embed],axis=1)

    df_embeddings = pd.concat([df_train_embeddings,df_test_embeddings],axis=0)
    df_embeddings_index = df_embeddings.index
    
    metric = dis_metric(df_embeddings)
    np.fill_diagonal(metric,np.inf)
    df_metric = pd.DataFrame(metric,index=df_embeddings_index, columns=df_embeddings_index)
    df_metric = df_metric[df_train_embeddings.index]

    df_train_sub = pd.concat([df_train_sub,df_train_sell],axis=1)
    df_test_sub = pd.concat([df_test_sub, df_test_sell],axis=1)
    
    df_metric['closest'] = df_metric.apply(lambda x: x.nsmallest(k).index.values, axis=1)
    df_train_sub[f'{k} closest idx'] = df_metric.loc[df_train_sub.index, 'closest']
    df_test_sub[f'{k} closest idx'] = df_metric.loc[df_test_sub.index,'closest']

    df_train_sub[f'{k} closest 판매수량'] = df_train_sub[f'{k} closest idx'].apply(lambda x: [df_train_sub['판매수량'][idx] for idx in x])
    df_test_sub[f'{k} closest 판매수량'] = df_test_sub[f'{k} closest idx'].apply(lambda x: [df_train_sub['판매수량'][idx] for idx in x])

    
    return df_train_sub, df_test_sub
    
def calculate_ad_smape_grouped_by(df_pred,df_true,eval_metric):

    if '칼라' in df_pred.columns.tolist():
        df_true = df_true[['상품코드','칼라명2','카테고리']]
        df_true = df_true.rename(columns={'칼라명2':'칼라'})
    
        df = df_pred.merge(df_true,how='left',on=['상품코드','칼라'])

    else:
        df_true = df_true[['상품코드','카테고리']]
        df = df_pred.merge(df_true,how='left',on='상품코드')
    
    cats = df['카테고리'].unique()
    dictionary = {}
        
    for cat in cats:

        x = df[df['카테고리'] == cat]

        y1 = torch.tensor(x['판매수량_true'].values,dtype=torch.float)
        y2 = torch.tensor(x['판매수량_pred'].values,dtype=torch.float)
        
        eval_score = eval_metric(y1,y2)
        dictionary[cat] = eval_score

    return dictionary

def set_random_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

class AdjustedSMAPELoss(nn.Module):
    def __init__(self, epsilon=1e-6):
        """

        param epsilon: default 1e-6
        """
        super(AdjustedSMAPELoss, self).__init__()
        self.epsilon = epsilon

    def forward(self, y_pred, y_true):
      
        numerator = torch.abs(y_pred - y_true)
        denominator = torch.abs(y_pred) + torch.abs(y_true) + self.epsilon
        
        
        smape_score = torch.mean(numerator / denominator * 2)/2
        
        return smape_score

class nsr_img_txt_dataset(Dataset):
    def __init__(self, dataframe, transform=None,fixed_num_images=2):
        self.dataframe = dataframe
        self.transform = transform
        self.fixed_num_images = fixed_num_images
    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        row = self.dataframe.iloc[idx]

        image_paths = ast.literal_eval(row['이미지파일'])
    
        text_embeddings = row.filter(like='설명').values.astype(np.float32)
        target = row['normalized_distance']
        # target_scaled = row.get('판매수량_scaled',None)
        
        other_embeddings = row.drop(row.filter(like='설명').index,axis=0)
        other_embeddings = other_embeddings.drop(other_embeddings.filter(like='closest').index,axis=0)
        other_embeddings = other_embeddings.drop(['판매수량','이미지파일','판매수량_scaled'],errors='ignore').values.astype(np.float32)
        
    
        knn_embeddings = row.filter(like='closest 판매수량').values.item()
        
        images = [Image.open(image_path) for image_path in image_paths]
        if self.transform:
            images = [self.transform(image) for image in images]

        while len(images) < self.fixed_num_images:
            images.append(images[-1])
        images = images[:self.fixed_num_images]
        
        images_tensor = torch.stack(images)
        
        text_embeddings_tensor = torch.tensor(text_embeddings,dtype=torch.float32)
        other_embeddings_tensor = torch.tensor(other_embeddings,dtype=torch.float32)
        target_tensor = torch.tensor(target,dtype=torch.float32)
        knn_embeddings = torch.tensor(knn_embeddings,dtype=torch.float32)
        
        if target is not None:

   
            return images_tensor, text_embeddings_tensor, other_embeddings_tensor, knn_embeddings, target_tensor

def datapreprocessing(train_df, test_df, dataloader_generator, image_resize_shape=(256, 256), image_normalizing=False,batch_size=64, shuffle=True, num_workers=None, pin_memory=False):
    """
    train_df : DataFrame for Training
    test_df : DataFrame for Testing
    image_resize_shape : image_size to reshape 
    batch_size : Batch size
    """
    import os
    from torchvision import transforms
    from torch.utils.data import DataLoader
    
    assert image_normalizing in [True, False], "image_normalizing must be either True or False"
    
    if image_normalizing == False:
        transform = transforms.Compose([
            transforms.Resize(image_resize_shape),
            transforms.ToTensor()
        ])

    else:
        transform = transforms.Compose([
            transforms.Resize(image_resize_shape),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

    
    print(transform)

    train_dataset = nsr_img_txt_dataset(train_df, transform=transform)
    test_dataset = nsr_img_txt_dataset(test_df, transform=transform)

    num_workers = num_workers if num_workers is not None else os.cpu_count()

    train_dataloader = DataLoader(
        train_dataset, 
        batch_size=batch_size,
        shuffle=shuffle,
        generator=dataloader_generator,
        num_workers=num_workers,
        pin_memory=pin_memory
    )
    
    test_dataloader = DataLoader(
        test_dataset, 
        batch_size=batch_size,
        generator=dataloader_generator,
        num_workers=num_workers,
        pin_memory=pin_memory
    )

    return train_dataloader, test_dataloader

def train(model,optimizer,criterion,eval_metric,train_dataloader, test_dataloader,writer=None, scheduler=None,device='cpu',num_epochs=50,dir_path='model_state_dict',scaler):

    os.makedirs(dir_path,exist_ok=True)
    model.to(device)

    distance_train_eval_list = []
    distance_train_loss_list = []
    real_value_train_eval_list = []
    real_value_train_loss_list = []
    
    distance_test_eval_list = []
    distance_test_loss_list = []
    real_value_test_eval_list = []
    real_value_test_loss_list = []
   
    best_score = float('inf')
    
    for epoch in range(num_epochs):
        print(f"Epoch {epoch+1}/{num_epochs}")

        # Train
        model.train()

        for param in model.image_model.parameters():
            param.requires_grad = False

        for param in model.image_model.fc.parameters():
            param.requires_grad = True
        
        distance_train_loss = 0.0
        distance_train_eval = 0.0
        real_value_train_loss = 0.0
        real_value_train_eval = 0.0
        
        for i, batch in tqdm(enumerate(train_dataloader),desc='Training',leave=False):
            images, text_embeddings, other_embeddings, knn_embeddings, targets = [x.to(device) for x in batch]
            outputs = model(images, text_embeddings, other_embeddings, knn_embeddings)

            distance_loss = criterion(outputs.flatten(),targets.flatten())
            distance_eval_score = eval_metric(outputs.flatten(), targets.flatten())

            real_value_pred = scaler.inverse_transform(outputs.reshape(-1,1))

            real_value_loss = criterion(real_value_pred.flatten(),targets.flatten())
            real_value_eval_score = eval_metric(real_value_pred.flatten(), targets.flatten())
            
            if torch.isnan(outputs).any():
                print(f"NaN detected in Train outputs at epoch {epoch+1}, batch {i+1}")
                continue
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            distance_train_eval +=  distance_eval_score.item()
            distance_train_loss += distance_loss.item()
            real_value_train_eval += real_value_eval_score.item()
            ral_value_train_loss += real_value_loss.item()

        distance_train_eval_list.append(distance_train_eval/len(train_dataloader))
        distance_train_loss_list.append(distance_train_loss/len(train_dataloader))
        real_value_train_eval_list.append(real_value_train_eval/len(train_dataloader))
        real_value_train_loss_list.append(real_value_train_loss/len(train_dataloader))

        model.eval()

        distance_test_loss = 0.0
        distance_test_eval = 0.0
        real_value_test_loss = 0.0
        real_value_test_eval = 0.0
        
        with torch.inference_mode():
            for i, batch in tqdm(enumerate(test_dataloader),desc='Testing',leave=False):
                images,text_embeddings, other_embeddings,knn_embeddings, targets = [x.to(device) for x in batch]
                outputs = model(images, text_embeddings, other_embeddings, knn_embeddings)
                
                distance_eval_score = eval_metric(outputs.flatten(), targets.flatten())
                distance_loss = criterion(outputs.flatten(), targets.flatten())

                real_value_pred = scaler.inverse_transform(outputs.reshape(-1,1))

                real_value_loss = criterion(real_value_pred.flatten(),targets.flatten())
                real_value_eval_score = eval_metric(real_value_pred.flatten(), targets.flatten())
            
                if torch.isnan(outputs).any():
                    print(f"NaN detected in Test outputs at epoch {epoch+1}, batch {i+1}")
                    continue
                
                distance_test_eval += distance_eval_score.item()
                distance_test_loss += distance_loss.item()
                real_value_test_eval += real_value_eval_score.item()
                real_value_test_loss += real_value_loss.item()
       
        distance_test_eval_list.append(distance_test_eval/len(test_dataloader))
        distance_test_loss_list.append(distance_test_loss/len(test_dataloader))
        real_value_test_eval_list.append(real_value_test_eval/len(test_dataloader))
        real_value_test_loss_list.append(real_value_test_loss/len(test_dataloader))

        print(f'Epoch {epoch+1}/{num_epochs}')
        print(f'\t For Distance :')
        print(f'\t\t Loss :')
        print(f'\t\t\t Train : {distance_train_loss / len(train_dataloader):.4f}, Test : {distance_test_loss / len(test_dataloader):.4f}' )
        print(f'\t\t Eval Score :')
        print(f'\t\t\t Train : {distance_train_eval / len(train_dataloader):.4f}, Test : {distance_test_eval / len(test_dataloader):.4f}')
        
        print(f'\t For Real Value :')
        print(f'\t\t Loss :')
        print(f'\t\t\t Train : {real_value_train_loss / len(train_dataloader):.4f}, Test : {real_value_test_loss / len(test_dataloader):.4f}')
        print(f'\t\t Eval Score : ')
        print(f'\t\t\t Train : {real_value_train_eval / len(train_dataloader):.4f}, Test : {real_value_test_eval / len(test_dataloader):.4f}')

        if writer:
            # Distance
                # Loss
            writer.add_scalar(f'Distance/Loss/train',distance_train_loss/len(train_dataloader),epoch+1)
            writer.add_scalar(f'Distance/Loss/test',distance_test_loss/len(test_dataloader),epoch+1)
                # Eval Score
            writer.add_scalar(f'Distance/Eval/train',distance_train_eval/len(train_dataloader),epoch+1)
            writer.add_scalar(f'Distance/Eval/test',distance_test_eval/len(train_dataloader),epoch+1)

            # Real Value
                # Loss
            writer.add_scalar(f'Real_Value/Loss/train',real_value_train_loss/len(train_dataloader),epoch+1)
            writer.add_scalar(f'Real_Value/Loss/test',real_value_test_loss/len(test_dataloader),epoch+1)
                # Eval Score
            writer.add_scalar(f'Real_Value/Eval/train',real_value_train_eval/len(train_dataloader),epoch+1)
            writer.add_scalar(f'Real_Value/Eval/test',real_value_test_eval/len(train_dataloader),epoch+1)

        
        if torch.isnan(outputs).any() != True:
            if real_value_test_loss/len(test_dataloader) < best_score:
                best_score = real_value_test_loss/len(test_dataloader)
                torch.save(model.state_dict(), dir_path+'/'+'model.pt')
                best_model = model
                print(f"Saved best model at epoch {epoch + 1} with Real Value Target Loss: {best_score:.4f}")
       
        if scheduler:
            scheduler.step()

        # For Distance 
        ## Loss
        plt.figure(figsize=(10, 5))
        plt.plot(range(len(distance_train_loss_list), distance_train_loss_list, label='Train Loss'))
        plt.plot(range(len(distance_test_loss_list), distance_test_loss_list, label='Test Loss'))
        plt.title('Distance Loss')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.legend()
        plt.savefig(dir_path+'/'+'Distance_Loss_plot.png')
        plt.close()

        ## Eval Score
        plt.figure(figsize=(10, 5))
        plt.plot(range(len(distance_train_eval_list), distance_train_eval_list, label='Train Eval'))
        plt.plot(range(len(distance_test_eval_list), distance_test_eval_list, label='Test Eval'))
        plt.title('Distance Eval')
        plt.xlabel('Epochs')
        plt.ylabel('Eval Score')
        plt.legend()
        plt.savefig(dir_path+'/'+'Distance_Eval_plot.png')
        plt.close()

        # For Real Value
        ## Loss
        plt.figure(figsize=(10, 5))
        plt.plot(range(len(real_value_train_loss_list), real_value_train_loss_list, label='Train Loss'))
        plt.plot(range(len(real_value_test_loss_list), real_value_test_loss_list, label='Test Loss'))
        plt.title('Real Value Loss')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.legend()
        plt.savefig(dir_path+'/'+'Real_Value_Loss_plot.png')
        plt.close()
        ## Eval
        plt.figure(figsize=(10, 5))
        plt.plot(range(len(real_value_train_eval_list), real_value_train_eval_list, label='Train Eval'))
        plt.plot(range(len(real_value_test_eval_list), real_value_test_eval_list, label='Test Eval'))
        plt.title('Real Value Eval')
        plt.xlabel('Epochs')
        plt.ylabel('Eval Score')
        plt.legend()
        plt.savefig(dir_path+'/'+'Real_Value_Eval_plot.png')
        plt.close()


    if writer:
        writer.close()
    
    return best_model


       
        distance_test_eval_list.append(distance_test_eval/len(test_dataloader))
        distance_test_loss_list.append(distance_test_loss/len(test_dataloader))
        real_value_test_eval_list.append(real_value_test_eval/len(test_dataloader))
        real_value_test_loss_list.append(real_value_test_loss/len(test_dataloader))

def test(model,dataset,dataloader,df,df_true,criterion,eval_metric,device,output_dir=str):

    model.to(device)
    outputs_list = []
    distance_test_loss = 0.0
    distance_test_eval = 0.0
    real_value_test_loss = 0.0
    real_value_test_eval = 0.0
    
    with torch.inference_mode():
        for i, batch in tqdm(enumerate(test_dataloader),desc='Testing',leave=False):
            images,text_embeddings, other_embeddings,knn_embeddings, targets = [x.to(device) for x in batch]
            outputs = model(images, text_embeddings, other_embeddings, knn_embeddings)
            
            distance_eval_score = eval_metric(outputs.flatten(), targets.flatten())
            distance_loss = criterion(outputs.flatten(), targets.flatten())

            real_value_pred = scaler.inverse_transform(outputs.reshape(-1,1))

            real_value_loss = criterion(real_value_pred.flatten(),targets.flatten())
            real_value_eval_score = eval_metric(real_value_pred.flatten(), targets.flatten())
        
            if torch.isnan(outputs).any():
                print(f"NaN detected in Test outputs at epoch {epoch+1}, batch {i+1}")
                continue
            
            distance_test_eval += distance_eval_score.item()
            distance_test_loss += distance_loss.item()
            real_value_test_eval += real_value_eval_score.item()
            real_value_test_loss += real_value_loss.item()
            outputs_list.extend(real_value_pred.detach().cpu().numpy())

    distance_test_eval /= len(dataloader)
    distance_test_loss /= len(dataloader)
    real_value_test_eval /= len(dataloader)
    real_value_test_loss /= len(dataloader)

    outputs_list = [np.round(output.item(),2) for output in outputs_list]
   
    df['판매수량_pred'] = outputs_list + df['neighbor mean']
    saved_path = os.path.join(output_dir,dataset.split('/')[-1].split('.')[0]+'_preds.csv')
    
    hi = pd.DataFrame()
    hi['상품코드'] = df['상품코드']
    hi = hi.merge(df.groupby('상품코드')['판매수량_true'].sum(),on='상품코드')
    hi = hi.merge(df.groupby('상품코드')['판매수량_pred'].sum(),on='상품코드')
    
    test_eval_ItemCode = eval_metric(torch.tensor(hi['판매수량_pred'].values),torch.tensor(hi['판매수량_true'].values)).item()
    test_loss_ItemCode = criterion(torch.tensor(hi['판매수량_pred'].values),torch.tensor(hi['판매수량_true'].values)).item()
    
    loss_info_text = dataset.split('/')[-1].split('.')[0] + '_loss_info.txt'
    loss_info_path = os.path.join(output_dir, loss_info_text )
        
    hi.to_csv(os.path.join(output_dir,dataset.split('/')[-1].split('.')[0]+'_상품코드별_preds.csv'),encoding='UTF-8')
    df.to_csv(os.path.join(output_dir,dataset.split('/')[-1].split('.')[0]+'_preds.csv'),encoding='UTF-8')
    print(f'predict result saved in {saved_path}')

    dict_item_loss = calculate_ad_smape_grouped_by(hi,df_true,criterion)
    dict_item_eval = calculate_ad_smape_grouped_by(hi,df_true,eval_metric)
    
    dict_item_color_loss = calculate_ad_smape_grouped_by(df,df_true,criterion)
    dict_item_color_eval = calculate_ad_smape_grouped_by(df,df_true,eval_metric)
    
    with open(loss_info_path, 'w') as f:
        f.write(f'Eval Score : {test_loss:.4f}\n')
        f.write(f'Loss Score : {test_eval:.4f}\n')
        f.write(f'상품코드별 Eval Score : {test_eval_ItemCode:.4f}\n')
        f.write(f'상품코드별 Loss Score : {test_loss_ItemCode:.4f}\n')
        for k,v in dict_item_eval.items():
            f.write(f'{k} 상품코드별 Eval Score : {v:.4f}\n')

        for k,v in dict_item_loss.items():
            f.write(f'{k} 상품코드별 Loss Score : {v:.4f}\n')
            
        for k,v in dict_item_color_eval.items():
            f.write(f'{k} 상품코드+색상별 Eval Score : {v:.4f}\n')

        for k,v in dict_item_color_loss.items():
            f.write(f'{k} 상품코드+색상별 Eval Loss : {v:.4f}\n')

def calculate_adjusted_smape(row, loss_fn):
    y_true = torch.tensor([row['판매수량_true']], dtype=torch.float32)
    y_pred = torch.tensor([row['판매수량_pred']], dtype=torch.float32)
    score = loss_fn(y_pred, y_true).item()
    return score

    
def run(model,dataset,optimizer,criterion,eval_metric,train_dataloader,test_dataloader,dir_path,df,df_true,scheduler=None,writer=None,device='cpu',num_epochs=150):
    best_model = train(model,optimizer,criterion,eval_metric,train_dataloader,test_dataloader,writer=writer,scheduler=scheduler,device=device,num_epochs=num_epochs,dir_path=dir_path)
    print('Best Model for Train dataset')
    test(model=best_model,dataset=dataset[0],dataloader=train_dataloader,df=df[0],df_true=df_true[0],criterion=criterion,eval_metric=eval_metric,device=device,output_dir=dir_path)
    print('Best Model for Test dataset')
    test(model=best_model,dataset=dataset[1],dataloader=test_dataloader,df=df[1],df_true=df_true[1],criterion=criterion,eval_metric=eval_metric,device=device,output_dir=dir_path)

In [81]:
# set random seed
generator = torch.Generator()
generator.manual_seed(args.random_seed)

train_dataloader, test_dataloader = datapreprocessing(df_train, df_test, generator, image_resize_shape=(args.image_size, args.image_size), image_normalizing=args.image_normalizing,batch_size=args.batch_size, num_workers=os.cpu_count(), pin_memory=True)

text_embedding_dim_in = len(df_train.filter(like='설명').columns)
other_features_dim_in = df_train.drop(df_train.filter(like='설명').columns,axis=1).shape[-1]-3




Compose(
    Resize(size=(256, 256), interpolation=bilinear, max_size=None, antialias=True)
    ToTensor()
)
