In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import missingno as msno
import matplotlib as mpl
%matplotlib inline
# Raise both pandas display limits to 200 so larger frames are shown in full.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, 200)

In [2]:
import os, sys
from google.colab import drive
# Mount Google Drive at /content/gdrive (Colab-only); the CSV files loaded
# below are read from a folder under this mount point.
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [3]:
# Folder on the mounted Drive that the CSV files are read from.
default_dir = "/content/gdrive/My Drive"
train_csv_path = os.path.join(default_dir, "train.csv")
test_csv_path = os.path.join(default_dir, "submission.csv")

df_train = pd.read_csv(train_csv_path)  # training data
df_test = pd.read_csv(test_csv_path)  # test data (the rows of the submission file)

# ver_pro
- 결측치 없음
- business_area 및 product_category와 밀접한 관련이 있음
- Product로 매핑한 결과, ver_pro가 1인 Product는 모두 Commercial Display에 속했다

In [4]:
# Number of training rows for each ver_pro value.
ver_pro_counts = df_train['ver_pro'].value_counts()
ver_pro_counts

0    56286
1     3013
Name: ver_pro, dtype: int64

In [5]:
# Keep only the columns under inspection, then show the rows where ver_pro is 1.
selected_columns_df = df_train.loc[:, ['business_area', 'product_category', 'ver_pro']]
ver_pro_is_one = selected_columns_df['ver_pro'].eq(1)
selected_columns_df[ver_pro_is_one]

Unnamed: 0,business_area,product_category,ver_pro
388,corporate / office,standard signage,1
389,corporate / office,high brightness signage,1
390,corporate / office,interactive signage,1
391,corporate / office,standard signage,1
392,corporate / office,interactive signage,1
...,...,...,...
58101,hotel & accommodation,hotel tv,1
58104,hotel & accommodation,hotel tv,1
58105,hotel & accommodation,hotel tv,1
58110,hotel & accommodation,hotel tv,1


In [6]:
# Apply the same normalization to both frames: lowercase product_category and
# drop every space character, so values compare equal regardless of case/spacing.
for frame in (df_train, df_test):
    frame['product_category'] = frame['product_category'].str.lower().str.replace(" ", "")

In [7]:
# Product group -> list of product_category aliases belonging to that group.
# Aliases are written lowercase and without spaces.
product_mapping = {
    'HVAC/ESS': [
        'control', 'ventilation', 'vrf', 'multi-split',
        'arcondicionadoresidencial', 'single-split', 'chiler', 'chiller',
        'heating', 'rac', 'tetooucasseteinverter', 'multiinverter',
    ],
    'Commercial Display': [
        'monitorsignage,monior/monitortv',
        # NOTE(review): this alias ends with "..." followed by a tab character
        # (spelled as \t here so it is visible). It looks like it was pasted
        # from a truncated display -- confirm it matches a real data value.
        'monitorsignage,monior/monitortv,vacuumcleaner,...\t',
        'tvsignage',
        'monitorsignage,commercialtv,monior/monitortv',
        'interactivedigitalboard', 'digitalsignage', 'signagecaresolutions',
        'smarttvsignage', 'uhdsignage', 'oledsignage', 'ledsignage',
        'videowallsignage', 'videowall', 'interactivesignage',
        'highbrightnesssignage', 'highbrightness', 'specialsignage',
        'standardsignage', 'hoteltv', 'hospitaltv', 'signagecaresolution',
        'lgone:quickseries', 'accessories', 'webos', 'one:quickseries',
        'pro:centric',
    ],
    'IT PRODUCTS': [
        'monitor', 'laptop', 'projector', 'pc', 'clouddevice', 'medicaldisplay',
    ],
    # NOTE(review): the key is spelled "Commerical"; it is kept as-is because it
    # becomes a label value in the Product column.
    'Commerical Laundry': [
        'titan(largecapacity)', 'giant(standardcapacity)',
    ],
    'Compressor & Motor': [
        'reciprocatingcompressor', 'rotarycompressor', 'scrollcompressor', 'motor',
    ],
    'ADVANCED MATERIALS': [
        'antimicrobial', 'porcelainenamel', 'specialtyglass',
    ],
    'Robot': [
        'lgcloiuv-cbot', 'lgcloiservebot(shelftype)',
        'lgcloiservebot(drawertype)', 'lgcloiguidebot',
    ],
    'Others': [
        'etc.', 'others', 'other', 'softwaresolution',
    ],
}

def map_product_category(value, mapping=None):
    """Map one product-category value to the name of its product group.

    Parameters
    ----------
    value : object
        A single ``product_category`` cell. It is compared as-is against the
        aliases, so it must already be normalized the way the aliases are
        written.
    mapping : dict, optional
        ``{group_name: container_of_aliases}``. When omitted, the module-level
        ``product_mapping`` is used, so existing
        ``.apply(map_product_category)`` calls behave as before.

    Returns
    -------
    object
        The name of the first group (in dict order) whose aliases contain
        ``value``; ``value`` itself, unchanged, when no group matches.
    """
    groups = product_mapping if mapping is None else mapping
    for product, values in groups.items():
        if value in values:
            return product
    return value  # not mapped: return the original value

# Derive the Product column for both frames from the normalized product_category.
for frame in (df_train, df_test):
    frame['Product'] = frame['product_category'].apply(map_product_category)

In [8]:
# Show the raw category next to the mapped Product group for every row.
columns_of_interest = ['business_area', 'product_category', 'Product', 'ver_pro']
selected_columns_df = df_train.loc[:, columns_of_interest]
selected_columns_df

Unnamed: 0,business_area,product_category,Product,ver_pro
0,corporate / office,multi-split,HVAC/ESS,0
1,corporate / office,multi-split,HVAC/ESS,0
2,corporate / office,single-split,HVAC/ESS,0
3,corporate / office,vrf,HVAC/ESS,0
4,corporate / office,multi-split,HVAC/ESS,0
...,...,...,...,...
59294,public facility,vrf,HVAC/ESS,0
59295,public facility,etc.,Others,0
59296,public facility,single-split,HVAC/ESS,0
59297,public facility,etc.,Others,0


In [9]:
# Restrict to the rows where ver_pro is 1 to see which Product groups they fall in.
selected_columns_df = df_train.loc[:, ['business_area', 'product_category', 'Product', 'ver_pro']]
ver_pro_is_one = selected_columns_df['ver_pro'].eq(1)
selected_columns_df[ver_pro_is_one]

Unnamed: 0,business_area,product_category,Product,ver_pro
388,corporate / office,standardsignage,Commercial Display,1
389,corporate / office,highbrightnesssignage,Commercial Display,1
390,corporate / office,interactivesignage,Commercial Display,1
391,corporate / office,standardsignage,Commercial Display,1
392,corporate / office,interactivesignage,Commercial Display,1
...,...,...,...,...
58101,hotel & accommodation,hoteltv,Commercial Display,1
58104,hotel & accommodation,hoteltv,Commercial Display,1
58105,hotel & accommodation,hoteltv,Commercial Display,1
58110,hotel & accommodation,hoteltv,Commercial Display,1
