In [176]:
def get_font_family():
    """
    시스템 환경에 따른 기본 폰트명을 반환하는 함수
    """
    import platform
    system_name = platform.system()
    # colab 사용자는 system_name이 'Linux'로 확인

    if system_name == "Darwin" :
        font_family = "AppleGothic"
    elif system_name == "Windows":
        font_family = "Malgun Gothic"
    else:
        # Linux
        # colab에서는 runtime을 <꼭> 재시작 해야함.
        # 런타임을 재시작 하지 않고 폰트 설치를 하면 기본 설정 폰트가 로드되어 한글이 깨짐.
        !apt-get update -qq
        !apt-get install fonts-nanum -qq  > /dev/null

        import matplotlib.font_manager as fm

        fontpath = '/usr/share/fonts/truetype/nanum/NanumBarunGothic.ttf'
        font = fm.FontProperties(fname=fontpath, size=9)
        fm._rebuild()
        font_family = "NanumBarunGothic"
    return font_family

In [177]:
# 시각화를 위한 폰트설정
# 위에서 만든 함수를 통해 시스템 폰트를 불러와서 font_family 라는 변수에 할당.
a = get_font_family()
# 폰트설정
import matplotlib.pyplot as plt 
plt.rc("font", family = a)
# 마이너스폰트 설정
plt.rc("axes", unicode_minus=False)
# ggplot으로 그래프 스타일 설정 / 개인 자유
plt.style.use("ggplot")

In [178]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import warnings
warnings.filterwarnings(action='ignore')

#한글 폰트 설정
plt.rcParams['font.family'] = 'HYGothic-Extra'

df = pd.read_excel('클로젯셰어_과제DB 대체 자료.xlsx')
bags = pd.read_csv('bag_price.csv').drop('Unnamed: 0', axis=1)

display(df)
display(bags)

Unnamed: 0,brandnm,goodsnm,USER,reg_date,order_date,p_type,sub_type,app_yn
0,Gucci,flower dionysus shoulder bag beige,a161237,2019-12-06,2021-01-15,7days,BA,Y
1,El estilo de Claire,beads cropped jacket black,a1140859,2021-01-02,2021-01-15,4days,O,
2,Danha,embroidery point oriental mini skirt blue,a1140859,2021-01-02,2021-01-15,4days,B,
3,Danha,graphics pattern top pink,a1140859,2021-01-02,2021-01-15,4days,T,
4,Burberry London,buckle basic pattern zipup-jumper navy,a1140740,2021-01-01,2021-01-15,7days,O,Y
...,...,...,...,...,...,...,...,...
1466,Yves Saint Laurent,logo point clutch bag pink,a119721,2019-04-10,2021-03-01,7days,BA,Y
1467,Prada,saffiano lux M gray,a1146069,2021-02-11,2021-03-01,7days,BA,Y
1468,Burberry,basic pattern sweater beige,a1147848,2021-02-28,2021-03-01,7days,T,Y
1469,Chanel,graphics pattern sweat-shirts white,a1147788,2021-02-28,2021-03-01,7days,T,Y


Unnamed: 0,brandnm,goodsnm,4days,7days
0,FENDI,rogo buckle baguette bag brown,39000,49000
1,GUCCI,flower pattern clutch bag,49000,59000
2,CHANEL,classic clutch black,49000,59000
3,CHANEL,classic rogo big clutch bag black,49000,59000
4,GUCCI,CG pattern cross bag brown,49000,59000
...,...,...,...,...
380,CHANEL,vintage chain shoulder bag,49000,59000
381,CHANEL,boy caviar quilted cluth black,49000,59000
382,YVES SAINT LAURENT,kate small monogram bag beige,49000,59000
383,RIMOWA,topaz 53,39000,49000


# inner join

In [179]:
inner_dt = pd.merge(df, bags, how='inner', on='goodsnm')
inner_dt.drop('brandnm_y', axis=1, inplace=True)
inner_dt.columns = ['brandnm','goodsnm','USER','reg_date','order_date','p_type','sub_type','app_yn','4days','7days']
inner_dt

Unnamed: 0,brandnm,goodsnm,USER,reg_date,order_date,p_type,sub_type,app_yn,4days,7days
0,Dior,croisiere patent lady bag,a1127813,2020-10-31,2021-01-14,4days,BA,Y,49000,59000
1,Christian Dior,croisiere patent lady bag,a1144515,2021-01-26,2021-01-28,4days,BA,Y,49000,59000
2,Dior,croisiere patent lady bag,a1121708,2020-09-28,2021-01-21,4days,BA,Y,49000,59000
3,Dior,croisiere patent lady bag,a1149454,2021-03-15,2021-03-15,7days,BA,Y,49000,59000
4,Dior,croisiere patent lady bag,a1148229,2021-03-04,2021-03-04,4days,BA,,49000,59000
...,...,...,...,...,...,...,...,...,...,...
642,Gucci,monogram backpack brown,a1149535,2021-03-16,2021-03-16,7days,BA,,49000,59000
643,Saint Laurent,monogram shoulder bag brown,a1147934,2021-03-01,2021-03-15,4days,BA,Y,39000,49000
644,Gucci,GG soho chain clutch bag black,a1147854,2021-02-28,2021-03-09,4days,BA,Y,49000,59000
645,Chanel,unique pattern boy cross bag,a1147817,2021-02-28,2021-03-03,7days,BA,Y,49000,59000


## concat

In [180]:
concat_dt = pd.concat([df, inner_dt])
concat_dt.reset_index(inplace=True)
concat_dt.drop('index', axis=1, inplace=True)
concat_dt

Unnamed: 0,brandnm,goodsnm,USER,reg_date,order_date,p_type,sub_type,app_yn,4days,7days
0,Gucci,flower dionysus shoulder bag beige,a161237,2019-12-06,2021-01-15,7days,BA,Y,,
1,El estilo de Claire,beads cropped jacket black,a1140859,2021-01-02,2021-01-15,4days,O,,,
2,Danha,embroidery point oriental mini skirt blue,a1140859,2021-01-02,2021-01-15,4days,B,,,
3,Danha,graphics pattern top pink,a1140859,2021-01-02,2021-01-15,4days,T,,,
4,Burberry London,buckle basic pattern zipup-jumper navy,a1140740,2021-01-01,2021-01-15,7days,O,Y,,
...,...,...,...,...,...,...,...,...,...,...
2113,Gucci,monogram backpack brown,a1149535,2021-03-16,2021-03-16,7days,BA,,49000.0,59000.0
2114,Saint Laurent,monogram shoulder bag brown,a1147934,2021-03-01,2021-03-15,4days,BA,Y,39000.0,49000.0
2115,Gucci,GG soho chain clutch bag black,a1147854,2021-02-28,2021-03-09,4days,BA,Y,49000.0,59000.0
2116,Chanel,unique pattern boy cross bag,a1147817,2021-02-28,2021-03-03,7days,BA,Y,49000.0,59000.0


In [181]:
concat_dt.drop_duplicates(subset = ['brandnm','goodsnm','USER','reg_date','order_date','p_type','sub_type','app_yn'],
                          keep='last', inplace=True)
concat_dt

Unnamed: 0,brandnm,goodsnm,USER,reg_date,order_date,p_type,sub_type,app_yn,4days,7days
0,Gucci,flower dionysus shoulder bag beige,a161237,2019-12-06,2021-01-15,7days,BA,Y,,
1,El estilo de Claire,beads cropped jacket black,a1140859,2021-01-02,2021-01-15,4days,O,,,
2,Danha,embroidery point oriental mini skirt blue,a1140859,2021-01-02,2021-01-15,4days,B,,,
3,Danha,graphics pattern top pink,a1140859,2021-01-02,2021-01-15,4days,T,,,
4,Burberry London,buckle basic pattern zipup-jumper navy,a1140740,2021-01-01,2021-01-15,7days,O,Y,,
...,...,...,...,...,...,...,...,...,...,...
2113,Gucci,monogram backpack brown,a1149535,2021-03-16,2021-03-16,7days,BA,,49000.0,59000.0
2114,Saint Laurent,monogram shoulder bag brown,a1147934,2021-03-01,2021-03-15,4days,BA,Y,39000.0,49000.0
2115,Gucci,GG soho chain clutch bag black,a1147854,2021-02-28,2021-03-09,4days,BA,Y,49000.0,59000.0
2116,Chanel,unique pattern boy cross bag,a1147817,2021-02-28,2021-03-03,7days,BA,Y,49000.0,59000.0


In [182]:
an = concat_dt.app_yn.fillna('N')
concat_dt['app_yn'] = an
concat_dt

Unnamed: 0,brandnm,goodsnm,USER,reg_date,order_date,p_type,sub_type,app_yn,4days,7days
0,Gucci,flower dionysus shoulder bag beige,a161237,2019-12-06,2021-01-15,7days,BA,Y,,
1,El estilo de Claire,beads cropped jacket black,a1140859,2021-01-02,2021-01-15,4days,O,N,,
2,Danha,embroidery point oriental mini skirt blue,a1140859,2021-01-02,2021-01-15,4days,B,N,,
3,Danha,graphics pattern top pink,a1140859,2021-01-02,2021-01-15,4days,T,N,,
4,Burberry London,buckle basic pattern zipup-jumper navy,a1140740,2021-01-01,2021-01-15,7days,O,Y,,
...,...,...,...,...,...,...,...,...,...,...
2113,Gucci,monogram backpack brown,a1149535,2021-03-16,2021-03-16,7days,BA,N,49000.0,59000.0
2114,Saint Laurent,monogram shoulder bag brown,a1147934,2021-03-01,2021-03-15,4days,BA,Y,39000.0,49000.0
2115,Gucci,GG soho chain clutch bag black,a1147854,2021-02-28,2021-03-09,4days,BA,Y,49000.0,59000.0
2116,Chanel,unique pattern boy cross bag,a1147817,2021-02-28,2021-03-03,7days,BA,Y,49000.0,59000.0


In [183]:
zero4 = concat_dt['4days'].fillna(0) # 결측치를 0으로 채움
concat_dt['4days'] = zero4

zero7 = concat_dt['7days'].fillna(0) # 결측치를 0으로 채움
concat_dt['7days'] = zero7

concat_dt

Unnamed: 0,brandnm,goodsnm,USER,reg_date,order_date,p_type,sub_type,app_yn,4days,7days
0,Gucci,flower dionysus shoulder bag beige,a161237,2019-12-06,2021-01-15,7days,BA,Y,0.0,0.0
1,El estilo de Claire,beads cropped jacket black,a1140859,2021-01-02,2021-01-15,4days,O,N,0.0,0.0
2,Danha,embroidery point oriental mini skirt blue,a1140859,2021-01-02,2021-01-15,4days,B,N,0.0,0.0
3,Danha,graphics pattern top pink,a1140859,2021-01-02,2021-01-15,4days,T,N,0.0,0.0
4,Burberry London,buckle basic pattern zipup-jumper navy,a1140740,2021-01-01,2021-01-15,7days,O,Y,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
2113,Gucci,monogram backpack brown,a1149535,2021-03-16,2021-03-16,7days,BA,N,49000.0,59000.0
2114,Saint Laurent,monogram shoulder bag brown,a1147934,2021-03-01,2021-03-15,4days,BA,Y,39000.0,49000.0
2115,Gucci,GG soho chain clutch bag black,a1147854,2021-02-28,2021-03-09,4days,BA,Y,49000.0,59000.0
2116,Chanel,unique pattern boy cross bag,a1147817,2021-02-28,2021-03-03,7days,BA,Y,49000.0,59000.0


In [184]:
concat_dt['4days'] = concat_dt['4days'].astype(int)
concat_dt['7days'] = concat_dt['7days'].astype(int)

In [185]:
concat_dt

Unnamed: 0,brandnm,goodsnm,USER,reg_date,order_date,p_type,sub_type,app_yn,4days,7days
0,Gucci,flower dionysus shoulder bag beige,a161237,2019-12-06,2021-01-15,7days,BA,Y,0,0
1,El estilo de Claire,beads cropped jacket black,a1140859,2021-01-02,2021-01-15,4days,O,N,0,0
2,Danha,embroidery point oriental mini skirt blue,a1140859,2021-01-02,2021-01-15,4days,B,N,0,0
3,Danha,graphics pattern top pink,a1140859,2021-01-02,2021-01-15,4days,T,N,0,0
4,Burberry London,buckle basic pattern zipup-jumper navy,a1140740,2021-01-01,2021-01-15,7days,O,Y,0,0
...,...,...,...,...,...,...,...,...,...,...
2113,Gucci,monogram backpack brown,a1149535,2021-03-16,2021-03-16,7days,BA,N,49000,59000
2114,Saint Laurent,monogram shoulder bag brown,a1147934,2021-03-01,2021-03-15,4days,BA,Y,39000,49000
2115,Gucci,GG soho chain clutch bag black,a1147854,2021-02-28,2021-03-09,4days,BA,Y,49000,59000
2116,Chanel,unique pattern boy cross bag,a1147817,2021-02-28,2021-03-03,7days,BA,Y,49000,59000


In [186]:
concat_dt.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1471 entries, 0 to 2117
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   brandnm     1442 non-null   object        
 1   goodsnm     1471 non-null   object        
 2   USER        1471 non-null   object        
 3   reg_date    1466 non-null   datetime64[ns]
 4   order_date  1471 non-null   datetime64[ns]
 5   p_type      1471 non-null   object        
 6   sub_type    1471 non-null   object        
 7   app_yn      1471 non-null   object        
 8   4days       1471 non-null   int32         
 9   7days       1471 non-null   int32         
dtypes: datetime64[ns](2), int32(2), object(6)
memory usage: 114.9+ KB


In [187]:
concat_dt.isnull().sum()

brandnm       29
goodsnm        0
USER           0
reg_date       5
order_date     0
p_type         0
sub_type       0
app_yn         0
4days          0
7days          0
dtype: int64

# concat_dt에서 BA 데이터(가방)만 추출

In [188]:
dt_ba = concat_dt.query('sub_type == "BA"')
dt_ba

Unnamed: 0,brandnm,goodsnm,USER,reg_date,order_date,p_type,sub_type,app_yn,4days,7days
0,Gucci,flower dionysus shoulder bag beige,a161237,2019-12-06,2021-01-15,7days,BA,Y,0,0
8,Gucci,dionysus cross bag,a1143028,2021-01-14,2021-01-15,4days,BA,Y,0,0
10,Louis Vuitton,twist monogram patent shoulder bag black,a1140216,2020-12-28,2021-01-15,7days,BA,Y,0,0
11,Tory Burch,flower ethnic pattern cross bag white,a1141255,2021-01-05,2021-01-15,4days,BA,Y,0,0
14,Chloe,faye small bag black,a1129386,2020-11-09,2021-01-15,4days,BA,Y,0,0
...,...,...,...,...,...,...,...,...,...,...
2113,Gucci,monogram backpack brown,a1149535,2021-03-16,2021-03-16,7days,BA,N,49000,59000
2114,Saint Laurent,monogram shoulder bag brown,a1147934,2021-03-01,2021-03-15,4days,BA,Y,39000,49000
2115,Gucci,GG soho chain clutch bag black,a1147854,2021-02-28,2021-03-09,4days,BA,Y,49000,59000
2116,Chanel,unique pattern boy cross bag,a1147817,2021-02-28,2021-03-03,7days,BA,Y,49000,59000


In [189]:
print(dt_ba.brandnm.unique())
print(dt_ba.brandnm.nunique()) # 브랜드 총 47개

['Gucci' 'Louis Vuitton' 'Tory Burch' 'Chloe' 'Prada' 'Fendi'
 'Bottega Veneta' 'Celine' 'Christian Dior' 'Biker Starlet' 'Givenchy'
 'Vivienne Westwood' 'Saint Laurent' 'Yves Saint Laurent' 'Chanel'
 'Burberry' 'Vunque' 'Salvatore Ferragamo' 'Balenciaga' 'Miu Miu' 'Dior'
 'Mulberry' 'Marni' 'Dolce & Gabbana' 'DELLEST' 'Play Nomore' 'Valentino'
 'Rosa.K' 'Bulgari' 'A.P.C.' 'Kenzo' 'Cesare Paciotti' 'Stella Mccartney'
 'Goyard' 'Maison Margiela' 'Coach' 'thom browne' 'BYREDO'
 'Rebecca Minkoff' 'Hermes' 'Furla' 'Delvaux' 'Red Valentino'
 'Rogervivier' 'Off White' 'Clu' 'Rimowa']
47


In [190]:
ba_mean = dt_ba.groupby('brandnm').mean()
ba_mean.reset_index(inplace=True)
#ba_mean   # ??? 가격이 너무 낮은데?? 

In [191]:
bags_mean = bags.groupby('brandnm').mean() #크롤링한 데이터를 바탕으로 한 평균 값
bags_mean.reset_index(inplace=True)
#dt_brands.drop('index', axis=1, inplace=True)
bags_mean

Unnamed: 0,brandnm,4days,7days
0,BALENCIAGA,39000.0,49000.0
1,CELINE,45500.0,55500.0
2,CHANEL,47333.333333,57333.333333
3,CHRISTIAN DIOR,45666.666667,55666.666667
4,DELVAUX,52333.333333,62333.333333
5,DIOR,49000.0,59000.0
6,FENDI,47823.529412,57823.529412
7,FONTANA,39000.0,49000.0
8,GUCCI,46291.666667,56291.666667
9,HERMES,39000.0,49000.0


In [192]:
brands_mean = inner_dt.groupby('brandnm').mean() # 이너 조인한 데이터 평균값
brands_mean.reset_index(inplace=True)
brands_mean

Unnamed: 0,brandnm,4days,7days
0,Balenciaga,39000.0,49000.0
1,Celine,44909.090909,54909.090909
2,Chanel,46368.421053,56368.421053
3,Christian Dior,44000.0,54000.0
4,Delvaux,59000.0,69000.0
5,Dior,49000.0,59000.0
6,Fendi,49000.0,59000.0
7,Gucci,46987.804878,56987.804878
8,Hermes,39000.0,49000.0
9,Louis Vuitton,46684.210526,56684.210526


In [193]:
print(len(ba_mean.loc[(ba_mean['4days'] == 0) | (ba_mean['7days'] == 0)])) # 31개 브랜드
print(len(ba_mean.loc[(ba_mean['4days'] == 0) & (ba_mean['7days'] == 0)]))

31
31


- 47개 브랜드 중 31개 브랜드 데이터가 없음.....

In [199]:
dt_ba.reset_index(inplace=True)
dt_ba.drop('index', axis=1, inplace=True)
#dt_ba.drop('level_0', axis=1, inplace=True)
dt_ba

Unnamed: 0,brandnm,goodsnm,USER,reg_date,order_date,p_type,sub_type,app_yn,4days,7days
0,Gucci,flower dionysus shoulder bag beige,a161237,2019-12-06,2021-01-15,7days,BA,Y,46300,56300
1,Gucci,dionysus cross bag,a1143028,2021-01-14,2021-01-15,4days,BA,Y,0,0
2,Louis Vuitton,twist monogram patent shoulder bag black,a1140216,2020-12-28,2021-01-15,7days,BA,Y,0,0
3,Tory Burch,flower ethnic pattern cross bag white,a1141255,2021-01-05,2021-01-15,4days,BA,Y,0,0
4,Chloe,faye small bag black,a1129386,2020-11-09,2021-01-15,4days,BA,Y,0,0
...,...,...,...,...,...,...,...,...,...,...
687,Gucci,monogram backpack brown,a1149535,2021-03-16,2021-03-16,7days,BA,N,49000,59000
688,Saint Laurent,monogram shoulder bag brown,a1147934,2021-03-01,2021-03-15,4days,BA,Y,39000,49000
689,Gucci,GG soho chain clutch bag black,a1147854,2021-02-28,2021-03-09,4days,BA,Y,49000,59000
690,Chanel,unique pattern boy cross bag,a1147817,2021-02-28,2021-03-03,7days,BA,Y,49000,59000


In [200]:
## 크롤링한 데이터의 평균 값을 4/7days nan 값에 넣기

for i in range(len(dt_ba)):
    if ((dt_ba['sub_type'][i] == 'BA') & (dt_ba['brandnm'][i] == 'Balenciaga')):
        dt_ba['4days'][i] = 39000
        dt_ba['7days'][i] = 49000
        
    elif ((dt_ba['sub_type'][i] == 'BA') & (dt_ba['brandnm'][i] == 'Celine')):
        dt_ba['4days'][i] = 45500
        dt_ba['7days'][i] = 55500

    elif ((dt_ba['sub_type'][i] == 'BA') & (dt_ba['brandnm'][i] == 'Chanel')):
        dt_ba['4days'][i] = 47330
        dt_ba['7days'][i] = 57330

    elif ((dt_ba['sub_type'][i] == 'BA') & (dt_ba['brandnm'][i] == 'Delvaux')):
        dt_ba['4days'][i] = 52330
        dt_ba['7days'][i] = 62330
    
    elif ((dt_ba['sub_type'][i] == 'BA') & (dt_ba['brandnm'][i] == 'Fendi')):
        dt_ba['4days'][i] = 47820
        dt_ba['7days'][i] = 57820
    
    elif ((dt_ba['sub_type'][i] == 'BA') & (dt_ba['brandnm'][i] == 'Gucci')):
        dt_ba['4days'][i] = 46300
        dt_ba['7days'][i] = 56300
        
    elif ((dt_ba['sub_type'][i] == 'BA') & (dt_ba['brandnm'][i] == 'Hermes')):
        dt_ba['4days'][i] = 39000
        dt_ba['7days'][i] = 49000
    
    elif ((dt_ba['sub_type'][i] == 'BA') & (dt_ba['brandnm'][i] == 'Louis Vuitton')):
        dt_ba['4days'][i] = 46580
        dt_ba['7days'][i] = 56580
        
    elif ((dt_ba['sub_type'][i] == 'BA') & (dt_ba['brandnm'][i] == 'Miu Miu')):
        dt_ba['4days'][i] = 39000
        dt_ba['7days'][i] = 49000
    
    elif ((dt_ba['sub_type'][i] == 'BA') & (dt_ba['brandnm'][i] == 'Prada')):
        dt_ba['4days'][i] = 39000
        dt_ba['7days'][i] = 49000
    
    elif ((dt_ba['sub_type'][i] == 'BA') & (dt_ba['brandnm'][i] == 'Rimowa')):
        dt_ba['4days'][i] = 39000
        dt_ba['7days'][i] = 49000
    
    elif ((dt_ba['sub_type'][i] == 'BA') & (dt_ba['brandnm'][i] == 'Saint Laurent')):
        dt_ba['4days'][i] = 45670
        dt_ba['7days'][i] = 55670
    
    elif ((dt_ba['sub_type'][i] == 'BA') & (dt_ba['brandnm'][i] == 'Christian Dior')):
        dt_ba['4days'][i] = 45670
        dt_ba['7days'][i] = 55670
    
    elif ((dt_ba['sub_type'][i] == 'BA') & (dt_ba['brandnm'][i] == 'Yves Saint Laurent')):
        dt_ba['4days'][i] = 46500
        dt_ba['7days'][i] = 56500
        
    elif ((dt_ba['sub_type'][i] == 'BA') & (dt_ba['brandnm'][i] == 'Dior')):
        dt_ba['4days'][i] = 49000
        dt_ba['7days'][i] = 59000
        
    elif ((dt_ba['sub_type'][i] == 'BA') & (dt_ba['brandnm'][i] == 'Fontana')):
        dt_ba['4days'][i] = 39000
        dt_ba['7days'][i] = 49000
        
    elif ((dt_ba['sub_type'][i] == 'BA') & (dt_ba['brandnm'][i] == 'Salvatore Ferragamo')):
        dt_ba['4days'][i] = 39000
        dt_ba['7days'][i] = 49000
    else:
        pass
        
dt_ba #ba만 있는 데이터

Unnamed: 0,brandnm,goodsnm,USER,reg_date,order_date,p_type,sub_type,app_yn,4days,7days
0,Gucci,flower dionysus shoulder bag beige,a161237,2019-12-06,2021-01-15,7days,BA,Y,46300,56300
1,Gucci,dionysus cross bag,a1143028,2021-01-14,2021-01-15,4days,BA,Y,46300,56300
2,Louis Vuitton,twist monogram patent shoulder bag black,a1140216,2020-12-28,2021-01-15,7days,BA,Y,46580,56580
3,Tory Burch,flower ethnic pattern cross bag white,a1141255,2021-01-05,2021-01-15,4days,BA,Y,0,0
4,Chloe,faye small bag black,a1129386,2020-11-09,2021-01-15,4days,BA,Y,0,0
...,...,...,...,...,...,...,...,...,...,...
687,Gucci,monogram backpack brown,a1149535,2021-03-16,2021-03-16,7days,BA,N,46300,56300
688,Saint Laurent,monogram shoulder bag brown,a1147934,2021-03-01,2021-03-15,4days,BA,Y,45670,55670
689,Gucci,GG soho chain clutch bag black,a1147854,2021-02-28,2021-03-09,4days,BA,Y,46300,56300
690,Chanel,unique pattern boy cross bag,a1147817,2021-02-28,2021-03-03,7days,BA,Y,47330,57330


In [201]:
a = dt_ba.loc[dt_ba['4days'] == 0] # 79 rows
a.brandnm.unique()

array(['Tory Burch', 'Chloe', 'Bottega Veneta', 'Biker Starlet',
       'Givenchy', 'Vivienne Westwood', 'Burberry', 'Vunque', 'Mulberry',
       'Marni', 'Dolce & Gabbana', 'DELLEST', 'Play Nomore', 'Valentino',
       'Rosa.K', 'Bulgari', 'A.P.C.', 'Kenzo', 'Cesare Paciotti',
       'Stella Mccartney', 'Goyard', 'Maison Margiela', 'Coach',
       'thom browne', 'BYREDO', 'Rebecca Minkoff', 'Furla',
       'Red Valentino', 'Rogervivier', 'Off White', 'Clu'], dtype=object)

In [202]:
a.brandnm.nunique()

31

In [203]:
dt_ba.to_csv('only_onetime_bags_price.csv') # 0 값인 데이터들은 아직 구하지 않은 값.

클로젯셰어 가방>1회권>검색

- 없음 : 나머지
- Chloe(1) : 39000/49000
- : 클로젯셰어 홈페이지 검색

In [204]:
exc_dt = pd.read_excel('except_brands.xlsx') # 각 브랜드 검색 후 나온 데이터들의 평균값을 구한 파일
exc_dt

Unnamed: 0,brandnm,4days,7days
0,Tory Burch,33615.384615,43615.384615
1,Chloe,35153.846154,45153.846154
2,Bottega Veneta,38166.666667,48166.666667
3,Biker Starlet,34000.0,44000.0
4,Givenchy,35730.769231,45730.769231
5,Vivienne Westwood,34000.0,44000.0
6,Burberry,36115.384615,46115.384615
7,Vunque,34000.0,44000.0
8,Mulberry,35153.846154,45153.846154
9,Marni,37947.368421,47947.368421


In [205]:
exc_dt['4days'] = exc_dt['4days'].astype(int)
exc_dt['7days'] = exc_dt['7days'].astype(int)
exc_dt

Unnamed: 0,brandnm,4days,7days
0,Tory Burch,33615,43615
1,Chloe,35153,45153
2,Bottega Veneta,38166,48166
3,Biker Starlet,34000,44000
4,Givenchy,35730,45730
5,Vivienne Westwood,34000,44000
6,Burberry,36115,46115
7,Vunque,34000,44000
8,Mulberry,35153,45153
9,Marni,37947,47947


In [206]:
## 평균 값을 4/7days nan 값에 넣기

for i in range(len(dt_ba)):
    if dt_ba['brandnm'][i] == 'Tory Burch':
        dt_ba['4days'][i] = 33620
        dt_ba['7days'][i] = 43620
        
    elif dt_ba['brandnm'][i] == 'Chloe':
        dt_ba['4days'][i] = 35150
        dt_ba['7days'][i] = 45150

    elif dt_ba['brandnm'][i] == 'Bottega Veneta':
        dt_ba['4days'][i] = 38170
        dt_ba['7days'][i] = 48170

    elif dt_ba['brandnm'][i] == 'Biker Starlet':
        dt_ba['4days'][i] = 34000
        dt_ba['7days'][i] = 44000
    
    elif dt_ba['brandnm'][i] == 'Givenchy':
        dt_ba['4days'][i] = 35730
        dt_ba['7days'][i] = 45730
    
    elif dt_ba['brandnm'][i] == 'Vivienne Westwood':
        dt_ba['4days'][i] = 34000
        dt_ba['7days'][i] = 44000
        
    elif dt_ba['brandnm'][i] == 'Burberry':
        dt_ba['4days'][i] = 36120
        dt_ba['7days'][i] = 46120
    
    elif dt_ba['brandnm'][i] == 'Vunque':
        dt_ba['4days'][i] = 34000
        dt_ba['7days'][i] = 44000
        
    elif dt_ba['brandnm'][i] == 'Mulberry':
        dt_ba['4days'][i] = 35150
        dt_ba['7days'][i] = 45150
    
    elif dt_ba['brandnm'][i] == 'Marni':
        dt_ba['4days'][i] = 37950
        dt_ba['7days'][i] = 47950
    
    elif dt_ba['brandnm'][i] == 'Dolce & Gabbana':
        dt_ba['4days'][i] = 37330
        dt_ba['7days'][i] = 47330
    
    elif dt_ba['brandnm'][i] == 'DELLEST':
        dt_ba['4days'][i] = 34380
        dt_ba['7days'][i] = 44380
    
    elif dt_ba['brandnm'][i] == 'Play Nomore':
        dt_ba['4days'][i] = 34000
        dt_ba['7days'][i] = 44000
    
    elif dt_ba['brandnm'][i] == 'Valentino':
        dt_ba['4days'][i] = 37640
        dt_ba['7days'][i] = 47640
        
    elif dt_ba['brandnm'][i] == 'Rosa.K':
        dt_ba['4days'][i] = 34000
        dt_ba['7days'][i] = 44000
        
    elif dt_ba['brandnm'][i] == 'Bulgari':
        dt_ba['4days'][i] = 37330
        dt_ba['7days'][i] = 47330
        
    elif dt_ba['brandnm'][i] == 'A.P.C.':
        dt_ba['4days'][i] = 32460
        dt_ba['7days'][i] = 42460
        
    elif dt_ba['brandnm'][i] == 'Kenzo':
        dt_ba['4days'][i] = 34000
        dt_ba['7days'][i] = 44000
        
    elif dt_ba['brandnm'][i] == 'Cesare Paciotti':
        dt_ba['4days'][i] = 34000
        dt_ba['7days'][i] = 44000
        
    elif dt_ba['brandnm'][i] == 'Stella Mccartney':
        dt_ba['4days'][i] = 33170
        dt_ba['7days'][i] = 43170
        
    elif dt_ba['brandnm'][i] == 'Goyard':
        dt_ba['4days'][i] = 39000
        dt_ba['7days'][i] = 49000
        
    elif dt_ba['brandnm'][i] == 'Maison Margiela':
        dt_ba['4days'][i] = 39000
        dt_ba['7days'][i] = 49000
        
    elif dt_ba['brandnm'][i] == 'Coach':
        dt_ba['4days'][i] = 33620
        dt_ba['7days'][i] = 43620
        
    elif dt_ba['brandnm'][i] == 'thom browne':
        dt_ba['4days'][i] = 39000
        dt_ba['7days'][i] = 49000
        
    elif dt_ba['brandnm'][i] == 'BYREDO':
        dt_ba['4days'][i] = 39000
        dt_ba['7days'][i] = 49000
        
    elif dt_ba['brandnm'][i] == 'Rebecca Minkoff':
        dt_ba['4days'][i] = 33420
        dt_ba['7days'][i] = 43420
        
    elif dt_ba['brandnm'][i] == 'Furla':
        dt_ba['4days'][i] = 33500
        dt_ba['7days'][i] = 43500
        
    elif dt_ba['brandnm'][i] == 'Red Valentino':
        dt_ba['4days'][i] = 37330
        dt_ba['7days'][i] = 47330
        
    elif dt_ba['brandnm'][i] == 'Rogervivier':
        dt_ba['4days'][i] = 39000
        dt_ba['7days'][i] = 49000
        
    elif dt_ba['brandnm'][i] == 'Off White':
        dt_ba['4days'][i] = 35670
        dt_ba['7days'][i] = 45670
        
    elif dt_ba['brandnm'][i] == 'Clu':
        dt_ba['4days'][i] = 34000
        dt_ba['7days'][i] = 44000
    else:
        pass
        
dt_ba #ba만 있는 데이터

Unnamed: 0,brandnm,goodsnm,USER,reg_date,order_date,p_type,sub_type,app_yn,4days,7days
0,Gucci,flower dionysus shoulder bag beige,a161237,2019-12-06,2021-01-15,7days,BA,Y,46300,56300
1,Gucci,dionysus cross bag,a1143028,2021-01-14,2021-01-15,4days,BA,Y,46300,56300
2,Louis Vuitton,twist monogram patent shoulder bag black,a1140216,2020-12-28,2021-01-15,7days,BA,Y,46580,56580
3,Tory Burch,flower ethnic pattern cross bag white,a1141255,2021-01-05,2021-01-15,4days,BA,Y,33620,43620
4,Chloe,faye small bag black,a1129386,2020-11-09,2021-01-15,4days,BA,Y,35150,45150
...,...,...,...,...,...,...,...,...,...,...
687,Gucci,monogram backpack brown,a1149535,2021-03-16,2021-03-16,7days,BA,N,46300,56300
688,Saint Laurent,monogram shoulder bag brown,a1147934,2021-03-01,2021-03-15,4days,BA,Y,45670,55670
689,Gucci,GG soho chain clutch bag black,a1147854,2021-02-28,2021-03-09,4days,BA,Y,46300,56300
690,Chanel,unique pattern boy cross bag,a1147817,2021-02-28,2021-03-03,7days,BA,Y,47330,57330


In [207]:
dt_ba.loc[(dt_ba['4days'] == 0) | (dt_ba['7days'] == 0)]

Unnamed: 0,brandnm,goodsnm,USER,reg_date,order_date,p_type,sub_type,app_yn,4days,7days


In [208]:
dt_ba.to_csv('bags_prices.csv') # 추가 데이터 포함 가격 - null 없애기 위함