# 분석 시작

In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Read the three preprocessed CSV inputs used by the analysis.
art = pd.read_csv(
    "data/articles_processed.csv"  # article (product) metadata
)
cust = pd.read_csv(
    "data/customer_processed.csv"  # customer metadata
)
total_revenue = pd.read_csv(
    "data/total_revenue.csv"  # revenue (transaction) data
)

In [10]:
# Display the column names of the article (product) metadata frame.
art.columns

Index(['Article_Id', 'Product_Code', 'Prod_Name', 'Product_Type_Info',
       'Product_Group_Name', 'Graphical_Appearance_Info',
       'Perceived_Colour_Master_Info', 'Department_Info', 'Index_Info',
       'Index_Group_Info', 'Section_Info', 'Garment_Group_Info'],
      dtype='str')

In [11]:
# Display the column names of the customer metadata frame.
cust.columns

Index(['Customer_Id', 'Fn', 'Active', 'Club_Member_Status',
       'Fashion_News_Frequency', 'Age', 'Age_Group'],
      dtype='str')

In [12]:
# Display the column names of the revenue (transaction) frame.
total_revenue.columns

Index(['T_Dat', 'Customer_Id', 'Article_Id', 'Revenue', 'Sales_Channel_Id',
       'Month', 'Year_Month', 'Month_Day', 'Total_Revenue', 'Revenue_Group'],
      dtype='str')

In [13]:
# ---------------------------------------------------------------------
# Integrated analysis table: transactions + customers + articles
# Data available after the join:
#   transactions (revenue, channel, date, customer revenue group)
#   + customer attributes + article attributes
# Inner joins are used because the analysis targets customers who actually
# purchased, not everyone who signed up.
# validate="many_to_one" makes pandas raise MergeError if Customer_Id or
# Article_Id is duplicated in the lookup frame; without it, duplicate keys
# would silently duplicate transaction rows and inflate revenue totals.
# ---------------------------------------------------------------------

analysis_master = (
    total_revenue
    .merge(cust, on="Customer_Id", how="inner", validate="many_to_one")
    .merge(art, on="Article_Id", how="inner", validate="many_to_one")
)
analysis_master.shape

(804149, 27)

In [None]:
# NOTE(review): unexecuted cell containing only a literal tuple. It differs from
# the (804149, 27) shape displayed for analysis_master -- presumably a shape noted
# from an earlier run; confirm, then remove it or move it into a markdown note.
(804149, 25)

In [14]:
# Preview the first rows of the merged analysis table.
analysis_master.head()

Unnamed: 0,T_Dat,Customer_Id,Article_Id,Revenue,Sales_Channel_Id,Month,Year_Month,Month_Day,Total_Revenue,Revenue_Group,...,Prod_Name,Product_Type_Info,Product_Group_Name,Graphical_Appearance_Info,Perceived_Colour_Master_Info,Department_Info,Index_Info,Index_Group_Info,Section_Info,Garment_Group_Info
0,2019-11-05,3e2b60b679e62fb49516105b975560082922011dd752ec...,698328010,0.016932,2,11,2019-11,11-05,0.227186,top_20,...,ZEBRA CF TVP,Sweater (252),Garment Upper body,Placement print (1010014),Blue (2),Tops Fancy Jersey (1640),Divided (D),Divided (2),Divided Collection (53),Jersey Fancy (1005)
1,2019-05-22,89647ac2274f54c770aaa4b326e0eea09610c252381f37...,760597002,0.033881,2,5,2019-05,05-22,0.067746,middle,...,BUBBLE WRAP TOP,Top (254),Garment Upper body,Stripe (1010017),White (9),Jersey (1660),Ladieswear (A),Ladieswear (1),Womens Casual (6),Jersey Fancy (1005)
2,2019-05-10,2ebe392150feb60ca89caa8eff6c08b7ef1138cd6fdc71...,488561032,0.016932,2,5,2019-05,05-10,0.016932,bottom_20,...,Teddy jogger.,Shorts (274),Garment Lower body,Chambray (1010024),Blue (2),Shorts (5658),Menswear (F),Menswear (3),Contemporary Casual (21),Shorts (1025)
3,2019-08-26,7b3205de4ca17a339624eb5e3086698e9984eba6b47c56...,682771001,0.033881,2,8,2019-08,08-26,0.033881,middle,...,Yuki shopper,Bag (66),Accessories,Solid (1010016),Black (5),Bags (3209),Ladies Accessories (C),Ladieswear (1),Womens Big accessories (65),Accessories (1019)
4,2019-08-10,3b77905de8b32045f08cedb79200cdfa477e9562429a39...,742400033,0.00322,1,8,2019-08,08-10,0.028627,middle,...,1pk Sportsock,Socks (302),Socks & Tights,All over pattern (1010001),White (9),Socks Wall (5999),Menswear (F),Menswear (3),Men Underwear (26),Socks and Tights (1021)


In [15]:
# Display the column names of the merged analysis table.
analysis_master.columns

Index(['T_Dat', 'Customer_Id', 'Article_Id', 'Revenue', 'Sales_Channel_Id',
       'Month', 'Year_Month', 'Month_Day', 'Total_Revenue', 'Revenue_Group',
       'Fn', 'Active', 'Club_Member_Status', 'Fashion_News_Frequency', 'Age',
       'Age_Group', 'Product_Code', 'Prod_Name', 'Product_Type_Info',
       'Product_Group_Name', 'Graphical_Appearance_Info',
       'Perceived_Colour_Master_Info', 'Department_Info', 'Index_Info',
       'Index_Group_Info', 'Section_Info', 'Garment_Group_Info'],
      dtype='str')

- 없는 컬럼
    - Day_name

- 있는 컬럼
    - Year_Month == 년/월
    - Month_Day == 월/일
    - Total_Revenue == 고객의 총 구매가격

- 다른 이름의 컬럼
    - Price = Revenue
    - Segment= Revenue_Group

In [18]:
# Working DataFrame for the analysis: an independent copy of a column subset
# of analysis_master, with the columns grouped by role.
column_groups = {
    "keys": ["Customer_Id", "Article_Id"],
    "time": ["Year_Month", "Month", "Month_Day"],
    "transaction": ["Revenue", "Sales_Channel_Id"],
    # Original author's note: Fn = news subscription,
    # Club_Member_Status = fan-club membership status.
    "customer": [
        "Age_Group",
        "Revenue_Group",
        "Club_Member_Status",
        "Fn",
        "Active",
    ],
    # Product characteristics (the core of the Christmas analysis).
    "product": [
        "Product_Group_Name",
        "Garment_Group_Info",
        "Product_Type_Info",
        "Graphical_Appearance_Info",  # pattern
        # Colour is handled by a teammate, but may be kept as a supporting column.
        "Perceived_Colour_Master_Info",
        "Section_Info",
        "Index_Group_Info",
    ],
}

# Flatten the groups in declaration order so the column order is preserved.
selected_columns = [
    column for group in column_groups.values() for column in group
]
analysis_christmas = analysis_master[selected_columns].copy()