In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

#데이터읽기
customers = pd.read_csv('./dacon_data/open/customers.csv')
locations = pd.read_csv('./dacon_data/open/locations.csv')
orders = pd.read_csv('./dacon_data/open/orders.csv')
order_items = pd.read_csv('./dacon_data/open/order_items.csv')
payments = pd.read_csv('./dacon_data/open/payments.csv')
products = pd.read_csv('./dacon_data/open/products.csv')
reviews = pd.read_csv('./dacon_data/open/reviews.csv')
sellers = pd.read_csv('./dacon_data/open/sellers.csv')

In [12]:
reviews.groupby(["Order_id"])["Review_score"].value_counts().unique()

array([1, 2, 3])

In [3]:
order_items.shape

(100557, 6)

In [4]:
temp = pd.merge(reviews,order_items, how='inner',on='Order_id')

In [5]:
temp.shape

(100415, 10)

In [87]:
# 데이터 merge하기
temp = pd.merge(reviews,order_items, how='inner',on='Order_id')
temp = pd.merge(temp,orders, how='inner',on='Order_id')
temp = pd.merge(temp,payments, how='inner',on='Order_id')
temp = pd.merge(temp,products, how='inner',on='Product_id')
temp = pd.merge(temp,customers, how='inner',on='Customer_id')
temp = pd.merge(temp,sellers, how='inner',on='Seller_id')

In [88]:
# locations[locations.Geolocation_zipcode_prefix==1037]
# 1037을 가진 우편번호가 여러개 존재함
# 여러개의 우편번호가 존재하므로 한우편번호에 있어서 평균을 구하기로함
locations_group_lat=locations.groupby(["Geolocation_zipcode_prefix"])["Geolocation_lat"].mean().reset_index()
locations_group_lng=locations.groupby(["Geolocation_zipcode_prefix"])["Geolocation_lng"].mean().reset_index()
locations_group =  pd.merge(locations_group_lat, locations_group_lng, how = 'inner', on="Geolocation_zipcode_prefix")

In [89]:
# 구입자의 위도경도
temp = pd.merge(temp, locations_group, left_on = 'Customer_zipcode_prefix', right_on = 'Geolocation_zipcode_prefix', how = 'inner')
temp = temp.rename(columns={'Geolocation_lat':'Customer_lat',"Geolocation_lng" : "Customer_lng" })

In [90]:
# 판매자의 위도경도
temp = pd.merge(temp, locations_group, left_on = 'Seller_zipcode_prefix', right_on = 'Geolocation_zipcode_prefix', how = 'inner')
temp = temp.rename(columns={'Geolocation_lat':'Seller_lat',"Geolocation_lng" : "Seller_lng" })

In [91]:
# 중복되는 구매자,판매자 우편번호 드랍
temp = temp.drop(["Geolocation_zipcode_prefix_x","Geolocation_zipcode_prefix_y"],axis=1)

In [92]:
# date 타입으로 변환
temp['Order_purchase_timestamp']=pd.to_datetime(temp['Order_purchase_timestamp'])
temp['Order_delivered_carrier_date']=pd.to_datetime(temp['Order_delivered_carrier_date'])
temp['Order_delivered_customer_date']=pd.to_datetime(temp['Order_delivered_customer_date'])
temp['Order_estimated_delivery_date']=pd.to_datetime(temp['Order_estimated_delivery_date'])
# temp[['Order_delivered_carrier_date','Order_delivered_customer_date',"Order_purchase_timestamp"]] = temp[['Order_delivered_carrier_date','Order_delivered_customer_date',"Order_purchase_timestamp"]].apply(pd.to_datetime)

In [93]:
# temp의 시계열이 datetype으로 바뀌었는지 확인
# temp.info()
# Order_purchase_timestamp : 구매 시간
# Order_delivered_carrier_date : 물류 처리 시간
# Order_delivered_customer_date : 실제 배송 날짜
# Order_estimated_delivery_date : 기대 배송 날짜

# 구매할때부터 물류처리할때까지 걸리는 시간
temp["time_diff_purchase_carrier"] = temp["Order_delivered_carrier_date"] - temp["Order_purchase_timestamp"]
# 구매할때부터 실제배송할때까지 걸리는 시간
temp["time_diff_purchase_customer"] = temp["Order_delivered_customer_date"] - temp["Order_purchase_timestamp"]
# 물류처리할때부터 실제배송할때까지 걸리는 시간
temp["time_diff_carrier_customer"] = temp["Order_delivered_customer_date"] - temp["Order_delivered_carrier_date"]
# 기대배송날짜와 실제배송날짜의 차이
temp["time_diff_customer_delivery"]= temp['Order_estimated_delivery_date'] - temp["Order_delivered_carrier_date"]

In [94]:
# 상관관계를 구하기 위하여 2days xx:xx:xx을 초로 변환
temp["time_diff_purchase_carrier_second"] = temp["time_diff_purchase_carrier"].apply(lambda x : x.total_seconds())
temp["time_diff_purchase_customer_second"] = temp["time_diff_purchase_customer"].apply(lambda x : x.total_seconds())
temp["time_diff_carrier_customer_second"] = temp["time_diff_carrier_customer"].apply(lambda x : x.total_seconds())
temp["time_diff_customer_delivery_second"] = temp["time_diff_customer_delivery"].apply(lambda x : x.total_seconds())

In [95]:
# 구매자와 판매자의 거기 변수 생성
temp["Distance"] = ((temp["Customer_lat"] - temp["Seller_lat"])**2 + (temp["Customer_lng"] - temp["Seller_lng"])**2) **(1/2)

In [96]:
# 제대로 sum이 안되는 columns 확인
# Product_weight_g 하나뿐 object로 되어있다.
# 타입 변경해주기
temp['Product_weight_g'] = temp['Product_weight_g'].replace('Unknown', np.nan).fillna(0).astype(float)
temp['YearMonth'] = temp['Order_purchase_timestamp'].dt.strftime('%Y%m') #월별로 분류

In [97]:
temp_product_group = temp.groupby(["Product_category_name","YearMonth"])[["Review_score","Price","Freight_value","Payment_value","time_diff_purchase_carrier_second","time_diff_purchase_customer_second","time_diff_carrier_customer_second","time_diff_customer_delivery_second"]].mean().reset_index()

In [98]:
# temp_product_group

In [99]:
#주에 따라 묶어서 주에서 거래한 개수
temp_group_count= temp.groupby(["Product_category_name","YearMonth"])["Review_id"].count().reset_index()
temp_group_count["count_ratio"]=temp_group_count["Review_id"].apply(lambda x : x/temp.shape[0])
temp_group_count = temp_group_count.drop(["Review_id"],axis=1)

In [100]:
# temp_group_count

In [101]:
# 그걸 기존 df와 묶기
temp_product_group= pd.merge(temp_product_group, temp_group_count, on=['Product_category_name','YearMonth'], how='inner')

In [102]:
temp_product_group

Unnamed: 0,Product_category_name,YearMonth,Review_score,Price,Freight_value,Payment_value,time_diff_purchase_carrier_second,time_diff_purchase_customer_second,time_diff_carrier_customer_second,time_diff_customer_delivery_second,count_ratio
0,Unknown,201710,5.000000,32.945000,15.015000,47.960000,1.023936e+06,1.643517e+06,6.195815e+05,3.500696e+06,0.000019
1,Unknown,201801,3.833333,133.636667,16.257500,160.707500,3.631958e+05,9.380359e+05,5.748401e+05,2.819958e+06,0.000115
2,Unknown,201802,3.910714,152.105714,18.253929,184.743393,5.464914e+05,1.151913e+06,6.054215e+05,2.146633e+06,0.000536
3,Unknown,201803,4.096774,111.628226,16.248387,141.522742,2.890181e+05,1.047002e+06,7.579839e+05,1.769171e+06,0.000594
4,Unknown,201804,4.139241,125.618987,20.372152,151.142911,3.208460e+05,1.466365e+06,1.145519e+06,2.096951e+06,0.000756
...,...,...,...,...,...,...,...,...,...,...,...
1191,watches_gifts,201903,3.690141,228.547535,20.343967,249.038779,2.616281e+05,1.682952e+06,1.421324e+06,1.729328e+06,0.004079
1192,watches_gifts,201904,4.241779,185.727215,15.284275,205.954623,2.800097e+05,1.116989e+06,8.369788e+05,2.052820e+06,0.004950
1193,watches_gifts,201905,4.022117,190.958894,13.866746,230.024882,3.181820e+05,1.160220e+06,8.420382e+05,1.861764e+06,0.006061
1194,watches_gifts,201906,4.230290,178.123195,18.360290,217.650664,2.387331e+05,8.305463e+05,5.918132e+05,2.148818e+06,0.004615


In [103]:
# 전체 순수익 더하기
all_price_sum=temp.Price.sum()

In [104]:
# 카테고리에 따라 묶어서 순수익률 개수
temp_group_price= temp.groupby(["Product_category_name","YearMonth"])["Price"].sum().reset_index()
temp_group_price["price_ratio"]=temp_group_price["Price"].apply(lambda x : x/all_price_sum)
temp_group_price = temp_group_price.drop(["Price"],axis=1)

In [105]:
# 그걸 기존 df와 묶기
temp_product_group= pd.merge(temp_product_group, temp_group_price, on=['Product_category_name','YearMonth'], how='inner')

In [106]:
# temp_product_group

In [107]:
# 전체 매출액 더하기
all_payment_sum=temp.Payment_value.sum()

In [108]:
#주에 따라 묶어서 주에서 매출액
temp_group_payment= temp.groupby(["Product_category_name","YearMonth"])["Payment_value"].sum().reset_index() 
temp_group_payment["payment_ratio"]=temp_group_payment["Payment_value"].apply(lambda x : x/all_payment_sum)
temp_group_payment = temp_group_payment.drop(["Payment_value"],axis=1)

In [109]:
# 그걸 기존 df와 묶기
temp_product_group= pd.merge(temp_product_group, temp_group_payment, on=['Product_category_name','YearMonth'], how='inner')

In [110]:
#count_ratio 가 낮으면서 price_ratio 가 높은게 가치가 있다. 파생변수 생성
temp_product_group["value_ratio"] = temp_product_group["price_ratio"] / temp_product_group["count_ratio"] #짜피 payment_ratio와 연관성이 85

In [111]:
# temp_product_group.head(1)

In [137]:
temp_product_group.corr(numeric_only=True).style.background_gradient(cmap='coolwarm', axis=None)

Unnamed: 0,Review_score,Price,Freight_value,Payment_value,time_diff_purchase_carrier_second,time_diff_purchase_customer_second,time_diff_carrier_customer_second,time_diff_customer_delivery_second,count_ratio,price_ratio,payment_ratio,value_ratio
Review_score,1.0,-0.006718,-0.054281,-0.108433,-0.174642,-0.301704,-0.24313,0.077936,-0.048668,-0.041433,-0.074251,-0.006718
Price,-0.006718,1.0,0.426549,0.711444,0.02649,0.026206,0.014284,0.030739,-0.086328,0.05031,0.029877,1.0
Freight_value,-0.054281,0.426549,1.0,0.317944,0.078827,0.169493,0.148228,0.050553,-0.106347,-0.030275,-0.015563,0.426549
Payment_value,-0.108433,0.711444,0.317944,1.0,0.069953,0.033271,-0.003789,-0.012598,-0.063398,0.031499,0.113344,0.711444
time_diff_purchase_carrier_second,-0.174642,0.02649,0.078827,0.069953,1.0,0.501196,-0.024619,0.160625,-0.069681,-0.064226,-0.029707,0.02649
time_diff_purchase_customer_second,-0.301704,0.026206,0.169493,0.033271,0.501196,1.0,0.852732,0.182525,0.003957,0.010374,0.026726,0.026206
time_diff_carrier_customer_second,-0.24313,0.014284,0.148228,-0.003789,-0.024619,0.852732,1.0,0.113907,0.046633,0.050754,0.048808,0.014284
time_diff_customer_delivery_second,0.077936,0.030739,0.050553,-0.012598,0.160625,0.182525,0.113907,1.0,-0.091002,-0.084755,-0.089965,0.030739
count_ratio,-0.048668,-0.086328,-0.106347,-0.063398,-0.069681,0.003957,0.046633,-0.091002,1.0,0.933144,0.947752,-0.086328
price_ratio,-0.041433,0.05031,-0.030275,0.031499,-0.064226,0.010374,0.050754,-0.084755,0.933144,1.0,0.963591,0.05031


In [113]:
# tolist로 리스트화
unique_product_name_list = temp_product_group.Product_category_name.unique().tolist()

In [114]:
# 언노운제거
unique_product_name_list = unique_product_name_list[1:]

In [115]:
# product_caterory name 리스트
# unique_product_name_list

In [116]:
##### 테스트해보기

In [117]:
temp_agro_industry_and_commerce = temp_product_group[temp_product_group.Product_category_name=="agro_industry_and_commerce"]

In [118]:
temp_agro_industry_and_commerce

Unnamed: 0,Product_category_name,YearMonth,Review_score,Price,Freight_value,Payment_value,time_diff_purchase_carrier_second,time_diff_purchase_customer_second,time_diff_carrier_customer_second,time_diff_customer_delivery_second,count_ratio,price_ratio,payment_ratio,value_ratio
20,agro_industry_and_commerce,201801,4.333333,21.99,13.93,56.393333,238410.666667,724174.3,485763.7,2258004.0,2.9e-05,5e-06,9e-06,0.18375
21,agro_industry_and_commerce,201802,4.111111,29.868889,14.942222,36.788889,214357.666667,1078041.0,863683.7,2365468.0,8.6e-05,2.2e-05,1.9e-05,0.249587
22,agro_industry_and_commerce,201803,2.0,40.995,14.35,55.345,152831.0,803211.0,650380.0,1686158.0,1.9e-05,7e-06,6e-06,0.342557
23,agro_industry_and_commerce,201805,4.75,394.985,45.95,440.935,227591.0,1034258.0,806667.5,2051464.0,3.8e-05,0.000126,9.9e-05,3.300525
24,agro_industry_and_commerce,201806,1.0,1390.0,32.05,1422.05,688547.0,862409.0,173862.0,1681205.0,1e-05,0.000111,8e-05,11.614945
25,agro_industry_and_commerce,201807,5.0,1180.0,19.47,1199.47,332744.0,407485.0,74741.0,716445.0,1e-05,9.4e-05,6.7e-05,9.860169
26,agro_industry_and_commerce,201808,5.0,22.0,34.15,224.6,78253.0,1611335.0,1533082.0,3034032.0,3.8e-05,7e-06,5e-05,0.183834
27,agro_industry_and_commerce,201809,3.25,498.4975,31.425,741.7525,146451.25,886186.5,739735.2,2033410.0,3.8e-05,0.00016,0.000166,4.165483
28,agro_industry_and_commerce,201810,4.2,670.056,41.63,711.686,240308.6,919690.0,679381.4,1792707.0,4.8e-05,0.000268,0.000199,5.599039
29,agro_industry_and_commerce,201811,2.789474,733.288947,35.675263,2414.653684,591475.315789,1585327.0,993851.9,1431674.0,0.000182,0.001115,0.002566,6.127418


In [119]:
temp_agro_industry_and_commerce.columns

Index(['Product_category_name', 'YearMonth', 'Review_score', 'Price',
       'Freight_value', 'Payment_value', 'time_diff_purchase_carrier_second',
       'time_diff_purchase_customer_second',
       'time_diff_carrier_customer_second',
       'time_diff_customer_delivery_second', 'count_ratio', 'price_ratio',
       'payment_ratio', 'value_ratio'],
      dtype='object')

In [120]:
temp_agro_industry_and_commerce.shape

(18, 14)

In [121]:
# temp_agro_industry_and_commerce["future"] = 1

In [122]:
# for i in range(1,10):
#     temp_agro_industry_and_commerce.loc[i,"future"] = i

In [123]:
temp_agro_industry_and_commerce

Unnamed: 0,Product_category_name,YearMonth,Review_score,Price,Freight_value,Payment_value,time_diff_purchase_carrier_second,time_diff_purchase_customer_second,time_diff_carrier_customer_second,time_diff_customer_delivery_second,count_ratio,price_ratio,payment_ratio,value_ratio
20,agro_industry_and_commerce,201801,4.333333,21.99,13.93,56.393333,238410.666667,724174.3,485763.7,2258004.0,2.9e-05,5e-06,9e-06,0.18375
21,agro_industry_and_commerce,201802,4.111111,29.868889,14.942222,36.788889,214357.666667,1078041.0,863683.7,2365468.0,8.6e-05,2.2e-05,1.9e-05,0.249587
22,agro_industry_and_commerce,201803,2.0,40.995,14.35,55.345,152831.0,803211.0,650380.0,1686158.0,1.9e-05,7e-06,6e-06,0.342557
23,agro_industry_and_commerce,201805,4.75,394.985,45.95,440.935,227591.0,1034258.0,806667.5,2051464.0,3.8e-05,0.000126,9.9e-05,3.300525
24,agro_industry_and_commerce,201806,1.0,1390.0,32.05,1422.05,688547.0,862409.0,173862.0,1681205.0,1e-05,0.000111,8e-05,11.614945
25,agro_industry_and_commerce,201807,5.0,1180.0,19.47,1199.47,332744.0,407485.0,74741.0,716445.0,1e-05,9.4e-05,6.7e-05,9.860169
26,agro_industry_and_commerce,201808,5.0,22.0,34.15,224.6,78253.0,1611335.0,1533082.0,3034032.0,3.8e-05,7e-06,5e-05,0.183834
27,agro_industry_and_commerce,201809,3.25,498.4975,31.425,741.7525,146451.25,886186.5,739735.2,2033410.0,3.8e-05,0.00016,0.000166,4.165483
28,agro_industry_and_commerce,201810,4.2,670.056,41.63,711.686,240308.6,919690.0,679381.4,1792707.0,4.8e-05,0.000268,0.000199,5.599039
29,agro_industry_and_commerce,201811,2.789474,733.288947,35.675263,2414.653684,591475.315789,1585327.0,993851.9,1431674.0,0.000182,0.001115,0.002566,6.127418


In [124]:


# 이전 월의 평균 가격을 구하기 위해 현재 월의 가격을 한 칸씩 밑으로 이동합니다.
temp_agro_industry_and_commerce['Previous_month_price'] = temp_agro_industry_and_commerce['Price'].shift(1)

# 가격 상승률을 계산합니다.
# monthly_mean_price['Price_increase_rate'] = (monthly_mean_price['Price'] - monthly_mean_price['Previous_month_price']) / monthly_mean_price['Previous_month_price']

# 결과 확인
# print(monthly_mean_price)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_agro_industry_and_commerce['Previous_month_price'] = temp_agro_industry_and_commerce['Price'].shift(1)


In [125]:
temp_agro_industry_and_commerce

Unnamed: 0,Product_category_name,YearMonth,Review_score,Price,Freight_value,Payment_value,time_diff_purchase_carrier_second,time_diff_purchase_customer_second,time_diff_carrier_customer_second,time_diff_customer_delivery_second,count_ratio,price_ratio,payment_ratio,value_ratio,Previous_month_price
20,agro_industry_and_commerce,201801,4.333333,21.99,13.93,56.393333,238410.666667,724174.3,485763.7,2258004.0,2.9e-05,5e-06,9e-06,0.18375,
21,agro_industry_and_commerce,201802,4.111111,29.868889,14.942222,36.788889,214357.666667,1078041.0,863683.7,2365468.0,8.6e-05,2.2e-05,1.9e-05,0.249587,21.99
22,agro_industry_and_commerce,201803,2.0,40.995,14.35,55.345,152831.0,803211.0,650380.0,1686158.0,1.9e-05,7e-06,6e-06,0.342557,29.868889
23,agro_industry_and_commerce,201805,4.75,394.985,45.95,440.935,227591.0,1034258.0,806667.5,2051464.0,3.8e-05,0.000126,9.9e-05,3.300525,40.995
24,agro_industry_and_commerce,201806,1.0,1390.0,32.05,1422.05,688547.0,862409.0,173862.0,1681205.0,1e-05,0.000111,8e-05,11.614945,394.985
25,agro_industry_and_commerce,201807,5.0,1180.0,19.47,1199.47,332744.0,407485.0,74741.0,716445.0,1e-05,9.4e-05,6.7e-05,9.860169,1390.0
26,agro_industry_and_commerce,201808,5.0,22.0,34.15,224.6,78253.0,1611335.0,1533082.0,3034032.0,3.8e-05,7e-06,5e-05,0.183834,1180.0
27,agro_industry_and_commerce,201809,3.25,498.4975,31.425,741.7525,146451.25,886186.5,739735.2,2033410.0,3.8e-05,0.00016,0.000166,4.165483,22.0
28,agro_industry_and_commerce,201810,4.2,670.056,41.63,711.686,240308.6,919690.0,679381.4,1792707.0,4.8e-05,0.000268,0.000199,5.599039,498.4975
29,agro_industry_and_commerce,201811,2.789474,733.288947,35.675263,2414.653684,591475.315789,1585327.0,993851.9,1431674.0,0.000182,0.001115,0.002566,6.127418,670.056


In [126]:
temp_agro_industry_and_commerce_min = temp_agro_industry_and_commerce[temp_agro_industry_and_commerce.value_ratio==0.1837501067222725]

In [127]:
temp_agro_industry_and_commerce_max = temp_agro_industry_and_commerce[temp_agro_industry_and_commerce.YearMonth=="201806"]

In [128]:
temp_agro_industry_and_commerce_min

Unnamed: 0,Product_category_name,YearMonth,Review_score,Price,Freight_value,Payment_value,time_diff_purchase_carrier_second,time_diff_purchase_customer_second,time_diff_carrier_customer_second,time_diff_customer_delivery_second,count_ratio,price_ratio,payment_ratio,value_ratio,Previous_month_price
20,agro_industry_and_commerce,201801,4.333333,21.99,13.93,56.393333,238410.666667,724174.333333,485763.666667,2258004.0,2.9e-05,5e-06,9e-06,0.18375,


In [129]:
temp_agro_industry_and_commerce_max

Unnamed: 0,Product_category_name,YearMonth,Review_score,Price,Freight_value,Payment_value,time_diff_purchase_carrier_second,time_diff_purchase_customer_second,time_diff_carrier_customer_second,time_diff_customer_delivery_second,count_ratio,price_ratio,payment_ratio,value_ratio,Previous_month_price
24,agro_industry_and_commerce,201806,1.0,1390.0,32.05,1422.05,688547.0,862409.0,173862.0,1681205.0,1e-05,0.000111,8e-05,11.614945,394.985


In [130]:
temp_product_group

Unnamed: 0,Product_category_name,YearMonth,Review_score,Price,Freight_value,Payment_value,time_diff_purchase_carrier_second,time_diff_purchase_customer_second,time_diff_carrier_customer_second,time_diff_customer_delivery_second,count_ratio,price_ratio,payment_ratio,value_ratio
0,Unknown,201710,5.000000,32.945000,15.015000,47.960000,1.023936e+06,1.643517e+06,6.195815e+05,3.500696e+06,0.000019,0.000005,0.000005,0.275291
1,Unknown,201801,3.833333,133.636667,16.257500,160.707500,3.631958e+05,9.380359e+05,5.748401e+05,2.819958e+06,0.000115,0.000128,0.000108,1.116678
2,Unknown,201802,3.910714,152.105714,18.253929,184.743393,5.464914e+05,1.151913e+06,6.054215e+05,2.146633e+06,0.000536,0.000682,0.000579,1.271007
3,Unknown,201803,4.096774,111.628226,16.248387,141.522742,2.890181e+05,1.047002e+06,7.579839e+05,1.769171e+06,0.000594,0.000554,0.000491,0.932774
4,Unknown,201804,4.139241,125.618987,20.372152,151.142911,3.208460e+05,1.466365e+06,1.145519e+06,2.096951e+06,0.000756,0.000794,0.000668,1.049682
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1191,watches_gifts,201903,3.690141,228.547535,20.343967,249.038779,2.616281e+05,1.682952e+06,1.421324e+06,1.729328e+06,0.004079,0.007790,0.005934,1.909761
1192,watches_gifts,201904,4.241779,185.727215,15.284275,205.954623,2.800097e+05,1.116989e+06,8.369788e+05,2.052820e+06,0.004950,0.007683,0.005955,1.551951
1193,watches_gifts,201905,4.022117,190.958894,13.866746,230.024882,3.181820e+05,1.160220e+06,8.420382e+05,1.861764e+06,0.006061,0.009672,0.008144,1.595667
1194,watches_gifts,201906,4.230290,178.123195,18.360290,217.650664,2.387331e+05,8.305463e+05,5.918132e+05,2.148818e+06,0.004615,0.006869,0.005868,1.488411


In [131]:
# 전월별의 차이를 봐서 기대값을 올려가보자

In [133]:
# import matplotlib.pyplot as plt
# x=np.arange(27)
# width = 0.35
# fig, axes = plt.subplots()
# axes.bar(x - width/2, test_city_ratio_earn["ratio"], width, alpha = 0.5) #물품비율
# axes.bar(x + width/2, test_city_ratio_earn["earn_ratio"], width, alpha = 0.8) #매출비율
# plt.xticks(x)
# axes.set_xticklabels(test_city_ratio_earn["Customer_state"])



In [134]:
# value_ratio 기준으로 정렬하기
temp_product_group_sorted= temp_product_group.sort_values(["value_ratio"],ascending=False)

In [136]:
# temp_product_group_sorted