In [1]:
import pandas as pd
import numpy as np

In [4]:
# Load the reservation records from the UTF-8 encoded CSV into a DataFrame.
reserve_tb = pd.read_csv(
    './data/reserve.csv',
    encoding='UTF-8',
)

## 7. 전개
데이터 집계 결과를 표 형식으로 변환

### 7.1. 가로 데이터로 변환
레코드 형식 -> 표 형식

In [5]:
# Wide-format cross-tab: one row per customer_id, one column per people_num value.
# Each cell holds the number of reserve_id entries in that (customer, people_num)
# group; combinations that never occur are filled with 0.
reserve_tb.pivot_table(
    values='reserve_id',
    index='customer_id',
    columns='people_num',
    aggfunc=len,
    fill_value=0,
)

people_num,1,2,3,4
customer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
c_1,2,2,2,2
c_10,0,2,2,2
c_100,2,1,2,0
c_1000,1,0,0,1
c_101,2,1,1,1
...,...,...,...,...
c_994,1,0,0,0
c_995,2,2,1,3
c_996,0,4,3,0
c_997,0,1,1,0


### 7.2. 희소 행렬로 변환
희소 행렬: 대부분 요소의 값이 0이고 극히 일부만 값을 가지는 거대한 행렬

In [9]:
from scipy.sparse import csc_matrix

# Long-format counts: one row per (customer_id, people_num) pair that actually
# occurs, with the group size stored in the 'rsv_cnt' column.
cnt_tb = (
    reserve_tb
    .groupby(['customer_id', 'people_num'])['reserve_id']
    .size()
    .reset_index(name='rsv_cnt')
)

# Categorical encoding of both keys: `.codes` supplies the integer positions and
# `.categories` the distinct labels used when building the sparse matrix.
customer_id = pd.Categorical(cnt_tb['customer_id'])
people_num = pd.Categorical(cnt_tb['people_num'])

# Display the encoded customer ids as the cell output.
customer_id

[c_1, c_10, c_100, c_1000, c_101, ..., c_994, c_995, c_996, c_997, c_999]
Length: 888
Categories (888, object): [c_1, c_10, c_100, c_1000, ..., c_995, c_996, c_997, c_999]

In [7]:
# Build the sparse matrix from coordinate triples: (values, (row codes, column codes)).
# Rows are indexed by customer_id category, columns by people_num category.
result = csc_matrix(
    (
        cnt_tb['rsv_cnt'],                       # stored (non-zero) values
        (customer_id.codes, people_num.codes),   # (row index, column index)
    ),
    shape=(customer_id.categories.size, people_num.categories.size),
)
# print() is intentional: it lists the stored "(row, col) value" entries.
print(result)

  (0, 0)	2
  (2, 0)	2
  (3, 0)	1
  (4, 0)	2
  (6, 0)	3
  (9, 0)	2
  (10, 0)	3
  (11, 0)	4
  (13, 0)	1
  (16, 0)	1
  (18, 0)	1
  (19, 0)	3
  (20, 0)	1
  (21, 0)	1
  (22, 0)	2
  (23, 0)	1
  (24, 0)	4
  (25, 0)	2
  (27, 0)	2
  (29, 0)	2
  (30, 0)	3
  (32, 0)	1
  (36, 0)	1
  (37, 0)	1
  (40, 0)	1
  :	:
  (850, 3)	1
  (852, 3)	1
  (853, 3)	1
  (856, 3)	2
  (857, 3)	1
  (858, 3)	3
  (859, 3)	3
  (862, 3)	1
  (863, 3)	1
  (864, 3)	1
  (866, 3)	1
  (867, 3)	1
  (868, 3)	1
  (869, 3)	3
  (870, 3)	2
  (871, 3)	1
  (875, 3)	1
  (876, 3)	2
  (877, 3)	1
  (878, 3)	1
  (879, 3)	4
  (880, 3)	2
  (881, 3)	3
  (884, 3)	3
  (887, 3)	1
