In [24]:
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.pool import NullPool
import matplotlib as plt
import matplotlib.pylab as plt
import seaborn as sns
from tqdm import tqdm
from PIL import Image
import requests
from io import BytesIO


In [74]:
q = '''
SELECT search_term
FROM bun_log_db.app_event_type_search
WHERE YEAR||MONTH||DAY = '20200915'
  AND event_action = 'search'
GROUP BY 1
ORDER BY count(*) DESC
LIMIT 200
'''

top_keywords = pd.read_sql(q, con=bun_dw)

In [76]:
keyword_list = ','.join('\''+str(i)+'\'' for i in top_keywords['search_term'])

q = '''
SELECT keyword,
       pids
FROM bun_log_db.ca_event_type_searched_pids_v2
WHERE YEAR||MONTH||DAY = '20200915'
  AND keyword IN ({})
'''.format(keyword_list)

df = pd.read_sql(q, con=bun_dw)

In [89]:
keyword_pids = {}
keyword_length = {}
pid_set = set()
for index, row in df.iterrows():
    keyword = row['keyword']
    pids = [i[8:-1] if 'pid' in i else i[1:-1] for i in row['pids'][1:-1].split(',')]
    for p in pids:
        pid_set.add(p)
    if keyword in keyword_pids:
        keyword_pids[keyword] += pids
    else:
        keyword_pids[keyword] = pids

In [39]:
# with open('keyword_pids.pickle', 'rb') as handle:
#     keyword_pids = pickle.load(handle)

In [90]:
for keyword in keyword_pids:
    keyword_pids[keyword] = list(set(keyword_pids[keyword]))

for keyword in keyword_pids:
    size = len(keyword_pids[keyword])
    keyword_length[keyword] = [size]

In [91]:
import pickle

with open('keyword_pids.pickle', 'wb') as p:
    pickle.dump(keyword_pids, p, protocol=pickle.HIGHEST_PROTOCOL)


In [92]:
keyword_result = pd.DataFrame.from_dict(keyword_length).T.reset_index()

In [93]:
keyword_result.columns = ['keyword', 'output_num']

In [94]:
keyword_result.sort_values('output_num', ascending=False).to_csv('keyword_result.csv', index=False)

In [100]:
keyword_pid_uid = {}
for keyword, pids in keyword_pids.items():
    pid_list = ','.join('\''+str(i)+'\'' for i in pids if i != '' )
    q = '''
    SELECT p.id AS pid,
           p.uid
    FROM service1_quicket.product_info p
    WHERE id IN ({})
      AND id NOT IN
        (SELECT DISTINCT target_id
         FROM warehouse.vw_ad_view
         WHERE ad_type IN ('상점UP',
                           '파워UP',
                           '슈퍼UP')
           AND to_char(ad_date, 'YYYY/MM/dd') = '2020/09/15' );
    '''.format(pid_list)
    product_info = pd.read_sql(q, con=bun_dw)
    keyword_pid_uid[keyword] = product_info

In [101]:
import pickle

with open('keyword_pid_uid.pickle', 'wb') as p:
    pickle.dump(keyword_pid_uid, p, protocol=pickle.HIGHEST_PROTOCOL)


In [108]:
keyword_normal_result_temp = {}
for keyword, df in keyword_pid_uid.items():
    keyword_normal_result_temp[keyword] = [df['pid'].nunique()]
    
keyword_normal_result = pd.DataFrame.from_dict(keyword_normal_result_temp).T.reset_index()
keyword_normal_result.columns = ['keyword', 'normal_num']

result_df = pd.merge(keyword_result, keyword_normal_result, on = 'keyword')

In [119]:
result_df.sort_values('normal_num', ascending=False).to_csv('result_df.csv', index=False)

In [142]:
top_keyword_list = result_df.sort_values('normal_num', ascending=False)['keyword'][:150].tolist()

In [143]:
from tqdm import tqdm


uid_percentage = {}
for keyword, df in tqdm(keyword_pid_uid.items()):
    if keyword in top_keyword_list:
        outof = len(df)
        uid_percentage[keyword] = df.groupby('uid').agg(lambda x: len(x)/outof).sort_values('pid', ascending=False).reset_index()

100%|██████████| 457/457 [00:03<00:00, 139.79it/s]


In [158]:
import numpy as np

for keyword, df in tqdm(uid_percentage.items()):
    uid_num = len(df)
    u_l = np.array(list(range(1, uid_num+1)))/uid_num
    p_l = []
    for i in range(1, uid_num+1):
        p_l.append(sum(df['pid'][:i]))
    
    df['uid_top_percent'] = u_l
    df['pid_percent'] = p_l

100%|██████████| 150/150 [03:38<00:00,  1.46s/it]


In [160]:
import pickle

with open('uid_percentage.pickle', 'wb') as p:
    pickle.dump(uid_percentage, p, protocol=pickle.HIGHEST_PROTOCOL)


In [162]:
top_keyword_list

['나이키',
 '구찌',
 '원피스',
 '맨투맨',
 '바람막이',
 '폴로',
 '샤넬',
 '루이비통',
 '버버리',
 '지갑',
 '아이폰',
 '후드티',
 '가디건',
 '시계',
 '후드집업',
 '빈티지',
 '클러치',
 '디올',
 '방탄',
 '에어팟',
 '톰브라운',
 '카드지갑',
 '갤럭시',
 '나이키 ',
 '스톤',
 '스톤아일랜드',
 '폴로 ',
 '방탄소년단',
 '발렌시아가',
 '나이키 바람막이',
 '자전거',
 '피규어',
 '이지부스트',
 '슈프림',
 '노트북',
 '오프화이트',
 '칼하트',
 '교신',
 '방탄소년단 ',
 '롤',
 '스투시',
 '루이비통 ',
 '아이폰x',
 '컴퓨터',
 '아이패드',
 '방탄 ',
 '컨버스',
 '꼼데가르송',
 '아이폰 ',
 '스톤아일랜드 ',
 '닌텐도',
 '몽클레어 패딩',
 '버즈',
 '닌텐도 스위치',
 '삽니다',
 '레고',
 '스타벅스',
 '몽클레어',
 '마르지엘라',
 '아이폰11',
 '아이폰8',
 '톰브라운 ',
 '아이즈원',
 '무스너클',
 '폴로 니트',
 '아이즈원 ',
 '닌텐도스위치',
 '디스이즈네버댓',
 '피규어 ',
 '축구화',
 '모니터',
 '조던1',
 'cos',
 '에어팟2',
 '원피스 피규어',
 '베이프',
 '세븐틴',
 '아이폰7',
 '14k',
 '나이키 맨투맨',
 '에어팟 프로',
 '오버워치',
 '오토바이',
 's10',
 '샤넬 ',
 '노트10',
 '향수',
 '방탄 포카',
 '나이키 후드집업',
 '비비안웨스트우드',
 '냉장고',
 '아이폰xs',
 '골든구스',
 '토이스토리',
 '롤 계정',
 '토이스토리 ',
 '레드벨벳',
 '나이키 후드티',
 '에어팟프로',
 '미스치프',
 '무료나눔',
 '로렉스',
 '기프티콘',
 '갤럭시s10',
 '구찌 ',
 '발렌시아가 ',
 '아이폰se',
 '파타고니아',
 '버버리 ',
 '오프화이트 ',
 '톰브라운

In [323]:
with open('uid_percentage.pickle', 'rb') as handle:
    uid_percentage = pickle.load(handle)

In [324]:
percentage_temp = {}
percentage_temp['keyword'] = []
percentage_temp['pid_cnt'] = []
for i in range(1, 101):
    percentage_temp[str(i)] = []

In [325]:
for keyword, df in tqdm(uid_percentage.items()):
    percentage_temp['keyword'] += [keyword]
    percentage_temp['pid_cnt'] += [keyword_pid_uid[keyword]['pid'].nunique()]
    for i in range(1, 101):
        percentage_temp[str(i)] += [df[df['uid_top_percent'] <= (i/100)].iloc[-1]['pid_percent'].tolist()]




  0%|          | 0/150 [00:00<?, ?it/s][A[A[A


  1%|▏         | 2/150 [00:00<00:12, 11.86it/s][A[A[A


  3%|▎         | 4/150 [00:00<00:12, 12.10it/s][A[A[A


  4%|▍         | 6/150 [00:00<00:11, 12.59it/s][A[A[A


  5%|▌         | 8/150 [00:00<00:10, 12.94it/s][A[A[A


  7%|▋         | 10/150 [00:00<00:10, 13.21it/s][A[A[A


  8%|▊         | 12/150 [00:00<00:10, 12.87it/s][A[A[A


  9%|▉         | 14/150 [00:01<00:10, 12.76it/s][A[A[A


 11%|█         | 16/150 [00:01<00:10, 12.83it/s][A[A[A


 12%|█▏        | 18/150 [00:01<00:11, 11.82it/s][A[A[A


 13%|█▎        | 20/150 [00:01<00:10, 12.17it/s][A[A[A


 15%|█▍        | 22/150 [00:01<00:09, 12.80it/s][A[A[A


 16%|█▌        | 24/150 [00:01<00:09, 13.13it/s][A[A[A


 17%|█▋        | 26/150 [00:02<00:09, 13.32it/s][A[A[A


 19%|█▊        | 28/150 [00:02<00:08, 13.62it/s][A[A[A


 20%|██        | 30/150 [00:02<00:08, 13.46it/s][A[A[A


 21%|██▏       | 32/150 [00:02<00:09, 13.01it/s][

In [326]:
percentage_temp = pd.DataFrame.from_dict(percentage_temp)

In [328]:
col = ['keyword', 'pid_cnt']
for i in range(1, 101):
    col.append('상위'+str(i)+'%')

In [329]:
percentage_temp.columns = col

In [330]:
percentage_temp.sort_values('pid_cnt', ascending=False)

Unnamed: 0,keyword,pid_cnt,상위1%,상위2%,상위3%,상위4%,상위5%,상위6%,상위7%,상위8%,...,상위91%,상위92%,상위93%,상위94%,상위95%,상위96%,상위97%,상위98%,상위99%,상위100%
16,나이키,60189,0.246121,0.301861,0.338727,0.367337,0.391427,0.410866,0.430071,0.446237,...,0.956671,0.961489,0.966307,0.971108,0.975926,0.980744,0.985546,0.990364,0.995182,1.0
46,구찌,44977,0.339329,0.421838,0.470062,0.502368,0.526714,0.546368,0.562376,0.577673,...,0.965493,0.969318,0.973142,0.976988,0.980812,0.984659,0.988483,0.992329,0.996154,1.0
51,원피스,44418,0.121100,0.167747,0.203521,0.233824,0.261561,0.284749,0.308051,0.328088,...,0.958170,0.962808,0.967446,0.972106,0.976744,0.981404,0.986042,0.990702,0.995340,1.0
10,맨투맨,34525,0.200579,0.237596,0.263896,0.287299,0.306705,0.324258,0.341810,0.359363,...,0.947314,0.953164,0.959015,0.964866,0.970717,0.976568,0.982419,0.988269,0.994120,1.0
0,바람막이,29908,0.244784,0.313662,0.357396,0.389494,0.414605,0.434533,0.451284,0.466230,...,0.954962,0.959977,0.964959,0.969975,0.974990,0.979972,0.984987,0.989969,0.994985,1.0
29,폴로,28891,0.316881,0.395071,0.442075,0.475131,0.501471,0.522550,0.540895,0.556644,...,0.966979,0.970648,0.974317,0.977986,0.981655,0.985324,0.988993,0.992662,0.996331,1.0
55,샤넬,26968,0.281853,0.355273,0.404554,0.441449,0.471818,0.497145,0.518726,0.537489,...,0.967591,0.971188,0.974785,0.978382,0.981979,0.985575,0.989172,0.992769,0.996366,1.0
3,루이비통,26885,0.343686,0.427822,0.478594,0.513967,0.541454,0.563437,0.582593,0.599330,...,0.969686,0.973070,0.976418,0.979803,0.983150,0.986535,0.989883,0.993268,0.996615,1.0
8,버버리,24108,0.324457,0.401195,0.448938,0.483449,0.511822,0.535134,0.555044,0.572466,...,0.968600,0.972084,0.975568,0.979053,0.982537,0.986021,0.989506,0.992990,0.996474,1.0
61,지갑,23963,0.219839,0.259024,0.284605,0.304344,0.322873,0.341527,0.354630,0.366982,...,0.944164,0.950382,0.956558,0.962776,0.968994,0.975170,0.981388,0.987564,0.993782,1.0


In [332]:
percentage_temp.sort_values('pid_cnt', ascending=False).to_csv('./percentage_temp.csv', index=False)

In [333]:
rank_temp = {}
rank_temp['keyword'] = []
rank_temp['pid_cnt'] = []
for i in range(1, 101):
    rank_temp[str(i)] = []
    
for keyword, df in tqdm(uid_percentage.items()):
    rank_temp['keyword'] += [keyword]
    rank_temp['pid_cnt'] += [keyword_pid_uid[keyword]['pid'].nunique()]
    df = df.sort_values('uid_top_percent', ascending=True)
    for i in range(1, 101):
        rank_temp[str(i)] += [df.loc[i-1, 'pid'].tolist()]




  0%|          | 0/150 [00:00<?, ?it/s][A[A[A


 12%|█▏        | 18/150 [00:00<00:00, 173.74it/s][A[A[A


 31%|███▏      | 47/150 [00:00<00:00, 195.44it/s][A[A[A


 51%|█████     | 76/150 [00:00<00:00, 215.72it/s][A[A[A


 74%|███████▍  | 111/150 [00:00<00:00, 243.03it/s][A[A[A


100%|██████████| 150/150 [00:00<00:00, 283.57it/s][A[A[A


In [334]:
rank_temp = pd.DataFrame.from_dict(rank_temp)

In [335]:
col = ['keyword', 'pid_cnt']
for i in range(1, 101):
    col.append(str(i)+'등')
rank_temp.columns = col
rank_temp

Unnamed: 0,keyword,pid_cnt,1등,2등,3등,4등,5등,6등,7등,8등,...,91등,92등,93등,94등,95등,96등,97등,98등,99등,100등
0,바람막이,29908,0.012371,0.009329,0.007189,0.006854,0.006219,0.006052,0.006018,0.005985,...,0.000936,0.000903,0.000903,0.000903,0.000903,0.000903,0.000869,0.000836,0.000836,0.000836
1,갤럭시,14287,0.014209,0.010429,0.009869,0.009869,0.008049,0.006859,0.006229,0.005390,...,0.000840,0.000840,0.000840,0.000840,0.000840,0.000840,0.000840,0.000840,0.000770,0.000770
2,마이멜로디,3148,0.026684,0.022554,0.014295,0.014295,0.013342,0.013024,0.012071,0.010801,...,0.001588,0.001588,0.001588,0.001588,0.001588,0.001588,0.001588,0.001588,0.001588,0.001588
3,루이비통,26885,0.021462,0.021425,0.018040,0.015362,0.014506,0.013539,0.011233,0.008704,...,0.001265,0.001227,0.001227,0.001227,0.001190,0.001190,0.001190,0.001190,0.001190,0.001153
4,컨버스,7392,0.016369,0.006223,0.005682,0.004194,0.002841,0.002841,0.002706,0.002570,...,0.000541,0.000541,0.000541,0.000541,0.000541,0.000541,0.000541,0.000541,0.000541,0.000541
5,아이패드,7688,0.011837,0.008715,0.004292,0.003902,0.003122,0.002601,0.002341,0.002081,...,0.000520,0.000520,0.000520,0.000520,0.000520,0.000520,0.000520,0.000520,0.000520,0.000520
6,세븐틴,5198,0.017314,0.011735,0.007311,0.005002,0.004810,0.004617,0.004425,0.004425,...,0.001539,0.001539,0.001539,0.001539,0.001539,0.001539,0.001539,0.001539,0.001539,0.001539
7,후드집업,18921,0.010042,0.006342,0.005919,0.005127,0.003911,0.003700,0.003171,0.003171,...,0.000423,0.000423,0.000423,0.000423,0.000423,0.000423,0.000423,0.000423,0.000423,0.000423
8,버버리,24108,0.026879,0.025344,0.011241,0.010329,0.010080,0.009997,0.009665,0.008255,...,0.001161,0.001120,0.001120,0.001120,0.001120,0.001120,0.001120,0.001120,0.001120,0.001120
9,아이폰se,3685,0.016282,0.015197,0.014111,0.013297,0.013026,0.011398,0.009498,0.009227,...,0.000814,0.000814,0.000814,0.000814,0.000814,0.000814,0.000814,0.000814,0.000814,0.000814


In [336]:
rank_temp.sort_values('pid_cnt', ascending=False).to_csv('./rank_temp.csv', index=False)

In [234]:
col = percentage_temp.columns[1:]

In [238]:
avg = []
for c in col:
    avg.append(round(percentage_temp[c].mean()*100))

In [250]:
avg_df = {'col': list(range(1,101)), 'average': avg}

In [251]:
temp = pd.DataFrame.from_dict(avg_df)

In [252]:
temp.to_csv('./temp.csv', index=False)

In [285]:
imp_dict = {}

for keyword, df in tqdm(keyword_pid_uid.items()):
    if keyword in top_keyword_list:
        pids = ','.join('\''+ str(i) +'\'' for i in df['pid'].unique())
        q = '''
        SELECT content_id as pid,
               count(*) AS imp_cnt
        FROM bun_log_db.app_event_type_impression
        WHERE content_id IN ({}) AND 
              page_id = '검색결과' AND
              ref_campaign IS NULL AND
              ref_term = {} AND 
              YEAR||MONTH||DAY = '20200915'
        GROUP BY 1
        '''.format(pids, '\''+keyword+'\'')
        temp_imp = pd.read_sql(q, con=bun_dw)
        df['pid'] = df['pid'].astype(str)
        temp_imp['pid'] = temp_imp['pid'].astype(str)
        imp = pd.merge(df, temp_imp, on='pid')
        imp_dict[keyword] = imp


  0%|          | 0/457 [00:00<?, ?it/s][A
  0%|          | 1/457 [00:13<1:40:37, 13.24s/it][A
  1%|          | 3/457 [00:30<1:29:34, 11.84s/it][A
  1%|          | 4/457 [00:49<1:45:24, 13.96s/it][A
  1%|          | 5/457 [01:02<1:42:51, 13.65s/it][A
  1%|▏         | 6/457 [01:32<2:21:02, 18.76s/it][A
  2%|▏         | 7/457 [02:06<2:54:04, 23.21s/it][A
  2%|▏         | 8/457 [02:29<2:52:40, 23.08s/it][A
  3%|▎         | 13/457 [02:41<2:05:07, 16.91s/it][A
  3%|▎         | 15/457 [02:55<1:42:15, 13.88s/it][A
  4%|▎         | 16/457 [03:14<1:53:58, 15.51s/it][A
  4%|▍         | 18/457 [03:27<1:33:16, 12.75s/it][A
  4%|▍         | 20/457 [03:58<1:38:29, 13.52s/it][A
  5%|▍         | 21/457 [04:11<1:37:43, 13.45s/it][A
  5%|▍         | 22/457 [04:24<1:36:37, 13.33s/it][A
  5%|▌         | 23/457 [04:37<1:36:20, 13.32s/it][A
  5%|▌         | 24/457 [05:05<2:07:41, 17.69s/it][A
  5%|▌         | 25/457 [05:19<1:58:49, 16.50s/it][A
  6%|▌         | 26/457 [05:35<1:57:44, 16.39

In [286]:
import pickle

with open('imp_dict.pickle', 'wb') as p:
    pickle.dump(imp_dict, p, protocol=pickle.HIGHEST_PROTOCOL)


In [306]:
uid_imp_cnt = {}
for keyword, df in tqdm(imp_dict.items()):
    total_imp_cnt = df['imp_cnt'].sum()
    data = df.groupby('uid')['imp_cnt'].sum().agg(lambda x: x/total_imp_cnt).sort_values(ascending=False).reset_index()
    uid_imp_cnt[keyword] = data


  0%|          | 0/150 [00:00<?, ?it/s][A
  9%|▊         | 13/150 [00:00<00:01, 123.19it/s][A
 21%|██        | 31/150 [00:00<00:00, 135.05it/s][A
 35%|███▍      | 52/150 [00:00<00:00, 147.54it/s][A
 50%|█████     | 75/150 [00:00<00:00, 164.41it/s][A
 67%|██████▋   | 101/150 [00:00<00:00, 184.10it/s][A
 83%|████████▎ | 125/150 [00:00<00:00, 197.89it/s][A
100%|██████████| 150/150 [00:00<00:00, 205.93it/s][A


In [344]:
uid_imp_cnt['로렉스']

Unnamed: 0,uid,imp_cnt,uid_top_percent,imp_cumulative_percent
0,12175475,0.092259,0.000676,0.092259
1,3272310,0.089521,0.001351,0.181781
2,11889581,0.081872,0.002027,0.263653
3,1022572,0.026159,0.002703,0.289812
4,4889507,0.021768,0.003378,0.311580
5,10779974,0.017623,0.004054,0.329203
6,9405839,0.014915,0.004730,0.344118
7,2833727,0.013385,0.005405,0.357503
8,450299,0.010341,0.006081,0.367845
9,3542601,0.009607,0.006757,0.377451


In [309]:
import numpy as np

for keyword, df in tqdm(uid_imp_cnt.items()):
    uid_num = len(df)
    u_l = np.array(list(range(1, uid_num+1)))/uid_num
    imp_l = []
    for i in range(1, uid_num+1):
        imp_l.append(sum(df['imp_cnt'][:i]))
    
    df['uid_top_percent'] = u_l
    df['imp_cumulative_percent'] = imp_l



  0%|          | 0/150 [00:00<?, ?it/s][A[A

  1%|          | 1/150 [00:06<15:01,  6.05s/it][A[A

  1%|▏         | 2/150 [00:07<11:27,  4.64s/it][A[A

  2%|▏         | 3/150 [00:07<08:06,  3.31s/it][A[A

  3%|▎         | 4/150 [00:10<07:44,  3.18s/it][A[A

  3%|▎         | 5/150 [00:11<06:12,  2.57s/it][A[A

  4%|▍         | 6/150 [00:12<05:01,  2.09s/it][A[A

  5%|▍         | 7/150 [00:12<03:39,  1.53s/it][A[A

  5%|▌         | 8/150 [00:17<05:38,  2.38s/it][A[A

  6%|▌         | 9/150 [00:19<05:42,  2.43s/it][A[A

  7%|▋         | 10/150 [00:20<04:13,  1.81s/it][A[A

  7%|▋         | 11/150 [00:29<09:33,  4.13s/it][A[A

  8%|▊         | 12/150 [00:30<07:06,  3.09s/it][A[A

  9%|▊         | 13/150 [00:32<06:39,  2.91s/it][A[A

  9%|▉         | 14/150 [00:34<05:57,  2.63s/it][A[A

 10%|█         | 15/150 [00:36<04:58,  2.21s/it][A[A

 11%|█         | 16/150 [00:36<03:36,  1.61s/it][A[A

 11%|█▏        | 17/150 [00:57<16:34,  7.48s/it][A[A

 12%|█▏ 

 97%|█████████▋| 146/150 [02:47<00:01,  3.05it/s][A[A

 98%|█████████▊| 147/150 [02:48<00:00,  3.70it/s][A[A

 99%|█████████▊| 148/150 [02:48<00:00,  3.87it/s][A[A

100%|██████████| 150/150 [02:48<00:00,  1.13s/it][A[A


In [317]:
imp_final = {}
imp_final['keyword'] = []
imp_final['uid_cnt'] = []
imp_final['pid_cnt'] = []
for i in range(1, 101):
    imp_final[str(i)] = []

for keyword, df in tqdm(uid_imp_cnt.items()):
    imp_final['keyword'] += [keyword]
    imp_final['uid_cnt'] += [len(df)]
    imp_final['pid_cnt'] += [keyword_pid_uid[keyword]['pid'].nunique()]
    for i in range(1, 101):
        imp_final[str(i)] += [df[df['uid_top_percent'] <= (i/100)].iloc[-1]['imp_cumulative_percent'].tolist()]



  0%|          | 0/150 [00:00<?, ?it/s][A[A

  1%|▏         | 2/150 [00:00<00:12, 12.14it/s][A[A

  3%|▎         | 4/150 [00:00<00:11, 12.43it/s][A[A

  4%|▍         | 6/150 [00:00<00:11, 12.79it/s][A[A

  5%|▌         | 8/150 [00:00<00:11, 12.77it/s][A[A

  7%|▋         | 10/150 [00:00<00:10, 12.79it/s][A[A

  8%|▊         | 12/150 [00:00<00:10, 12.68it/s][A[A

  9%|▉         | 14/150 [00:01<00:10, 12.51it/s][A[A

 11%|█         | 16/150 [00:01<00:10, 12.46it/s][A[A

 12%|█▏        | 18/150 [00:01<00:11, 11.33it/s][A[A

 13%|█▎        | 20/150 [00:01<00:11, 11.72it/s][A[A

 15%|█▍        | 22/150 [00:01<00:10, 12.13it/s][A[A

 16%|█▌        | 24/150 [00:01<00:10, 12.40it/s][A[A

 17%|█▋        | 26/150 [00:02<00:09, 12.53it/s][A[A

 19%|█▊        | 28/150 [00:02<00:09, 12.83it/s][A[A

 20%|██        | 30/150 [00:02<00:09, 13.06it/s][A[A

 21%|██▏       | 32/150 [00:02<00:09, 13.11it/s][A[A

 23%|██▎       | 34/150 [00:02<00:08, 13.01it/s][A[A

 24

In [318]:
imp_final = pd.DataFrame.from_dict(imp_final)

In [338]:
col = ['keyword', 'uid_cnt', 'pid_cnt']
for i in range(1, 101):
    col.append('상위'+str(i)+'%')
    
imp_final.columns=col

In [343]:
imp_final.sort_values('pid_cnt', ascending=False).to_csv('imp_final.csv', index=False)