### **카드데이터 추출**

In [27]:
!sudo apt-get install -y fonts-nanum
!sudo fc-cache -fv
!rm ~/.cache/matplotlib -rf

Reading package lists... Done
Building dependency tree       
Reading state information... Done
fonts-nanum is already the newest version (20170925-1).
0 upgraded, 0 newly installed, 0 to remove and 14 not upgraded.
/usr/share/fonts: caching, new cache contents: 0 fonts, 1 dirs
/usr/share/fonts/truetype: caching, new cache contents: 0 fonts, 3 dirs
/usr/share/fonts/truetype/humor-sans: caching, new cache contents: 1 fonts, 0 dirs
/usr/share/fonts/truetype/liberation: caching, new cache contents: 16 fonts, 0 dirs
/usr/share/fonts/truetype/nanum: caching, new cache contents: 10 fonts, 0 dirs
/usr/local/share/fonts: caching, new cache contents: 0 fonts, 0 dirs
/root/.local/share/fonts: skipping, no such directory
/root/.fonts: skipping, no such directory
/var/cache/fontconfig: cleaning cache directory
/root/.cache/fontconfig: not cleaning non-existent cache directory
/root/.fontconfig: not cleaning non-existent cache directory
fc-cache: succeeded


In [28]:
import warnings
# NOTE(review): this silences every warning category for the rest of the
# session, including deprecation warnings raised by pandas/seaborn in later cells.
warnings.filterwarnings("ignore")

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
import pandas as pd
import seaborn as sns
import math
from datetime import date, timedelta

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Mount Google Drive (Colab-only API) so the CSV under /content/drive can be read.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Read the card CSV from the mounted shared drive into `card`.
card = pd.read_csv(
    filepath_or_buffer='/content/drive/Shared drives/경제정보분석/KT_data_20200717/card_20200717.csv',
)

In [None]:
# Distinct values of `mrhst_induty_cl_nm` (the column renamed to `category` further down).
card['mrhst_induty_cl_nm'].unique()

In [None]:
# Preview the first five rows of the raw data.
card.head(n=5)

### **이태원 데이터 추출**

In [None]:
# Keep only the rows whose `adstrd_nm` is one of the two Itaewon values.
itaewon = card[card['adstrd_nm'].isin(['이태원1동', '이태원2동'])]
itaewon.head()

In [None]:
# Narrow to the four columns used by the analysis and give them short names.
itaewon_need = itaewon[['receipt_dttm', 'adstrd_nm', 'mrhst_induty_cl_nm', 'salamt']].rename(
    columns={
        'receipt_dttm': 'date',
        'adstrd_nm': 'town',
        'mrhst_induty_cl_nm': 'category',
        'salamt': 'sale',
    }
)
itaewon_need

In [None]:
# Total sale per (date, category).
# The list-valued agg spec ({'sale': ['sum']}) is kept as-is: the later cells
# select ['sale'] from the grouped result and call .merge on what comes back.
itaewon_category = (
    itaewon_need
    .groupby(['date', 'category'])
    .agg({'sale': ['sum']})
    .reset_index()
)
itaewon_category

### **5월 8일 기준 2주 전후 분석**

In [None]:
# Split the daily totals into the window ending on 20200508 and the window
# starting on 20200509; `date` is compared against yyyymmdd integers and both
# bounds of each window are inclusive.
itaewon_before = itaewon_category[itaewon_category['date'].between(20200424, 20200508)]
itaewon_after = itaewon_category[itaewon_category['date'].between(20200509, 20200523)]

# Show both windows, separated by one blank line.
print(itaewon_before, itaewon_after, sep='\n\n')

In [None]:
# Two weeks before the Itaewon cluster outbreak: sale totals per category.
itaewon_before_sum = itaewon_before.groupby(['category']).sum()['sale']
itaewon_before_sum

In [None]:
# Two weeks after the Itaewon cluster outbreak: sale totals per category.
itaewon_after_sum = itaewon_after.groupby(['category']).sum()['sale']
itaewon_after_sum

In [None]:
# Join the before/after totals on the category index. pandas' default inner
# join keeps only the categories that appear in both windows.
itaewon_all = pd.merge(
    itaewon_before_sum,
    itaewon_after_sum,
    left_index=True,
    right_index=True,
)
itaewon_all.columns = ['before', 'after']

# Absolute change, and change relative to the "before" total.
itaewon_all['difference'] = itaewon_all['after'] - itaewon_all['before']
itaewon_all['pct_ch'] = itaewon_all['difference'] / itaewon_all['before']

itaewon_all

In [None]:
# Rows with the largest difference and the largest rate of change.
print(
    itaewon_all.loc[itaewon_all.difference.idxmax()],
    itaewon_all.loc[itaewon_all.pct_ch.idxmax()],
    sep='\n\n',
)

In [None]:
# Rows with the smallest difference and the smallest rate of change.
print(
    itaewon_all.loc[itaewon_all.difference.idxmin()],
    itaewon_all.loc[itaewon_all.pct_ch.idxmin()],
    sep='\n\n',
)

In [None]:
# Order the categories by change in sale amount, ascending.
itaewon_sort_diff = itaewon_all.sort_values('difference')
itaewon_sort_diff

## **매출 변화량**이 *음수*로 나타난 상위 카테고리(매출액 감소):

서양음식, 일반한식, 주점, 유흥주점, 편의점

## **매출 변화량**이 *양수*로 나타난 상위 카테고리(매출액 증가):

일반 가구, 유아원, 의료 용품, 수입자동차, 기타회원제형태업소4

In [None]:
# Order the categories by rate of change in sale amount, ascending.
itaewon_sort_pct = itaewon_all.sort_values('pct_ch')
itaewon_sort_pct

## **매출 변화율**이 *음수*로 나타난 상위 카테고리:

통신 기기, 유흥주점, 완구점, 내의판매점, 노래방

## **매출 변화율**이 *양수*로 나타난 상위 카테고리:

문화취미기타, 기타건강식, 페인트, 유아원, 사무서비스

------------------------------------------------------------------------------------------

### **매출 변화율이 높은 카테고리들 plot**

In [None]:
# Stack the five lowest and the five highest rows of the ascending pct_ch sort.
itaewon_plot = pd.concat([itaewon_sort_pct.iloc[:5], itaewon_sort_pct.iloc[-5:]])
itaewon_plot

In [None]:
# Use NanumBarunGothic as the matplotlib font family for all following figures.
plt.rcParams['font.family'] = 'NanumBarunGothic'

In [None]:
import seaborn as sns
import matplotlib.font_manager as fm

plt.rcParams["figure.figsize"] = (10, 10)

# Bubble colour by the sign of the sale difference: blue when it increased,
# pink otherwise. The values are iterated directly instead of using
# `series[i]` with an integer on a category-labelled Series, which pandas has
# deprecated as a positional lookup (the warning is hidden by the global filter).
color = ['lightblue' if diff > 0 else 'lightpink' for diff in itaewon_plot['difference']]

# x and y are passed by keyword; recent seaborn releases no longer accept them
# positionally. Bubble area scales with the magnitude of the rate of change.
ax = sns.scatterplot(x=itaewon_plot['difference'], y=itaewon_plot['pct_ch'],
                     s=np.abs(itaewon_plot['pct_ch']) * 1000,
                     color=color, alpha=0.4, edgecolors="grey", linewidth=2, legend='full')

# Write each category name (the index of itaewon_plot) on top of its bubble.
for label, x_pos, y_pos in zip(itaewon_plot.index, itaewon_plot['difference'], itaewon_plot['pct_ch']):
    ax.text(x_pos, y_pos, label,
            horizontalalignment='center', size='large', color='black', weight='semibold', rotation=40)

plt.title("\n<Itaewon> Strongly Affected Categories\n", fontsize=20)
plt.xlabel("Difference in sale", fontsize=15)
plt.ylabel("Percentage change in sale", fontsize=15)

# Horizontal grid lines only.
axes = plt.gca()
axes.yaxis.grid()

plt.show()

In [None]:
# Derive the bar colours from itaewon_plot in this cell instead of reusing the
# global `color` left behind by another cell: the Seocho cells further down
# rebind that name, so running cells out of order would colour these bars
# from the wrong data. Blue = sale increased, pink = otherwise.
color = ['lightblue' if diff > 0 else 'lightpink' for diff in itaewon_plot['difference']]

# One bar per category, height = rate of change in sale.
plt.bar(itaewon_plot.index, itaewon_plot['pct_ch'], color=color)
plt.title("\n<Itaewon> Strongly Affected Categories\n", fontsize=20)
plt.xlabel('Category', fontsize=18)
plt.ylabel('Percentage Change in Sales', fontsize=18)
plt.xticks(fontsize=15, rotation=45)

# Horizontal grid lines only.
axes = plt.gca()
axes.yaxis.grid()
plt.show()


### **서초동(서초1~4동)에 대해서도 같은 코드 진행**

In [None]:
# Same pipeline as the Itaewon cells, applied to the four Seocho `adstrd_nm` values.
seocho = card[card.adstrd_nm.isin(['서초1동', '서초2동', '서초3동', '서초4동'])]

# Keep the four columns used by the analysis under short names.
seocho_need = seocho[['receipt_dttm', 'adstrd_nm', 'mrhst_induty_cl_nm', 'salamt']].rename(
    columns={
        'receipt_dttm': 'date',
        'adstrd_nm': 'town',
        'mrhst_induty_cl_nm': 'category',
        'salamt': 'sale',
    }
)

# Total sale per (date, category); the list-valued agg spec is kept because the
# ['sale'] selections below must return DataFrames for .merge to work.
seocho_category = (
    seocho_need
    .groupby(['date', 'category'])
    .agg({'sale': ['sum']})
    .reset_index()
)

# Window ending on 20200508 and window starting on 20200509 (bounds inclusive).
seocho_before = seocho_category[(seocho_category.date >= 20200424) & (seocho_category.date <= 20200508)]
seocho_after = seocho_category[(seocho_category.date >= 20200509) & (seocho_category.date <= 20200523)]

# Per-category totals inside each window.
seocho_before_sum = seocho_before.groupby(['category']).sum()['sale']
seocho_after_sum = seocho_after.groupby(['category']).sum()['sale']

# Join on category (default inner join: only categories present in both windows),
# then compute the absolute and the relative change.
seocho_all = seocho_before_sum.merge(seocho_after_sum, left_index=True, right_index=True)
seocho_all.columns = ['before', 'after']
seocho_all['difference'] = seocho_all['after'] - seocho_all['before']
seocho_all['pct_ch'] = seocho_all['difference'] / seocho_all['before']

seocho_sort_diff = seocho_all.sort_values(by=['difference'])
seocho_sort_pct = seocho_all.sort_values(by=['pct_ch'])

# Five lowest and five highest rows of the ascending pct_ch sort.
seocho_plot = pd.concat([seocho_sort_pct.head(), seocho_sort_pct.tail()])

# Bar colour by the sign of the sale difference (blue = increase, pink = otherwise).
# Values are iterated directly rather than read with `series[i]`: an integer key
# on a category-labelled Series is a deprecated positional lookup in pandas.
color = ['lightblue' if diff > 0 else 'lightpink' for diff in seocho_plot['difference']]

plt.bar(seocho_plot.index, seocho_plot['pct_ch'], color=color)
plt.title("\n<Seocho> Strongly Affected Categories\n", fontsize=20)
plt.xlabel('Category', fontsize=18)
plt.ylabel('Percentage Change in Sales', fontsize=18)
plt.xticks(fontsize=15, rotation=45)

# Horizontal grid lines only.
axes = plt.gca()
axes.yaxis.grid()
plt.show()


In [None]:
# Keep the first nine rows of seocho_plot, i.e. everything before the last row
# of the ascending pct_ch ordering.
seocho_plot_new = seocho_plot.iloc[:9]
seocho_plot_new

In [None]:
# Bar colour by the sign of the sale difference (blue = increase, pink = otherwise).
# Values are iterated directly rather than read with `series[i]`: an integer key
# on a category-labelled Series is a deprecated positional lookup in pandas.
color = ['lightblue' if diff > 0 else 'lightpink' for diff in seocho_plot_new['difference']]

# Same bar chart as before, drawn from the nine-row subset.
plt.bar(seocho_plot_new.index, seocho_plot_new['pct_ch'], color=color)
plt.title("\n<Seocho> Strongly Affected Categories\n", fontsize=20)
plt.xlabel('Category', fontsize=18)
plt.ylabel('Percentage Change in Sales', fontsize=18)
plt.xticks(fontsize=15, rotation=45)

# Horizontal grid lines only.
axes = plt.gca()
axes.yaxis.grid()
plt.show()


In [None]:
%%capture
# Download the colab_pdf helper (skipped by -nc if the file already exists),
# import it, and call it on this notebook's file name -- presumably to export
# the notebook as a PDF; the cell's output is suppressed by %%capture.
# NOTE(review): the script comes from the unpinned `master` branch of a
# third-party repository and is executed on import; consider pinning a commit.
!wget -nc https://raw.githubusercontent.com/brpy/colab-pdf/master/colab_pdf.py
from colab_pdf import colab_pdf
colab_pdf('기말_이태원분석.ipynb')