# DataFrame 활용 (Pandas)
_본 자료는 안수찬 강사님의 파이썬을 활용한 업무자동화 Camp (fast campus)의 강의자료를 기반으로 만들어졌습니다._  
만든이 : 김보섭  

#### _**Pandas, mutagen library의 MP3 Class 등을 활용합니다.**_ 


### DataFrame 활용

#### mp3_df DataFrame 생성

In [1]:
import os, sys
import pandas as pd
import shutil
from mutagen.mp3 import MP3 # Class를 불러온다

In [2]:
# List the sample directory; the result shows that it also holds entries
# that are not MP3 files (e.g. a stray .txt file), which must be filtered out.
os.listdir('./sample_mp3/')

['.ipynb_checkpoints',
 '2HG0Z4C2.mp3',
 '2W1KHXPI.mp3',
 '3KTIS7NN.mp3',
 'BWD5GM5Q.mp3',
 'DJJUFDQX.mp3',
 'IXMFB7H1.mp3',
 'README.txt',
 'RIS29UDF.mp3',
 'T51NGFL6.mp3',
 'WHYGK9R4.mp3',
 'YUF527VD.mp3']

In [3]:
# Keep only the directory entries whose name ends with the '.mp3' extension
# (the usual way to select files of one particular type).
mp3_filenames = list(
    filter(lambda name: name.endswith('.mp3'), os.listdir('./sample_mp3/'))
)
mp3_filenames

['2HG0Z4C2.mp3',
 '2W1KHXPI.mp3',
 '3KTIS7NN.mp3',
 'BWD5GM5Q.mp3',
 'DJJUFDQX.mp3',
 'IXMFB7H1.mp3',
 'RIS29UDF.mp3',
 'T51NGFL6.mp3',
 'WHYGK9R4.mp3',
 'YUF527VD.mp3']

In [4]:
# Turn the list of file names into a single-column DataFrame and preview
# its first rows.
mp3_df = pd.DataFrame(data=mp3_filenames, columns=['filename'])
mp3_df.head(n=5)

Unnamed: 0,filename
0,2HG0Z4C2.mp3
1,2W1KHXPI.mp3
2,3KTIS7NN.mp3
3,BWD5GM5Q.mp3
4,DJJUFDQX.mp3


In [5]:
# Open one sample file with mutagen's MP3 class so its tags can be looked up.
mp3 = MP3('./sample_mp3/2HG0Z4C2.mp3')

In [6]:
# Look up the 'TPE2' tag frame of the opened file; for this sample its text
# holds the artist name.
mp3.get('TPE2')

TPE2(encoding=<Encoding.UTF16: 1>, text=['The Weeknd'])

#### mp3_df에 filepath column 추가

In [7]:
# Do the same for every file at once: derive each file's relative path
# from its name and store it in a new 'filepath' column.
mp3_df['filepath'] = mp3_df['filename'].map(
    lambda name: os.path.join('.', 'sample_mp3', name)
)
mp3_df.head()

Unnamed: 0,filename,filepath
0,2HG0Z4C2.mp3,.\sample_mp3\2HG0Z4C2.mp3
1,2W1KHXPI.mp3,.\sample_mp3\2W1KHXPI.mp3
2,3KTIS7NN.mp3,.\sample_mp3\3KTIS7NN.mp3
3,BWD5GM5Q.mp3,.\sample_mp3\BWD5GM5Q.mp3
4,DJJUFDQX.mp3,.\sample_mp3\DJJUFDQX.mp3


In [8]:
# Open the file referenced by the first row and look up its 'TIT2' frame,
# whose text is the track title.
mp3 = MP3(mp3_df['filepath'].iloc[0])
mp3.get('TIT2')

TIT2(encoding=<Encoding.UTF16: 1>, text=["Can't Feel My Face"])

#### mp3_df에 title, new_filename, new_filepath 추가

In [9]:
# Tracks from official music sites carry MP3 metadata (artist, license,
# lyrics, title, ...).
# BeautifulSoup (HTML parser), MP3 (MP3 file parser), image parsers ...
# Add the title of every file in one pass; the new file names and paths
# are built from it afterwards.
def _read_title(path):
    """Return the text of the TIT2 (title) frame of the MP3 file at *path*.

    ``MP3(path).get('TIT2')`` yields ``None`` when the file has no such
    frame, so dereferencing ``.text`` unconditionally would raise
    ``AttributeError`` and abort the whole column computation.  In that case
    (or when the frame carries no text) the file's base name without its
    extension is used instead, so every row still gets a usable title.
    """
    frame = MP3(path).get('TIT2')
    if frame is None or not frame.text:
        return os.path.splitext(os.path.basename(path))[0]
    return frame.text[0]


mp3_df['title'] = mp3_df.filepath.apply(_read_title)
mp3_df.head()

Unnamed: 0,filename,filepath,title
0,2HG0Z4C2.mp3,.\sample_mp3\2HG0Z4C2.mp3,Can't Feel My Face
1,2W1KHXPI.mp3,.\sample_mp3\2W1KHXPI.mp3,What Do You Mean
2,3KTIS7NN.mp3,.\sample_mp3\3KTIS7NN.mp3,Watch Me
3,BWD5GM5Q.mp3,.\sample_mp3\BWD5GM5Q.mp3,Cheerleader
4,DJJUFDQX.mp3,.\sample_mp3\DJJUFDQX.mp3,Lean on


In [10]:
# Build the target file name from each title.  Titles are free-form tag
# text, so path separators and characters that Windows forbids in file
# names are replaced with '_' before the extension is appended; otherwise a
# title such as "AC/DC" would resolve to a non-existent sub-directory when
# the files are copied.
_INVALID_FILENAME_CHARS = set('<>:"/\\|?*')


def _title_to_filename(title):
    """Return ``title + '.mp3'`` with file-name-unsafe characters replaced by '_'."""
    safe_title = ''.join(
        '_' if char in _INVALID_FILENAME_CHARS else char for char in title
    )
    return safe_title + '.mp3'


mp3_df['new_filename'] = mp3_df.title.apply(_title_to_filename)
mp3_df.head()

Unnamed: 0,filename,filepath,title,new_filename
0,2HG0Z4C2.mp3,.\sample_mp3\2HG0Z4C2.mp3,Can't Feel My Face,Can't Feel My Face.mp3
1,2W1KHXPI.mp3,.\sample_mp3\2W1KHXPI.mp3,What Do You Mean,What Do You Mean.mp3
2,3KTIS7NN.mp3,.\sample_mp3\3KTIS7NN.mp3,Watch Me,Watch Me.mp3
3,BWD5GM5Q.mp3,.\sample_mp3\BWD5GM5Q.mp3,Cheerleader,Cheerleader.mp3
4,DJJUFDQX.mp3,.\sample_mp3\DJJUFDQX.mp3,Lean on,Lean on.mp3


In [11]:
# Place every renamed file under the ./mp3 output directory and keep the
# resulting destination path in a new 'new_filepath' column.
mp3_df['new_filepath'] = mp3_df['new_filename'].map(
    lambda name: os.path.join('.', 'mp3', name)
)
mp3_df.head()

Unnamed: 0,filename,filepath,title,new_filename,new_filepath
0,2HG0Z4C2.mp3,.\sample_mp3\2HG0Z4C2.mp3,Can't Feel My Face,Can't Feel My Face.mp3,.\mp3\Can't Feel My Face.mp3
1,2W1KHXPI.mp3,.\sample_mp3\2W1KHXPI.mp3,What Do You Mean,What Do You Mean.mp3,.\mp3\What Do You Mean.mp3
2,3KTIS7NN.mp3,.\sample_mp3\3KTIS7NN.mp3,Watch Me,Watch Me.mp3,.\mp3\Watch Me.mp3
3,BWD5GM5Q.mp3,.\sample_mp3\BWD5GM5Q.mp3,Cheerleader,Cheerleader.mp3,.\mp3\Cheerleader.mp3
4,DJJUFDQX.mp3,.\sample_mp3\DJJUFDQX.mp3,Lean on,Lean on.mp3,.\mp3\Lean on.mp3


#### mp3_df의 filepath column에 해당하는 mp3파일을 new_filepath에 복사하기

In [12]:
# Start from an empty ./mp3 output directory: remove a leftover one from a
# previous run (with everything in it), then create it afresh.
if any(entry == 'mp3' for entry in os.listdir('.')):
    shutil.rmtree('./mp3')
os.mkdir('mp3')

In [13]:
# Copy every source file to its title-based destination path.
# The two columns are read by label rather than by position: integer
# lookups such as row[1] / row[4] on a label-indexed row are deprecated in
# recent pandas and would pick the wrong column if the column order changed.
for filepath, new_filepath in zip(mp3_df['filepath'], mp3_df['new_filepath']):
    shutil.copy2(filepath, new_filepath)