## Melt
Pandas에서 melt 함수는 데이터의 형태를 변경하는 데 사용되는 '긴 형식(long format)'으로 변환하는 작업을 말합니다. 이는 여러 열에 걸쳐 있던 데이터를 하나의 열로 통합하고, 해당 데이터의 식별 변수를 다른 열로 표시하여, 데이터를 더욱 분석하기 용이한 형태로 만듭니다. melt는 데이터를 재구조화하는 데 유용하며, 특히 여러 변수가 하나의 열에 포함된 '키-값' 쌍 형태로 나타낼 때 많이 사용됩니다.

<br>

### Melt의 주요 매개변수
* id_vars: 위치를 그대로 유지할 열의 이름입니다. 이 열들의 데이터는 여러 행으로 반복됩니다.
* value_vars: 긴 형식으로 변환할 열의 이름입니다. 지정하지 않으면, id_vars를 제외한 모든 열이 value_vars로 간주됩니다.
* var_name: value_vars로 지정된 열의 이름을 저장할 열의 이름입니다. 기본값은 'variable'입니다.
* value_name: value_vars 열의 값이 저장될 열의 이름입니다. 기본값은 'value'입니다.

In [2]:
# 셀병합
import pandas as pd
df = pd.read_csv("Data/pew.csv")
df.sort_values("Don't know/refused")
df.sort_index()

Unnamed: 0,religion,<$10k,$10-20k,$20-30k,$30-40k,$40-50k,$50-75k,$75-100k,$100-150k,>150k,Don't know/refused
0,Agnostic,27,34,60,81,76,137,122,109,84,96
1,Atheist,12,27,37,52,35,70,73,59,74,76
2,Buddhist,27,21,30,34,33,58,62,39,53,54
3,Catholic,418,617,732,670,638,1116,949,792,633,1489
4,Don’t know/refused,15,14,15,11,10,35,21,17,18,116
5,Evangelical Prot,575,869,1064,982,881,1486,949,723,414,1529
6,Hindu,1,9,7,9,11,34,47,48,54,37
7,Historically Black Prot,228,244,236,238,197,223,131,81,78,339
8,Jehovah's Witness,20,27,24,24,21,30,15,11,6,37
9,Jewish,19,19,25,25,30,95,69,87,151,162


In [23]:
# 각 특성(컬럼)들이 성격이 독립적이어야함(다중공선성 문제)
df.melt(id_vars="religion", 
        var_name="income",
        value_name="count")

Unnamed: 0,year,artist,track,time,date.entered,wk1,wk2,wk3,wk4,wk5,...,wk67,wk68,wk69,wk70,wk71,wk72,wk73,wk74,wk75,wk76
0,2000,2 Pac,Baby Don't Cry (Keep...,4:22,2000-02-26,87,82.0,72.0,77.0,87.0,...,,,,,,,,,,
1,2000,2Ge+her,The Hardest Part Of ...,3:15,2000-09-02,91,87.0,92.0,,,...,,,,,,,,,,
2,2000,3 Doors Down,Kryptonite,3:53,2000-04-08,81,70.0,68.0,67.0,66.0,...,,,,,,,,,,
3,2000,3 Doors Down,Loser,4:24,2000-10-21,76,76.0,72.0,69.0,67.0,...,,,,,,,,,,
4,2000,504 Boyz,Wobble Wobble,3:35,2000-04-15,57,34.0,25.0,17.0,17.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
312,2000,Yankee Grey,Another Nine Minutes,3:10,2000-04-29,86,83.0,77.0,74.0,83.0,...,,,,,,,,,,
313,2000,"Yearwood, Trisha",Real Live Woman,3:55,2000-04-01,85,83.0,83.0,82.0,81.0,...,,,,,,,,,,
314,2000,Ying Yang Twins,Whistle While You Tw...,4:19,2000-03-18,95,94.0,91.0,85.0,84.0,...,,,,,,,,,,
315,2000,Zombie Nation,Kernkraft 400,3:30,2000-09-02,99,99.0,,,,...,,,,,,,,,,


In [50]:
bb=pd.read_csv("Data/billboard.csv") #미국 음악레이팅 점수
bb_melt = bb.melt(id_vars=["year", "artist", "track", "time", "date.entered"],
                  var_name="week",
                  value_name="score")

#bb_melt.dropna() # NA값이 있는 행은 삭제
bb_melt.dropna(subset="score")

Unnamed: 0,year,artist,track,time,date.entered,week,score
0,2000,2 Pac,Baby Don't Cry (Keep...,4:22,2000-02-26,wk1,87.0
1,2000,2Ge+her,The Hardest Part Of ...,3:15,2000-09-02,wk1,91.0
2,2000,3 Doors Down,Kryptonite,3:53,2000-04-08,wk1,81.0
3,2000,3 Doors Down,Loser,4:24,2000-10-21,wk1,76.0
4,2000,504 Boyz,Wobble Wobble,3:35,2000-04-15,wk1,57.0
...,...,...,...,...,...,...,...
19716,2000,Creed,Higher,5:16,1999-09-11,wk63,50.0
19833,2000,Lonestar,Amazed,4:25,1999-06-05,wk63,45.0
20033,2000,Creed,Higher,5:16,1999-09-11,wk64,50.0
20150,2000,Lonestar,Amazed,4:25,1999-06-05,wk64,50.0


In [53]:
ev = pd.read_csv("Data/country_timeseries.csv") # 에볼라 바이러스 생존자 및 사망자 수
ev

Unnamed: 0,Date,Day,Cases_Guinea,Cases_Liberia,Cases_SierraLeone,Cases_Nigeria,Cases_Senegal,Cases_UnitedStates,Cases_Spain,Cases_Mali,Deaths_Guinea,Deaths_Liberia,Deaths_SierraLeone,Deaths_Nigeria,Deaths_Senegal,Deaths_UnitedStates,Deaths_Spain,Deaths_Mali
0,1/5/2015,289,2776.0,,10030.0,,,,,,1786.0,,2977.0,,,,,
1,1/4/2015,288,2775.0,,9780.0,,,,,,1781.0,,2943.0,,,,,
2,1/3/2015,287,2769.0,8166.0,9722.0,,,,,,1767.0,3496.0,2915.0,,,,,
3,1/2/2015,286,,8157.0,,,,,,,,3496.0,,,,,,
4,12/31/2014,284,2730.0,8115.0,9633.0,,,,,,1739.0,3471.0,2827.0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
117,3/27/2014,5,103.0,8.0,6.0,,,,,,66.0,6.0,5.0,,,,,
118,3/26/2014,4,86.0,,,,,,,,62.0,,,,,,,
119,3/25/2014,3,86.0,,,,,,,,60.0,,,,,,,
120,3/24/2014,2,86.0,,,,,,,,59.0,,,,,,,


In [95]:
ev
ev_melt = ev.melt(id_vars=["Date", "Day"],
                   var_name="week",
                    value_name="score")

ev['Cases_sum'] = ev.iloc[:,2:9].sum(axis=1)
ev['Deaths_sum'] = ev.iloc[:, 10:].sum(axis=1)

ev_melt=ev.melt(id_vars=['Date','Day','Cases_sum','Deaths_sum'],
               var_name="country",
               value_name="count")
ev_melt.dropna(subset="count")

ev_state = ev_melt['country'].str.split('_').str[0]
ev_country = ev_melt['country'].str.split('_').str[1]

ev_melt['state'] = ev_state
ev_melt['country'] = ev_country
ev_melt[['Date','Day','country','state','count']]

Unnamed: 0,Date,Day,country,state,count
0,1/5/2015,289,Guinea,Cases,2776.0
1,1/4/2015,288,Guinea,Cases,2775.0
2,1/3/2015,287,Guinea,Cases,2769.0
3,1/2/2015,286,Guinea,Cases,
4,12/31/2014,284,Guinea,Cases,2730.0
...,...,...,...,...,...
1947,3/27/2014,5,Mali,Deaths,
1948,3/26/2014,4,Mali,Deaths,
1949,3/25/2014,3,Mali,Deaths,
1950,3/24/2014,2,Mali,Deaths,


In [3]:
concat1 = pd.read_csv("Data/concat_1.csv")
concat2 = pd.read_csv("Data/concat_2.csv")
concat3 = pd.read_csv("Data/concat_3.csv")

# 열이름을 기준으로 합침
pd.concat([concat1, concat2, concat3], axis=1)

Unnamed: 0,A,B,C,D,A.1,B.1,C.1,D.1,A.2,B.2,C.2,D.2
0,a0,b0,c0,d0,a4,b4,c4,d4,a8,b8,c8,d8
1,a1,b1,c1,d1,a5,b5,c5,d5,a9,b9,c9,d9
2,a2,b2,c2,d2,a6,b6,c6,d6,a10,b10,c10,d10
3,a3,b3,c3,d3,a7,b7,c7,d7,a11,b11,c11,d11
