# Gaon Chart EDA

### Data from: *http://gaonchart.co.kr/*

##### Notes:<br>- For some reason, the sum of an album's monthly new sales does not always equal its reported cumulative sales<br>- Based on values from other sources, I'll use the cumulative sales column to analyze total sales<br>- Additionally, because the data below is aggregated by month, the numbers do not account for the exact date an album was released

### Import Data and Libraries

In [18]:
import pandas as pd
import plotly.express as px

In [19]:
# Read the chart CSV into a DataFrame and display it
df = pd.read_csv(filepath_or_buffer="GaonChartAlbumsMonthly2021New.csv")
df

Unnamed: 0,month,month_name,album_rank,rank_change,rank_difference,title,artist,new_sales,cumulative_sales,production
0,1,January,1,new,,THE FIRST STEP : TREASURE EFFECT,TREASURE,266894,266894,YG PLUS
1,1,January,2,down,-1.0,NCT RESONANCE Pt. 2 - The 2nd Album,NCT,244629,244629,Dreamus
2,1,January,3,new,,I burn,(G)I-DLE,159268,159268,Kakao Entertainment
3,1,January,4,new,,NOIR - The 2nd Mini Album,U-KNOW,138236,138236,Dreamus
4,1,January,5,new,,HIDEOUT: BE OUR VOICE - SEASON 3.,CRAVITY,112301,112301,Kakao Entertainment
...,...,...,...,...,...,...,...,...,...,...
1195,12,December,96,up,49.0,Merry & Happy,TWICE,3022,15763,Dreamus
1196,12,December,97,up,24.0,LALISA,LISA,2919,708475,YG PLUS
1197,12,December,98,up,21.0,VENI VIDI VICI,TRI.BE,2730,13345,Universal Music
1198,12,December,99,up,32.0,Love poem,IU,2685,31277,Kakao Entertainment


### New Sales Data

In [20]:
# Summary statistics of the new_sales column, rendered as whole numbers
as_whole_number = "{0:.0f}".format
new_sales_summary = df["new_sales"].describe()
new_sales_summary.apply(as_whole_number)

count       1200
mean       47043
std       171823
min         2238
25%         6225
50%        10866
75%        24976
max      2490969
Name: new_sales, dtype: object

In [96]:
# Total new sales per month across every charted album (not just the Top 10).
# Bars are split by rank_change: rows marked "new" keep that label, every
# other row is relabelled "previously released".
release_status = df["rank_change"].where(df["rank_change"] == "new", "previously released")
px.histogram(
    df,
    x="month_name",
    y="new_sales",
    nbins=12,
    barmode='group',
    color=release_status,
    title='Total New Sales Per Month',
)

### Monthly Data

#### Monthly Top 10 Albums

In [22]:
# Get the Top 10 albums of each month.
# Sort by month and chart rank first so that "the first 10 rows of each month"
# really are ranks 1-10, instead of silently relying on the row order of the CSV.
monthly_top_10 = df.sort_values(["month", "album_rank"]).groupby("month").head(10)
# Display January (month == 1) as a quick check of the result
monthly_top_10.loc[monthly_top_10["month"] == 1]

Unnamed: 0,month,month_name,album_rank,rank_change,rank_difference,title,artist,new_sales,cumulative_sales,production
0,1,January,1,new,,THE FIRST STEP : TREASURE EFFECT,TREASURE,266894,266894,YG PLUS
1,1,January,2,down,-1.0,NCT RESONANCE Pt. 2 - The 2nd Album,NCT,244629,244629,Dreamus
2,1,January,3,new,,I burn,(G)I-DLE,159268,159268,Kakao Entertainment
3,1,January,4,new,,NOIR - The 2nd Mini Album,U-KNOW,138236,138236,Dreamus
4,1,January,5,new,,HIDEOUT: BE OUR VOICE - SEASON 3.,CRAVITY,112301,112301,Kakao Entertainment
5,1,January,6,new,,DEVIL,ONEUS,98014,98014,Kakao Entertainment
6,1,January,7,new,,Dystopia : Road to Utopia,Dreamcatcher,91469,91469,Genie Music
7,1,January,8,new,,VOICE : The future is now,VICTON,89960,89960,Kakao Entertainment
8,1,January,9,new,,5th Mini Album [YES.],Golden Child,74863,74863,Kakao Entertainment
9,1,January,10,up,16.0,THE ALBUM,BLACKPINK,67387,67387,YG PLUS


In [23]:
# Histogram of new sales of the Top 10 albums for a single month.
# Change `selected_month` to any value found in `month_name` to view another
# month; the chart title follows automatically.
selected_month = "March"
px.histogram(
    monthly_top_10.loc[monthly_top_10["month_name"] == selected_month],
    x="title",
    y="new_sales",
    color="artist",
    title=f"Top 10 Albums of {selected_month}",
)

#### Monthly Top Ranked Album

In [24]:
# Get the top ranked album of each month.
# Sorting by month and chart rank makes the first row of every month the
# best-ranked album regardless of the row order in the loaded CSV.
monthly_first = df.sort_values(["month", "album_rank"]).groupby("month").head(1)
monthly_first

Unnamed: 0,month,month_name,album_rank,rank_change,rank_difference,title,artist,new_sales,cumulative_sales,production
0,1,January,1,new,,THE FIRST STEP : TREASURE EFFECT,TREASURE,266894,266894,YG PLUS
100,2,February,1,up,16.0,BE,BTS,763083,788535,Dreamus
200,3,March,1,new,,Bambi - The 3rd Mini Album,BAEKHYUN,591944,591944,Dreamus
300,4,April,1,new,,BORDER : CARNIVAL,ENHYPEN,522136,522136,"Genie Music, Stone Music Entertainment"
400,5,May,1,new,,맛 (Hot Sauce) - The 1st Album,NCT DREAM,1995091,1995091,Dreamus
500,6,June,1,new,,8th Mini Album `Your Choice`,SEVENTEEN,1391964,1391964,YG PLUS
600,7,July,1,new,,Butter,BTS,2490969,2490969,YG PLUS
700,8,August,1,new,,NOEASY,Stray Kids,1127800,1127800,Dreamus
800,9,September,1,new,,Sticker - The 3rd Album,NCT 127,2277575,2277575,Dreamus
900,10,October,1,new,,9th Mini Album `Attacca`,SEVENTEEN,1968829,1968829,YG PLUS


In [25]:
# New sales of each month's top ranked album, one bar per month, colored by album title
px.histogram(
    monthly_first,
    x="month_name",
    y="new_sales",
    nbins=12,
    color="title",
    title='Top Ranked Album of Each Month',
)

In [26]:
# Fraction of each month's total new sales that comes from that month's top album
# (values are ratios between 0 and 1, rounded to two decimals)
monthly_new_sales_total = df.groupby("month")["new_sales"].sum()
# head(1) keeps only the first row of every month, which is the #1 album in the
# chart's row order; the trailing groupby/sum re-indexes the values by month.
monthly_new_sales_top_album = df.groupby("month").head(1).groupby("month")["new_sales"].sum()
# The original variable name said "top_10" although only the single top album
# is included; keep it as an alias in case other cells still reference it.
monthly_new_sales_top_10_total = monthly_new_sales_top_album
(monthly_new_sales_top_album / monthly_new_sales_total).round(2)


month
1     0.12
2     0.29
3     0.15
4     0.11
5     0.41
6     0.22
7     0.53
8     0.22
9     0.37
10    0.31
11    0.13
12    0.40
Name: new_sales, dtype: float64

### Annual Data

In [27]:
# Order the rows by album title.
# Optional: this only makes the per-album aggregation in the next step easier to follow by eye.
sort_by_album = df.sort_values("title")
sort_by_album

Unnamed: 0,month,month_name,album_rank,rank_change,rank_difference,title,artist,new_sales,cumulative_sales,production
911,10,October,12,new,,..선물,LEE CHAN WON (이찬원),70803,70803,MUSIC&NEW
1021,11,November,22,down,-10.0,..선물,LEE CHAN WON (이찬원),40584,111387,MUSIC&NEW
837,9,September,38,new,,1+1=1,HyunA&DAWN,14824,14824,Kakao Entertainment
388,4,April,89,new,,12월의 기적 (Miracles in December) (Korean Ver.),EXO,8193,8192,Dreamus
862,9,September,63,down,-55.0,1ST ALBUM PART 1 [The Awakening: Written In Th...,CRAVITY,7952,149540,Kakao Entertainment
...,...,...,...,...,...,...,...,...,...,...
963,10,October,64,down,-27.0,화양연화 pt.2,BTS,8751,133429,YG PLUS
546,6,June,47,up,4.0,화양연화 pt.2,BTS,11302,80134,YG PLUS
36,1,January,37,down,-1.0,화양연화 pt.2,BTS,9944,9944,Dreamus
138,2,February,39,down,-2.0,화양연화 pt.2,BTS,12324,22268,Dreamus


In [28]:
# Annual Top 100: take each title's highest cumulative_sales value, then keep
# the 100 largest.
# NOTE(review): rows are grouped by title only, so two different albums that
# share a title would be merged into one entry - confirm this cannot happen.
total_sales = sort_by_album.groupby("title")["cumulative_sales"].max()
best_sellers = total_sales.sort_values(ascending=False).head(100)
annual_top_100 = best_sellers.to_frame().reset_index()
annual_top_100

Unnamed: 0,title,cumulative_sales
0,Butter,2999407
1,Sticker - The 3rd Album,2431995
2,맛 (Hot Sauce) - The 1st Album,2097185
3,9th Mini Album `Attacca`,2059073
4,Universe - The 3rd Album,1630715
...,...,...
95,SUMMER POPUP ALBUM [POPPING],111875
96,..선물,111387
97,The Book of Us : Negentropy - Chaos swallowed ...,110106
98,6TH MINI ALBUM [Goosebumps],109805


In [29]:
# Add an album_rank column where rank 1 is the highest cumulative_sales
sales_rank = annual_top_100["cumulative_sales"].rank(ascending=False)
annual_top_100["album_rank"] = sales_rank
annual_top_100

Unnamed: 0,title,cumulative_sales,album_rank
0,Butter,2999407,1.0
1,Sticker - The 3rd Album,2431995,2.0
2,맛 (Hot Sauce) - The 1st Album,2097185,3.0
3,9th Mini Album `Attacca`,2059073,4.0
4,Universe - The 3rd Album,1630715,5.0
...,...,...,...
95,SUMMER POPUP ALBUM [POPPING],111875,96.0
96,..선물,111387,97.0
97,The Book of Us : Negentropy - Chaos swallowed ...,110106,98.0
98,6TH MINI ALBUM [Goosebumps],109805,99.0


In [30]:
# Cumulative sales of every album in the annual Top 100, ordered by rank and colored by title
px.histogram(
    annual_top_100,
    x="album_rank",
    y="cumulative_sales",
    nbins=100,
    color="title",
    title='Top 100 Albums of the Year',
)

#### Albums Ranked Year-Round

In [31]:
# Keep only the albums that appeared on the chart in every month of the year.
# Distinct months are counted (rather than rows) so that a title occurring more
# than once in a single month can neither reach 12 by accident nor be excluded
# for having more than 12 rows.
MONTHS_IN_YEAR = 12
# For every row, the number of distinct months in which that row's title charted
months_charted = df.groupby("title")["month"].transform("nunique")
year_round_albums = df[months_charted == MONTHS_IN_YEAR]
year_round_albums

Unnamed: 0,month,month_name,album_rank,rank_change,rank_difference,title,artist,new_sales,cumulative_sales,production
9,1,January,10,up,16.0,THE ALBUM,BLACKPINK,67387,67387,YG PLUS
16,1,January,17,down,-5.0,BE,BTS,25452,25452,Dreamus
17,1,January,18,down,-2.0,MAP OF THE SOUL : 7,BTS,24801,24801,Dreamus
18,1,January,19,down,-1.0,MAP OF THE SOUL : PERSONA,BTS,24693,24693,Dreamus
19,1,January,20,down,-11.0,BORDER : DAY ONE,ENHYPEN,24576,24576,"Genie Music, Stone Music Entertainment"
...,...,...,...,...,...,...,...,...,...,...
1157,12,December,58,up,19.0,IN生,Stray Kids,9230,125570,Dreamus
1158,12,December,59,up,6.0,THE ALBUM,BLACKPINK,9194,190729,YG PLUS
1159,12,December,60,down,-9.0,2 Cool 4 Skool,BTS,8609,115160,YG PLUS
1165,12,December,66,up,6.0,GO生,Stray Kids,7903,113463,Dreamus


In [32]:
# Cumulative sales across the year, one line per album.
# Restricted to the albums that appeared on the chart every month.
px.line(
    year_round_albums,
    x="month_name",
    y="cumulative_sales",
    color="title",
    title='Annual Growth of Sales',
)
# Alternative view kept for reference: monthly new sales for a single artist
# px.line(year_round_albums.loc[year_round_albums["artist"] == "BTS"], x="month", y="new_sales", title='Graph', color="title")


### Artist Counts

In [33]:
# Count the distinct values in every column.
# The row of interest is `artist`: the number of different artists that
# appeared in the monthly charts.
# NOTE: despite its name, `num_unique_artists` holds the counts for all columns.
num_unique_artists = df.nunique(axis=0)
num_unique_artists

month                 12
month_name            12
album_rank           100
rank_change            5
rank_difference      137
title                491
artist               205
new_sales           1155
cumulative_sales    1179
production            15
dtype: int64

In [34]:
# How many chart rows each artist has over the year (one row per album per month),
# showing the ten most frequent artists
artist_mentions = df["artist"].value_counts()
artist_mentions.iloc[:10]

BTS            187
Stray Kids      91
TXT             61
BLACKPINK       37
SEVENTEEN       35
TWICE           35
NCT DREAM       34
NCT 127         32
THE BOYZ        27
ENHYPEN         24
Name: artist, dtype: int64

### Albums With Greatest Rank Changes

In [88]:
# Drop the rows that have no rank_difference value, then order the rest by
# month (ascending) and, within each month, by rank_difference (descending)
rows_with_difference = df.dropna(subset=["rank_difference"])
sort_by_rank_difference = rows_with_difference.sort_values(
    by=["month", "rank_difference"],
    ascending=[True, False],
)
sort_by_rank_difference

Unnamed: 0,month,month_name,album_rank,rank_change,rank_difference,title,artist,new_sales,cumulative_sales,production
38,1,January,39,up,82.0,SuperM - The 1st Mini Album,SuperM,9723,9723,Dreamus
86,1,January,87,up,77.0,`Present : YOU` &ME Edition,GOT7,3165,3165,Dreamus
76,1,January,77,up,73.0,Call My Name,GOT7,3704,3704,Dreamus
58,1,January,59,up,70.0,REVEAL,THE BOYZ,6041,6041,Kakao Entertainment
69,1,January,70,up,64.0,4th MINI ALBUM [DreamLike],THE BOYZ,4896,4896,Kakao Entertainment
...,...,...,...,...,...,...,...,...,...,...
1140,12,December,41,down,-38.0,3rd Single Album [MAVERICK],THE BOYZ,14382,642369,Kakao Entertainment
1169,12,December,70,down,-43.0,Guilty Pleasure,Hwa Sa,6949,37308,Kakao Entertainment
1175,12,December,76,down,-61.0,폐허가 된다 해도,LEE SEUNG YOON,4708,66266,Kakao Entertainment
1178,12,December,79,down,-61.0,Re:T-ARA,T-ara,4656,53956,Dreamus


In [92]:
# Album with the largest rank_difference in each month (the biggest climb).
# Works because sort_by_rank_difference lists every month's rows from the
# highest rank_difference to the lowest, so the first row per month is the maximum.
months_high_to_low = sort_by_rank_difference.groupby('month')
greatest_pos_change = months_high_to_low.head(1)
greatest_pos_change

Unnamed: 0,month,month_name,album_rank,rank_change,rank_difference,title,artist,new_sales,cumulative_sales,production
38,1,January,39,up,82.0,SuperM - The 1st Mini Album,SuperM,9723,9723,Dreamus
195,2,February,96,up,86.0,중독 (Overdose),EXO-K,3461,3979,Dreamus
218,3,March,19,up,62.0,BORDER : DAY ONE,ENHYPEN,29332,59074,"Genie Music, Stone Music Entertainment"
331,4,April,32,up,73.0,NCT #127 Regulate - The 1st Album Repackage,NCT 127,29984,34918,Dreamus
433,5,May,34,up,62.0,3RD ALBUM `An Ode`,SEVENTEEN,20608,70164,YG PLUS
594,6,June,95,up,29.0,7th Mini Album [GATEWAY],ASTRO,5127,32980,Kakao Entertainment
628,7,July,29,up,80.0,JUST BURN,JUST B,14767,18210,Bugs (NHN벅스)
761,8,August,62,up,50.0,NCT RESONANCE Pt. 2 - The 2nd Album,NCT,7337,388555,Dreamus
885,9,September,86,up,76.0,Dear OHMYGIRL,OH MY GIRL,4152,105997,Sony Music
936,10,October,37,up,65.0,Dark Dream,E`LAST,19130,22130,MUSIC&NEW


In [93]:
# Album with the smallest rank_difference in each month (the biggest drop).
# Works because sort_by_rank_difference lists every month's rows from the
# highest rank_difference to the lowest, so the last row per month is the minimum.
months_by_difference = sort_by_rank_difference.groupby('month')
greatest_neg_change = months_by_difference.tail(1)
greatest_neg_change

Unnamed: 0,month,month_name,album_rank,rank_change,rank_difference,title,artist,new_sales,cumulative_sales,production
78,1,January,79,down,-68.0,3rd Mini Album `INSIDE ME`,KIM SUNG KYU (김성규),3465,3465,Kakao Entertainment
184,2,February,85,down,-84.0,THE FIRST STEP : TREASURE EFFECT,TREASURE,4329,271223,YG PLUS
299,3,March,100,down,-77.0,NCT #127 CHERRY BOMB - The 3rd Mini Album,NCT 127,5391,29993,Dreamus
398,4,April,99,down,-59.0,KILL THIS LOVE,BLACKPINK,6838,40854,YG PLUS
489,5,May,90,down,-79.0,Be My Reason,Hwang Chi Yeul (황치열),4329,88925,Kakao Entertainment
593,6,June,94,down,-82.0,We Boom - The 3rd Mini Album,NCT DREAM,5244,129693,Dreamus
699,7,July,100,down,-86.0,Summer Queen,Brave Girls,4284,75663,Kakao Entertainment
793,8,August,94,down,-65.0,JUST BURN,JUST B,3395,21605,Bugs (NHN벅스)
894,9,September,95,down,-75.0,Play Game : Holiday,Weeekly,3565,46367,Kakao Entertainment
991,10,October,92,down,-78.0,Eternal,Young K,4601,56274,Dreamus
