# Importing Packages

In [None]:
import sqlite3
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Loading Data

In [None]:
# Read the `wiki_artists` and `spotify` tables from the pre-processed SQLite
# database into DataFrames.
sql_file = './pre_processed_data/lab2.db'

# sqlite3's connection context manager only commits / rolls back the open
# transaction; it does not close the connection. Close it explicitly so the
# database file handle is released even if a query fails.
conn = sqlite3.connect(sql_file)
try:
    df_artists = pd.read_sql("SELECT * FROM wiki_artists", conn)
    df_spotify = pd.read_sql("SELECT * FROM spotify", conn)
finally:
    conn.close()

In [None]:
# Keep only the weekly, global Top 200 chart rows whose `artist2` value
# appears in the `name` column of the artist table.
is_weekly_global_top200 = (
    (df_spotify.chartname == 'top200')
    & (df_spotify.chartfreq == 'weekly')
    & (df_spotify.region == 'global')
)
is_listed_artist = df_spotify.artist2.isin(df_artists.name)

# .copy() makes df_asian an independent frame, so the column assignments done
# in the processing cell do not operate on a slice of df_spotify.
df_asian = df_spotify[is_weekly_global_top200 & is_listed_artist].copy()
df_asian.head()

# Data Processing

In [None]:
# Hex marker colour per artist nationality (used for the `color` column).
# NOTE(review): 'Vietnamese' is plotted as a nationality further down but has
# no entry here, so it would map to NaN - confirm whether that is intended.
color_dict = dict([
    ('Chinese', '#ffc60a'),
    ('Filipino', '#005799'),
    ('Filipino American', '#61baff'),
    ('Japanese', '#30916f'),
    ('Japanese American', '#79d2a6'),
    ('Korean American', '#ff9a47'),
    ('South Korean', '#f06c00'),
])

# Hex colour per lineage group (used for `color` / `lineage_color` columns).
lineage_color_dict = dict([
    ('Asian', '#D9734E'),
    ('Mixed Asian', '#F2B05E'),
])

In [None]:
# Lookup tables from artist name to country / lineage, taken from df_artists.
artists_country = dict(zip(df_artists.name, df_artists.country))
artists_lineage = dict(zip(df_artists.name, df_artists.lineage))

# str.title() lower-cases everything after the first letter, so stylised
# artist names have to be restored by hand afterwards.
artist_name_fixes = {'Bts': 'BTS',
                     'K/Da': 'K/DA',
                     'Exo': 'EXO',
                     'Superm': 'SuperM',
                     'Iu': 'IU'}

# Build the processed frame as one non-mutating chain instead of assigning
# columns on (and calling replace(inplace=True) against) a filtered slice.
# The country/lineage lookups run before the title-casing because the lookup
# keys are the names as stored in the data.
# Note: this rebinds df_asian, so re-running this cell on its own would look
# up the already title-cased names; re-run the filter cell first.
df_asian = (
    df_asian
    .assign(
        country=lambda d: d.artist2.map(artists_country),
        lineage=lambda d: d.artist2.map(artists_lineage),
        date=lambda d: pd.to_datetime(d['date']),
    )
    .loc[:, ['date', 'artist2', 'stream_count', 'country', 'lineage']]
    .assign(
        artist2=lambda d: d.artist2.str.title().replace(artist_name_fixes)
    )
)
display(df_asian.head())

In [None]:
# Monthly stream totals per lineage.
# Only `stream_count` is aggregated: a bare .sum() would also try to sum the
# remaining text columns (artist2, country), which recent pandas versions
# concatenate instead of silently dropping.
df_monthly_lineage = (
    df_asian
    .groupby([pd.Grouper(key="date", freq="1M"), 'lineage'])['stream_count']
    .sum()
    .reset_index()
)
# Attach the plotting colour of each lineage group.
df_monthly_lineage['color'] = df_monthly_lineage.lineage.map(lineage_color_dict)
df_monthly_lineage.head()

In [None]:
# Monthly stream totals per artist, keeping lineage and country as group keys
# so they survive the aggregation.
monthly_artist_keys = [pd.Grouper(key="date", freq="1M"),
                       'lineage', 'country', 'artist2']
artist_sort_order = ['lineage', 'country', 'artist2']

df_artist_monthly = (
    df_asian
    .groupby(monthly_artist_keys)
    .sum()
    .reset_index()
    .sort_values(artist_sort_order)
)

# Two colour columns: one keyed by nationality, one keyed by lineage.
df_artist_monthly = df_artist_monthly.assign(
    color=df_artist_monthly.country.map(color_dict),
    lineage_color=df_artist_monthly.lineage.map(lineage_color_dict),
)
df_artist_monthly.head()

## Saving to Post-Processed Data Folder

In [None]:
# Write both monthly aggregates to the post-processed data folder, without
# the DataFrame index.
for frame, csv_path in (
    (df_monthly_lineage, './post_processed_data/bubble_lineage.csv'),
    (df_artist_monthly, './post_processed_data/bubble_artist.csv'),
):
    frame.to_csv(csv_path, index=False)

# Visualizations

In [None]:
# Reload the two monthly aggregates from the post-processed CSV files.
lineage_csv = './post_processed_data/bubble_lineage.csv'
artist_csv = './post_processed_data/bubble_artist.csv'

df_monthly_lineage = pd.read_csv(lineage_csv)
df_artist_monthly = pd.read_csv(artist_csv)

## Dual Bubble Plots

In [None]:
# Dual bubble plot: per-artist bubbles in row 1 and per-lineage bubbles in
# row 2, both sharing the date axis.
fig = make_subplots(rows=2,
                    shared_xaxes=True,
                    vertical_spacing=0.0125,
                    row_heights=[5, 1])

# Area-mode marker scaling: sizeref = 2 * max(size) / (max diameter in px)**2.
# It depends only on the whole frame, so compute it once per panel instead of
# once per trace.
max_marker_px = 40
artist_sizeref = 2 * df_artist_monthly.stream_count.max() / (max_marker_px ** 2)
lineage_sizeref = 2 * df_monthly_lineage.stream_count.max() / (max_marker_px ** 2)

# Row 1: one trace per nationality so each gets its own legend entry.
for country in df_artist_monthly.country.unique():
    sub = df_artist_monthly[df_artist_monthly.country == country]
    fig.add_trace(
        go.Scatter(
            x=sub.date,
            y=sub.artist2,
            mode='markers',
            legendgroup='1',
            name=country,
            marker=dict(
                size=sub.stream_count,
                sizemode='area',
                sizeref=artist_sizeref,
                sizemin=1,
                color=sub.color,
            ),
        ),
        row=1,
        col=1,
    )

# Row 2: one trace per lineage group.
for lineage in df_monthly_lineage.lineage.unique():
    sub2 = df_monthly_lineage[df_monthly_lineage.lineage == lineage]
    fig.add_trace(
        go.Scatter(
            x=sub2.date,
            y=sub2.lineage.str.title(),
            mode='markers',
            legendgroup='2',
            name=lineage,
            marker=dict(
                size=sub2.stream_count,
                sizemode='area',
                sizeref=lineage_sizeref,
                sizemin=1,
                color=sub2.color,
            ),
        ),
        row=2,
        col=1,
    )

# Text labels placed above the plot area: (text, xref, x, y).
for label, x_ref, x_pos, y_pos in [
    ("Asians overtake<br>Mixed Asians<br>in total<br>Spotify Streams",
     "x1", '2018-05-30', 1.025),
    ("Olivia Rodrigo<br>releases drivers license",
     "x1", '2021-01-31', 1),
    ("Global Korean Pop Craze",
     "paper", 0.55, 0.975),
]:
    fig.add_annotation(text=label,
                       xref=x_ref, yref="paper",
                       x=x_pos, y=y_pos,
                       font=dict(size=10),
                       showarrow=False)

# Dates of the two dotted vertical markers; the shaded band spans them.
event_dates = ['2018-05-30', '2021-01-31']

# Thin coloured strips along the left edge, in paper coordinates:
# (y0, y1, fill). The fills equal the 'Mixed Asian' and 'Asian' entries of
# lineage_color_dict. Paper coordinates are passed as numbers rather than
# strings so nothing relies on implicit string-to-number coercion.
lineage_strips = [
    (0.74, 0.96, '#F2B05E'),
    (0.21, 0.74, '#D9734E'),
]

layout_shapes = [
    dict(type='rect',
         x0=0.02, y0=strip_y0,
         x1=0.0125, y1=strip_y1,
         xref='paper', yref='paper',
         opacity=1,
         fillcolor=strip_fill,
         line_width=0)
    for strip_y0, strip_y1, strip_fill in lineage_strips
]
layout_shapes += [
    dict(type='line',
         x0=day, y0=0,
         x1=day, y1=0.96,
         xref='x1', yref='paper',
         opacity=0.25,
         line=dict(dash="dot"))
    for day in event_dates
]
layout_shapes.append(
    dict(type='rect',
         x0=event_dates[0], y0=0,
         x1=event_dates[1], y1=0.96,
         xref='x1', yref='paper',
         opacity=0.1,
         fillcolor='#C79BF2',
         layer="below",
         line_width=0)
)

fig.update_layout(
    width=1000,
    height=1150,
    xaxis2_title="Dates",
    yaxis1_title="Artists",
    yaxis2_title="Lineage",
    legend_title="Artist Classification",
    hovermode=False,
    legend_tracegroupgap=710,
    legend_traceorder="reversed+grouped",
    template='none',
    shapes=layout_shapes,
)

fig.update_yaxes(
    ticklabelposition="outside",
    automargin=True,
    title_standoff=25,
)

# Show the figure explicitly instead of relying on the return value of
# update_yaxes being the cell's last expression.
fig.show()

## Bubble + Bar Plots Colorful

In [12]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Reload the two monthly aggregates from the post-processed CSV files so this
# cell has its own copies of the data.
lineage_csv = './post_processed_data/bubble_lineage.csv'
artist_csv = './post_processed_data/bubble_artist.csv'

df_monthly_lineage = pd.read_csv(lineage_csv)
df_artist_monthly = pd.read_csv(artist_csv)

# Hex marker colour per artist nationality (same values as the dictionary in
# the Data Processing section, repeated so this cell can run on its own).
color_dict = dict([
    ('Chinese', '#ffc60a'),
    ('Filipino', '#005799'),
    ('Filipino American', '#61baff'),
    ('Japanese', '#30916f'),
    ('Japanese American', '#79d2a6'),
    ('Korean American', '#ff9a47'),
    ('South Korean', '#f06c00'),
])

# Hex colour per lineage group; used below to colour the bar traces.
lineage_color_dict = dict([
    ('Asian', '#D9734E'),
    ('Mixed Asian', '#F2B05E'),
])

# Bar + bubble figure: monthly stream totals per lineage as bars in row 1 and
# per-artist bubbles in row 2, both sharing the date axis.
fig = make_subplots(rows=2,
                    shared_xaxes=True,
                    vertical_spacing=0.0125,
                    row_heights=[1, 5])

# Row 1: one bar trace per lineage group, coloured via lineage_color_dict.
for lineage in df_monthly_lineage.lineage.unique():
    bar_df = df_monthly_lineage[df_monthly_lineage.lineage == lineage]
    fig.add_trace(
        go.Bar(
            x=bar_df.date,
            y=bar_df.stream_count,
            name=lineage,
            marker_color=lineage_color_dict[lineage],
        ),
        row=1,
        col=1,
    )

# Area-mode marker scaling: sizeref = 2 * max(size) / (max diameter in px)**2.
# It is the same for every trace, so compute it once outside the loop.
max_marker_px = 40
artist_sizeref = 2 * df_artist_monthly.stream_count.max() / (max_marker_px ** 2)

# Row 2: one bubble trace per nationality.
for country in df_artist_monthly.country.unique():
    sub = df_artist_monthly[df_artist_monthly.country == country]
    fig.add_trace(
        go.Scatter(
            x=sub.date,
            y=sub.artist2,
            mode='markers',
            legendgroup='1',
            name=country,
            marker=dict(
                size=sub.stream_count,
                sizemode='area',
                sizeref=artist_sizeref,
                sizemin=1,
                color=sub.color,
            ),
        ),
        row=2,
        col=1,
    )

# Text labels placed at the top of the figure: (text, xref, x, y).
for label, x_ref, x_pos, y_pos in [
    ("Asians overtake<br>Mixed Asians<br>in total<br>Spotify Streams",
     "x1", '2018-05-30', 1),
    ("Olivia Rodrigo<br>releases<br>drivers license",
     "x1", '2021-01-31', 1),
    ("Global Korean Pop Craze",
     "paper", 0.55, 1.025),
]:
    fig.add_annotation(text=label,
                       xref=x_ref, yref="paper",
                       x=x_pos, y=y_pos,
                       font=dict(size=10),
                       showarrow=False)

# Dates of the two dotted vertical markers; the shaded band spans them.
event_dates = ['2018-05-30', '2021-01-31']

# Thin coloured strips along the left edge, in paper coordinates:
# (y0, y1, fill). The fills equal the 'Mixed Asian' and 'Asian' entries of
# lineage_color_dict. Paper coordinates are passed as numbers rather than
# strings so nothing relies on implicit string-to-number coercion.
lineage_strips = [
    (0.565, 0.8, '#F2B05E'),
    (0, 0.565, '#D9734E'),
]

layout_shapes = [
    dict(type='rect',
         x0=0.02, y0=strip_y0,
         x1=0.0125, y1=strip_y1,
         xref='paper', yref='paper',
         opacity=1,
         fillcolor=strip_fill,
         line_width=0)
    for strip_y0, strip_y1, strip_fill in lineage_strips
]
layout_shapes += [
    dict(type='line',
         x0=day, y0=0,
         x1=day, y1=1,
         xref='x1', yref='paper',
         opacity=0.25,
         line=dict(dash="dot"))
    for day in event_dates
]
layout_shapes.append(
    dict(type='rect',
         x0=event_dates[0], y0=0,
         x1=event_dates[1], y1=1,
         xref='x1', yref='paper',
         opacity=0.1,
         fillcolor='#C79BF2',
         layer="below",
         line_width=0)
)

fig.update_layout(
    width=1000,
    height=1150,
    # Bold tags are closed properly (</b>); the original left them open.
    xaxis2_title="<b>Dates</b>",
    yaxis1_title="<b>Total # of Streams</b>",
    yaxis2_title="<b>Artists</b>",
    title={
        # The plotted quantity is stream_count, i.e. streams, not streamers.
        'text': "<b>Total # of Streams per Month of Asian Artists</b>",
        'y': 0.975,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
    legend_title="Artist Classification",
    hovermode=False,
    legend_traceorder="reversed+grouped",
    template='none',
    shapes=layout_shapes,
)

fig.update_yaxes(
    ticklabelposition="outside",
    automargin=True,
    title_standoff=25,
)
fig.show()

## Bubble Plots + Bar Plot

In [None]:
# Layout shapes for the multi-row figure: a dotted full-height vertical line
# at each of the two dates, followed by a translucent band between them drawn
# below the traces.
shapes = [
    {
        'type': 'line',
        'x0': day,
        'y0': '0',
        'x1': day,
        'y1': '1',
        'xref': 'x1',
        'yref': 'paper',
        'opacity': 0.25,
        'line': {'dash': "dot"},
    }
    for day in ('2018-05-30', '2021-01-31')
] + [
    {
        'type': 'rect',
        'x0': '2018-05-30',
        'y0': '0',
        'x1': '2021-01-31',
        'y1': '1',
        'xref': 'x1',
        'yref': 'paper',
        'opacity': 0.1,
        'fillcolor': '#C79BF2',
        'layer': "below",
        'line_width': 0,
    }
]

In [None]:
# Rotated group labels placed to the right of the plot area (paper x = 1.05),
# one per nationality. Each row below is
# (text, paper y, yanchor, font size, extra keys); only the first label
# carries an explicit 'visible' flag.
annotations = [
    {
        'font': {'size': font_size},
        **extra,
        'showarrow': False,
        'text': label,
        'x': 1.05,
        'xanchor': 'center',
        'xref': 'paper',
        'y': y_pos,
        'yanchor': y_anchor,
        'yref': 'paper',
        'textangle': 90,
    }
    for label, y_pos, y_anchor, font_size, extra in [
        ('South Korean Artists', 0.675, 'bottom', 13, {'visible': True}),
        ('Japanese Artists', 0.47875, 'bottom', 13, {}),
        ('Filipino<br>Artists', 0.375, 'bottom', 13, {}),
        ('Viet...<br>Artists', 0.33, 'middle', 12, {}),
        ('Chinese<br>Artists', 0.27, 'middle', 12, {}),
        ('Filipino<br>American<br>Artists', 0.16, 'bottom', 13, {}),
        ('Japanese<br>American<br>Artists', 0.045, 'bottom', 13, {}),
        ('Korean<br>American<br>Artists', 0, 'middle', 12, {}),
    ]
]
   

In [None]:
# Nine stacked rows sharing the date axis: a bar chart of monthly streams per
# lineage in row 1, then one bubble row per nationality in rows 2-9.
fig = make_subplots(rows=9,
                    shared_xaxes=True,
                    vertical_spacing=0.025,
                    row_heights=[7, 13, 7, 5, 1, 1, 6, 4, 1],
                   )

# Row 1: one bar trace per lineage group, coloured via lineage_color_dict.
for lineage in df_monthly_lineage.lineage.unique():
    bar_df = df_monthly_lineage[df_monthly_lineage.lineage == lineage]
    fig.add_trace(
        go.Bar(
            x=bar_df.date,
            y=bar_df.stream_count,
            name=lineage,
            marker_color=lineage_color_dict[lineage],
        ),
        row=1,
        col=1,
    )

# Area-mode marker scaling: sizeref = 2 * max(size) / (max diameter in px)**2.
# It uses the maximum over the whole frame, so bubbles are comparable across
# rows; compute it once instead of once per nationality.
max_marker_px = 40
artist_sizeref = 2 * df_artist_monthly.stream_count.max() / (max_marker_px ** 2)

# Rows 2-9: the list order decides which row each nationality lands in.
nationality = ['South Korean', 'Japanese', 'Filipino',
               'Vietnamese', 'Chinese', 'Filipino American',
               'Japanese American', 'Korean American']
for i, country in enumerate(nationality):
    sub = df_artist_monthly[df_artist_monthly.country == country]
    fig.add_trace(
        go.Scatter(
            x=sub.date,
            y=sub.artist2,
            mode='markers',
            legendgroup='1',
            name=country,
            showlegend=False,
            marker=dict(
                size=sub.stream_count,
                sizemode='area',
                sizeref=artist_sizeref,
                sizemin=1,
                # Bubbles are coloured by lineage here, not by nationality.
                color=sub.lineage_color,
            ),
        ),
        row=2 + i,
        col=1,
    )

# `shapes` and `annotations` are the lists built in the two preceding cells.
fig.update_layout(
    width=1000,
    height=1150,
    template='none',
    xaxis9_title="Dates",
    yaxis1_title="Total # of Streams",
    hovermode=False,
    shapes=shapes,
    annotations=annotations,
)

fig.update_yaxes(
    ticklabelposition="outside left",
    automargin=True,
    title_standoff=25,
)

# Text labels added on top of the layout annotations: (text, xref, x, y).
for label, x_ref, x_pos, y_pos in [
    ("Asians overtake<br>Mixed Asians<br>in total<br>Spotify Streams",
     "x1", '2018-05-30', 1),
    ("Olivia Rodrigo<br>releases<br>drivers license",
     "x1", '2021-01-31', 1),
    ("Global Korean Pop Craze",
     "paper", 0.55, 1.025),
]:
    fig.add_annotation(text=label,
                       xref=x_ref, yref="paper",
                       x=x_pos, y=y_pos,
                       font=dict(size=10),
                       showarrow=False)

fig.show()


## Importance

**Aims to Answer the Question**

Check whether the improvement is due to Mixed Asians, such as Bruno Mars, topping the chart (Asians vs. Mixed Asians).

**Possible Caption**

Our findings (see Figure X) show that although Mixed Asians dominated Spotify's Top 200 Global Charts from
2017 to the first quarter of 2018, an evident breakthrough by musicians/groups of purely Asian descent was seen
in May 2018, when the likes of Vietnamese singer Taiga and Korean groups BTS and Blackpink (see Figure X2)
overtook Mixed Asians in terms of total Spotify streams. Transition to Point 4.