In [167]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go

In [2]:
# Read the performer listing; later cells rely on its 'Performer' and 'Year' columns.
path = 'sw_performers.csv'
performers_df = pd.read_csv(filepath_or_buffer=path)

In [3]:
# Read the weekly chart data; later cells rely on its 'performer', 'weekid'
# and 'week_position' columns.
path = 'billboard_data/data/hot_stuff_2.csv'
df = pd.read_csv(filepath_or_buffer=path)

In [4]:
# Find out which artists have top-100 appearances

# Naive approach: exact string match between the two performer columns
sw_artists = set(performers_df['Performer'])
top_100_artists = set(df['performer'])
popular_artists = sw_artists & top_100_artists

# Keep only the chart rows of the matched artists. The explicit copy makes `df`
# an independent frame, so adding columns to it in later cells does not trigger
# pandas' SettingWithCopyWarning.
df = df.loc[df['performer'].isin(popular_artists)].copy()
print('Naive Overlap Size:', len(popular_artists))

Naive Overlap Size: 83


In [70]:
# Add weekly score: score = 100 - week_position, so a better (lower) chart
# position earns more points. `assign` returns a new frame rather than writing
# into the filtered slice of `df`.
df = df.assign(score=100 - df['week_position'])

# Clean up artists who have multiple hits in same week: one row per
# (performer, weekid) holding the summed score
weekly_scores = df.groupby(['performer', 'weekid'], as_index=False)['score'].sum()

# NOTE(review): the ordering below is whatever sorting the 'weekid' column gives;
# it is chronological only if weekid sorts chronologically (e.g. ISO date
# strings or datetimes) -- confirm against the CSV.
result_df = weekly_scores.sort_values(['performer', 'weekid']).reset_index(drop=True)

# Add cumulative score: running total of the weekly score within each performer.
# (The previous row-by-row loop iterated over `temp_df`, which is not defined
# anywhere in this notebook and failed on a fresh kernel.)
result_df['cum_score'] = result_df.groupby('performer')['score'].cumsum()

In [71]:
# Visualisation: which years overall are the most popular

# Final popularity per artist: the last row of each performer in result_df,
# i.e. the row carrying that performer's final cumulative score
final_popularity_df = result_df.groupby('performer', as_index=False).last()

# Attach each performer's final cumulative score to the performer listing.
# The default inner merge keeps only performers present in both frames.
artist_totals = final_popularity_df[['performer', 'cum_score']]
best_scores_df = (
    pd.merge(performers_df, artist_totals, left_on='Performer', right_on='performer')
    .loc[:, ['Year', 'Performer', 'cum_score']]
    .sort_values(['Year', 'Performer'])
)

In [168]:
# Stacked bar chart visualisation

# MAKE SCORE AND NAME ARRAYS FOR VISUALIZATION
# Both arrays have one row per year and one column per artist "slot" within
# that year. Years with fewer artists than the widest year keep the padding
# values (score 0, name None) in their unused slots.
years = best_scores_df['Year'].unique()
number_of_years = len(years)
max_artists_in_year = best_scores_df['Year'].value_counts().max()

score_array = np.full((number_of_years, max_artists_in_year), 0)
name_array = np.full((number_of_years, max_artists_in_year), None)

for x, year in enumerate(years):
    cur_year = best_scores_df.loc[best_scores_df['Year'] == year]
    n_artists = len(cur_year)
    score_array[x, :n_artists] = cur_year['cum_score'].to_numpy()
    name_array[x, :n_artists] = cur_year['Performer'].to_numpy()

# One bar trace per artist slot. Building the traces from max_artists_in_year
# (instead of a fixed list of five) avoids an IndexError when the widest year
# has fewer than five artists and avoids silently dropping artists when it
# has more.
fig = go.Figure(data=[
    go.Bar(x=years, y=score_array[:, slot], hovertext=name_array[:, slot])
    for slot in range(max_artists_in_year)
])
# Change the bar mode
fig.update_layout(barmode='stack')
fig.show()

In [217]:
import plotly.graph_objects as go

year_of_interest = 1968
yof_artists = list(best_scores_df.loc[best_scores_df['Year'] == year_of_interest, 'Performer'])

# Find all dates and create (artist, artist_df) tuples.
# The mask is built from result_df itself; the earlier version used `temp_df`,
# which is not defined anywhere in this notebook and failed on a fresh kernel.
yof_artists_tuples = []
all_dates = set()
for a in yof_artists:
    artist_rows = result_df.loc[result_df['performer'] == a]
    yof_artists_tuples.append((a, artist_rows))
    # Union of every weekid on which any of the selected artists has a row
    all_dates |= set(artist_rows['weekid'])

# Append missing dates to each performer
for i, artist_tuple in enumerate(yof_artists_tuples):
    a, cur_df = artist_tuple
    missing_dates = set(all_dates - set(cur_df['weekid']))
    n = len(missing_dates)
    missing_dates_df = pd.DataFrame([
                                [a for i in range(n)], # artist name
                                list(missing_dates), # weekid
                                [0 for i in range(n)], # temporary score
                                [0 for i in range(n)] # temporary cum_score
                            ]
                        )
    missing_dates_df = missing_dates_df.transpose()
    missing_dates_df.columns = ['performer', 'weekid', 'score', 'cum_score']
    
    # Append missing dates to each artist
    cur_df = cur_df.append(missing_dates_df, ignore_index=True)
    cur_df = cur_df.sort_values('weekid').reset_index(drop=True)
    
    # Reset score and cum_score so that all zero values are populated
    new_cum_scores = []
    prev_cum_score = 0
    for _, row in cur_df.iterrows():
        # If cum_score is zero then append with previous value
        if row['cum_score'] == 0:
            new_cum_scores.append(prev_cum_score)
            
        # If cum_score is non zero then set to current value
        else:
            new_cum_scores.append(row['cum_score'])
            
            # Update previous score values ONLY when cum_score non zero
            prev_cum_score = row['cum_score']
        
    # Overwrite artist dataframe scores and cum_scores
    cur_df['cum_score'] = new_cum_scores
    
    # Update artist tuples
    yof_artists_tuples[i] = (a, cur_df)
    


colors = ['rgb(131, 90, 241)', 'rgb(111, 231, 219)', 'rgb(184, 247, 212)', 'rgb(185, 50, 194)', 'rgb(172, 46, 254)']
fig = go.Figure()

# Sentinel that compares greater than any weekid it is compared with below.
# NOTE(review): this relies on weekid values being ISO-style date strings -- confirm.
min_date = '2100-01-01'

# One stacked area trace per artist: x is the weekid, y the cumulative score
for i, (artist, artist_df) in enumerate(yof_artists_tuples):

    fig.add_trace(go.Scatter(
        name=artist,
        x=artist_df['weekid'],
        y=artist_df['cum_score'],
        mode='lines',
        # Cycle through the palette so that a year with more artists than
        # colours does not raise an IndexError
        line=dict(width=0.5, color=colors[i % len(colors)]),
        stackgroup='one' # define stack group
    ))

    # Find minimum date for plotting purposes ranges
    min_date = min(min_date, min(artist_df['weekid']))


# NOTE(review): '-04-21' is a hard-coded month/day appended to year_of_interest;
# its meaning is not visible in this notebook -- confirm it is correct for
# every year this cell is run with.
sw_date = str(year_of_interest) + '-04-21'

# Add shape regions: the grey rectangle spans from the earliest plotted date up
# to sw_date (i.e. it shades the period BEFORE sw_date), which leaves the
# period after sw_date unshaded.
fig.update_layout(
    shapes=[
        dict(
            type="rect",
            # x-reference is assigned to the x-values
            xref="x",
            # y-reference is assigned to the plot paper [0,1]
            yref="paper",
            x0=min_date,
            y0=0,
            x1=sw_date,
            y1=1,
            fillcolor="Grey",
            opacity=0.7,
            layer="below",
            line_width=0,
        )
    ]
)

fig.show()

2