In [7]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go

In [2]:
# Load the performers table. Later cells read its 'Performer' and 'Year' columns.
# The path is relative, so it resolves against the notebook's working directory.
path = 'sw_performers.csv'
performers_df = pd.read_csv(filepath_or_buffer=path)

In [3]:
# Load the weekly chart table. Later cells read its 'performer', 'weekid'
# and 'week_position' columns. Note that `path` is rebound here.
path = 'billboard_data/data/hot_stuff_2.csv'
df = pd.read_csv(filepath_or_buffer=path)

In [4]:
# Which performers from the performers table also appear in the chart data?
#
# "Naive" approach: names are matched by exact string equality only, so any
# spelling / punctuation / collaboration-credit variant counts as a different artist.
sw_artists = set(performers_df['Performer'])
top_100_artists = set(df['performer'])
popular_artists = sw_artists.intersection(top_100_artists)

# Keep only the chart rows belonging to a matched performer.
# NOTE: this rebinds `df`, so the unfiltered chart table is gone after this cell.
df = df[df['performer'].isin(popular_artists)]

print('Naive Overlap Size:', len(popular_artists))

Naive Overlap Size: 83


In [5]:
# Weekly score: 100 minus the chart position, so a better (lower) position
# earns more points (assuming positions run 1-100, #1 scores 99 and #100 scores 0).
# `assign` returns a new frame instead of writing a column into `df`, which at
# this point is a filtered slice of the raw table; in-place column assignment on
# such a slice triggers pandas' SettingWithCopyWarning.
df = df.assign(score=100 - df['week_position'])

# One row per (performer, week): an artist with several entries in the same
# week gets the sum of their scores for that week.
temp_df = (
    df[['weekid', 'performer', 'score']]
    .groupby(['performer', 'weekid'])
    .sum()
    .reset_index()
    # NOTE(review): 'weekid' is sorted as read from the CSV; this is
    # chronological only if it is an ISO-style 'YYYY-MM-DD' string -- confirm.
    .sort_values(['performer', 'weekid'])
)

# Running total of weekly scores within each performer, in week order.
# Vectorised replacement for the previous row-by-row accumulation loop.
temp_df['cum_score'] = temp_df.groupby('performer')['score'].cumsum()

In [36]:
# Visualisation prep: which years overall are the most popular

# Last row per performer. `temp_df` is ordered by week within each performer,
# so the last `cum_score` is that performer's final running total.
final_popularity_df = temp_df.groupby('performer').last().reset_index()

# Attach each performer's final total to the performers table, then keep just
# the columns needed for plotting, ordered by year and name.
# The merge uses pandas' default inner join, so performers without any chart
# rows are dropped here.
best_scores_df = (
    performers_df
    .merge(
        final_popularity_df[['performer', 'cum_score']],
        left_on='Performer',
        right_on='performer',
    )
    .loc[:, ['Year', 'Performer', 'cum_score']]
    .sort_values(['Year', 'Performer'])
)

In [40]:
# Stacked bar chart visualisation: one bar per year, one stacked segment per
# artist listed under that year, segment height = the artist's final cumulative score.

# Build (year x artist-slot) arrays for the plot. Years with fewer artists than
# the busiest year are padded with score 0 / name None.
years = best_scores_df['Year'].unique()
max_artists_in_year = best_scores_df['Year'].value_counts().max()
number_of_years = len(years)

# Use the score column's own dtype so non-integer scores would not be truncated.
score_array = np.zeros(
    (number_of_years, max_artists_in_year),
    dtype=best_scores_df['cum_score'].dtype,
)
name_array = np.full((number_of_years, max_artists_in_year), None, dtype=object)

for x, year in enumerate(years):
    cur_year = best_scores_df.loc[best_scores_df['Year'] == year].reset_index(drop=True)
    n_artists = len(cur_year)
    score_array[x, :n_artists] = cur_year['cum_score'].to_numpy()
    name_array[x, :n_artists] = cur_year['Performer'].to_numpy()

# One trace per artist slot. The number of traces follows the data rather than
# being fixed at five: a fixed count raises IndexError when the busiest year
# has fewer than five artists and silently omits artists when it has more.
fig = go.Figure(data=[
    go.Bar(x=years, y=score_array[:, slot], hovertext=name_array[:, slot])
    for slot in range(max_artists_in_year)
])
# Stack the per-slot bars on top of each other within each year
fig.update_layout(barmode='stack')
fig.show()

In [41]:
# Per-artist weekly history for the artists listed under one year of interest.
# Work in progress: the figure has no traces yet; the loop only previews the
# data for the first artist.
year_of_interest = 1968
artists_at_yof = list(
    best_scores_df.loc[best_scores_df['Year'] == year_of_interest, 'Performer']
)

# Weekly / cumulative score rows for just those artists
plot_df = temp_df.loc[temp_df['performer'].isin(artists_at_yof)]

# Placeholders kept from the stacked-area example this cell is based on;
# only the commented-out trace template below refers to them.
x=['Winter', 'Spring', 'Summer', 'Fall']
colors = ['rgb(131, 90, 241)', 'rgb(111, 231, 219)', 'rgb(184, 247, 212)']

fig = go.Figure()

for a in artists_at_yof:
    a_df = plot_df.loc[plot_df['performer'] == a]
    # Preview this artist's rows. `a` is the performer name (a str), so the
    # frame to inspect is `a_df`; `a.head` would raise AttributeError.
    print(a_df.head(10))

    # TODO: add one stacked-area trace per artist, e.g.
    #     fig.add_trace(go.Scatter(
    #         x=x, y=[40, 60, 40, 10],
    #         hoverinfo='x+y',
    #         mode='lines',
    #         line=dict(width=0.5, color='rgb(131, 90, 241)'),
    #         stackgroup='one' # define stack group
    #     ))

    # Only the first artist is inspected while the plot is being developed
    break

fig.update_layout(yaxis_range=(0, 100))
fig.show()

Unnamed: 0,performer,weekid,score,cum_score
1359,Dionne Warwick,1962-12-08,22,22
1360,Dionne Warwick,1962-12-15,39,61
1361,Dionne Warwick,1962-12-22,50,111
1362,Dionne Warwick,1962-12-29,54,165
1363,Dionne Warwick,1963-01-05,68,233
1364,Dionne Warwick,1963-01-12,72,305
1365,Dionne Warwick,1963-01-19,74,379
1366,Dionne Warwick,1963-01-26,79,458
1367,Dionne Warwick,1963-02-02,79,537
1368,Dionne Warwick,1963-02-09,74,611


In [121]:
# Scratch cell: echoes the current value of the notebook-global `x`.
# NOTE(review): the recorded output (2) is not the season list assigned to `x`
# in the previous cell, and the execution count (121) is far out of sequence,
# so this output presumably reflects stale kernel state -- consider deleting this cell.
x

2