# Synthanic shakeup scatterplot
#### This is the [shakeup scatterplot](https://www.kaggle.com/carlmcbrideellis/shakeup-scatterplots-boxes-strings-and-things), and the 'Top 20' leaderboard, for the [Tabular Playground Series - Apr 2021 "*Synthanic - You're going to need a bigger boat*"](https://www.kaggle.com/c/tabular-playground-series-apr-2021) competition.
***Note***: The complete Public-Private leaderboard can be found as a `csv` file in the **Output** section of this notebook.

In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import warnings

# Silence library warnings so they do not clutter the notebook output.
warnings.filterwarnings('ignore')

# Text searched for in the competition titles of the Meta Kaggle dataset.
search_string = "Apr 2021"

comps = pd.read_csv('../input/meta-kaggle/Competitions.csv')
our_competition = comps[comps['Title'].str.contains(search_string, na=False)]
if our_competition.empty:
    raise ValueError(f"No competition title contains {search_string!r}")
pd.set_option('display.max_columns', None)

# Take the FIRST matching competition for both the index label and the Id, so
# the two always describe the same row even if several titles match.
CompetitionIndex = int(our_competition.index[0])
CompetitionId = our_competition["Id"].iloc[0]
row = comps.loc[CompetitionIndex]

# Keep only the teams that took part in the selected competition.
all_teams = pd.read_csv('../input/meta-kaggle/Teams.csv')
teams = all_teams[all_teams['CompetitionId'] == CompetitionId]

# Medal codes 0..3 index into MEDAL_NAMES; COLOR_DICT holds the plot colour of
# each code in the same order. A missing medal is treated as code 0 ("None").
COLOR_DICT = {0: 'deepskyblue', 1: 'gold', 2: 'silver', 3: 'chocolate'}
MEDAL_NAMES = np.asarray(["None", "Gold", "Silver", "Bronze"])
MEDAL_COLORS = dict(zip(MEDAL_NAMES, COLOR_DICT.values()))
medal_codes = teams['Medal'].fillna(0).astype(int)
teams = teams.assign(Medal=MEDAL_NAMES[medal_codes.to_numpy()])

# Drop every team that lacks a name, a public/private rank or a
# public/private leaderboard submission id.
LB_ranks = teams[['Id',
                  'TeamName',
                  'PublicLeaderboardRank',
                  'PrivateLeaderboardRank',
                  'PublicLeaderboardSubmissionId',
                  'PrivateLeaderboardSubmissionId']].dropna(axis=0, how='any')

# read in the file with the score data
Submissions = pd.read_csv('../input/meta-kaggle/Submissions.csv')

def get_pub_score(PublicLeaderboardSubmissionId):
    """Return the displayed public leaderboard score of one submission.

    Looks in the module-level ``Submissions`` table for rows whose ``Id``
    equals ``PublicLeaderboardSubmissionId`` and returns the
    ``PublicScoreLeaderboardDisplay`` value of the first such row.

    Raises:
        IndexError: if no row of ``Submissions`` has that ``Id``.
    """
    is_match = Submissions['Id'] == PublicLeaderboardSubmissionId
    matching_scores = Submissions.loc[is_match, 'PublicScoreLeaderboardDisplay']
    return matching_scores.values[0]

def get_priv_score(PrivateLeaderboardSubmissionId):
    """Return the displayed private leaderboard score of one submission.

    Looks in the module-level ``Submissions`` table for rows whose ``Id``
    equals ``PrivateLeaderboardSubmissionId`` and returns the
    ``PrivateScoreLeaderboardDisplay`` value of the first such row.

    Raises:
        IndexError: if no row of ``Submissions`` has that ``Id``.
    """
    is_match = Submissions['Id'] == PrivateLeaderboardSubmissionId
    matching_scores = Submissions.loc[is_match, 'PrivateScoreLeaderboardDisplay']
    return matching_scores.values[0]

# Attach the displayed public and private scores to every ranked team.
# The Submissions table is filtered once down to the submissions that are
# actually referenced and then used as a lookup table, instead of scanning
# the whole table once per team and per score.
# (LB_ranks has had its NaN rows dropped, so the ids can be cast to integers.)
public_ids = LB_ranks['PublicLeaderboardSubmissionId'].astype('int64')
private_ids = LB_ranks['PrivateLeaderboardSubmissionId'].astype('int64')
wanted_ids = pd.concat([public_ids, private_ids]).unique()
score_lookup = (Submissions[Submissions['Id'].isin(wanted_ids)]
                .drop_duplicates(subset='Id')  # first row wins per Id
                .set_index('Id'))

# A submission id that is absent from Submissions gives NaN here rather than
# aborting the whole notebook with an IndexError.
LB_ranks['PublicLeaderboardScore'] = public_ids.map(
    score_lookup['PublicScoreLeaderboardDisplay'])
LB_ranks['PrivateLeaderboardScore'] = private_ids.map(
    score_lookup['PrivateScoreLeaderboardDisplay'])

# make a new dataframe for writing out, indexed by the team Id
LB_ranks_and_scores = LB_ranks[['Id',
                                'TeamName',
                                'PublicLeaderboardRank',
                                'PublicLeaderboardScore',
                                'PrivateLeaderboardRank',
                                'PrivateLeaderboardScore']].set_index('Id')

# Shakeup scatterplot: public rank against private rank, one marker per team,
# coloured by the medal the team received.
fig = px.scatter(teams,
                 title='Shakeup plot for: ' + row.Title,
                 x='PublicLeaderboardRank',
                 y='PrivateLeaderboardRank',
                 hover_name='TeamName',
                 hover_data=[
                     'PublicLeaderboardRank',
                     'PrivateLeaderboardRank',
                     'Medal',
                 ],
                 color='Medal',
                 color_discrete_map=MEDAL_COLORS)
fig.update_traces(marker=dict(size=5))
fig.update_layout(showlegend=False)
fig.show()

# save to a csv file
# NOTE: index=False means the team Id (the index of this frame) is not written.
LB_ranks_and_scores.to_csv("LB_ranks_and_scores.csv", index=False)

# Take a look at the Top 20
LB_ranks_and_scores.sort_values(by='PrivateLeaderboardRank', ascending=True).head(20)