Load data files
---

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import display_html, HTML

# Leaderboard snapshots as DataFrames: public leaderboard first, private second.
lbs = [
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/patent-ptpm-leaderboards/us-patent-phrase-to-phrase-matching-publicleaderboard.csv",
        "../input/patent-ptpm-leaderboards/us-patent-phrase-to-phrase-matching-privateleaderboard.csv",
    )
]

def print_header(text, size=1):
    """Show ``text`` as an HTML heading in the notebook output.

    Parameters
    ----------
    text
        Heading content; inserted into the markup as-is (no HTML escaping).
    size
        Heading level used for the tag name, e.g. ``1`` -> ``<h1>``,
        ``4`` -> ``<h4>``. Defaults to 1.
    """
    heading_markup = f"<h{size}>{text}</h{size}>"
    display_html(HTML(heading_markup))

Build table where each row contains old and new positions and scores
---

In [None]:
# Concatenate lbs and remove duplicates to get TeamIds and TeamNames.
# The leaderboards are walked in reverse order so that, for a team present in
# several of them, the row (and therefore the name) from the latest one is kept.
teams = pd.concat(lbs[::-1]).drop_duplicates(subset="TeamId")[['TeamId', 'TeamName']]

teamIds = teams['TeamId'].to_numpy()

# One rank column and one score column per leaderboard, each aligned with `teams`.
rankColumns = []
scoreColumns = []

for lb in lbs:
    # Only the first row of each team is used, so build a per-team lookup once
    # instead of re-scanning the whole leaderboard for every team.
    firstRows = lb.drop_duplicates(subset="TeamId")
    lookupIds = firstRows['TeamId'].to_numpy()

    # Rank is taken as the 1-based row label of the team's first row.
    # NOTE(review): this assumes the CSV rows are ordered best-first and carry
    # the default 0..n-1 index from read_csv -- confirm against the data files.
    ranks = pd.Series(firstRows.index.to_numpy() + 1, index=lookupIds)
    scores = pd.Series(firstRows['Score'].to_numpy(), index=lookupIds)

    # reindex() yields NaN for teams that are absent from this leaderboard.
    rankColumns.append(ranks.reindex(teamIds).to_numpy(dtype=float))
    scoreColumns.append(scores.reindex(teamIds).to_numpy(dtype=float))

# Column layout: [TeamId, rank per leaderboard..., score per leaderboard...],
# stored as a float array so that NaN can mark missing entries.
changeTable = np.column_stack([teamIds.astype(float)] + rankColumns + scoreColumns)
teamNames = np.array(teams['TeamName'].tolist())

In [None]:
def _print_median_change(table, oldRank, newRank, oldScore, newScore):
    """Print the median absolute rank change and the median absolute score
    change (the latter rounded to 4 decimals) over the rows of ``table``."""
    print(np.median(np.abs(table[:, oldRank] - table[:, newRank])),
          round(np.median(np.abs(table[:, newScore] - table[:, oldScore])), 4))


def _print_best_improvement(table, names, oldRank, newRank, oldScore, newScore):
    """Print team name, ranks and scores for the row of ``table`` with the
    largest (old rank - new rank) difference; ``names`` is aligned with ``table``."""
    argtop = np.argmax(table[:, oldRank] - table[:, newRank])

    print("Team:", names[argtop])
    print("Rank:", int(table[argtop, oldRank]), "->", int(table[argtop, newRank]))
    print("Score:", table[argtop, oldScore], "->", table[argtop, newScore])


for i, k in [(0, 1)]:

    print_header("Change from public LB to private LB:")

    # Column positions inside changeTable: [TeamId, ranks..., scores...].
    oldRank, newRank = 1 + i, 1 + k
    oldScore, newScore = 1 + len(lbs) + i, 1 + len(lbs) + k

    # Teams missing from one of the two leaderboards carry NaN in changeTable.
    # NaN would turn every median into nan, make argmax select the NaN row
    # (so int() raises) and break the histogram range, so such rows are
    # excluded from this comparison.
    complete = ~np.isnan(changeTable[:, [oldRank, newRank, oldScore, newScore]]).any(axis=1)
    if not complete.any():
        print("No team is present on both leaderboards.")
        continue

    pairTable = changeTable[complete]
    pairNames = teamNames[complete]

    # Sort by the old rank so that the first n rows are the old top n.
    order = np.argsort(pairTable[:, oldRank])

    sortedTeamNames = pairNames[order]
    sortedChangeTable = pairTable[order]

    print_header("Median overall rank/score change:", 4)
    _print_median_change(sortedChangeTable, oldRank, newRank, oldScore, newScore)

    for n in [1000, 500, 200, 100, 50, 20, 10]:
        print_header("Median top {} rank/score change:".format(n), 4)
        _print_median_change(sortedChangeTable[:n], oldRank, newRank, oldScore, newScore)

    print_header("Maximum overall rank improvement:", 4)
    _print_best_improvement(sortedChangeTable, sortedTeamNames,
                            oldRank, newRank, oldScore, newScore)

    for n in [1000, 500, 200, 100, 50, 20, 10]:
        print_header("Maximum top {} rank improvement:".format(n), 4)
        _print_best_improvement(sortedChangeTable[:n], sortedTeamNames[:n],
                                oldRank, newRank, oldScore, newScore)

    print_header("Shakeup rank change histogram", 4)
    plt.hist(sortedChangeTable[:, oldRank] - sortedChangeTable[:, newRank], 100)
    plt.show()

    print_header("Shakeup score change histogram", 4)
    plt.hist(sortedChangeTable[:, oldScore] - sortedChangeTable[:, newScore], 100)
    plt.show()

Plot the shakeup
---

In [None]:
from plotly.offline import init_notebook_mode, iplot, plot
import plotly.graph_objs as go

In [None]:
for i, k in [(0, 1)]:

    print_header("Change from public LB to private LB:")

    # Scatter old vs new rank; one marker per team, team name as hover text.
    # The rank columns of changeTable start right after the TeamId column.
    trace = go.Scatter(
        x=changeTable[:, 1 + i],
        y=changeTable[:, 1 + k],
        mode="markers",
        name="Rank",
        marker={"color": 'rgba(128, 128, 255, 0.8)'},
        text=np.array(teamNames),
    )

    layout = {
        "title": 'Rank Shakeup',
        "xaxis": {"title": 'Old Rank', "ticklen": 5, "zeroline": False},
        "yaxis": {"title": 'New Rank', "ticklen": 5, "zeroline": False},
    }

    fig = {"data": [trace], "layout": layout}
    iplot(fig)

In [None]:
for i, k in [(0, 1)]:

    print_header("Change from public LB to private LB:")

    # Scatter old vs new score; one marker per team, team name as hover text.
    # The score columns of changeTable follow the TeamId column and the
    # len(lbs) rank columns.
    trace = go.Scatter(
        x=changeTable[:, 1 + len(lbs) + i],
        y=changeTable[:, 1 + len(lbs) + k],
        mode="markers",
        name="Score",
        marker={"color": 'rgba(128, 128, 255, 0.8)'},
        text=np.array(teamNames),
    )

    layout = {
        "title": 'Score Shakeup',
        "xaxis": {"title": 'Old Score', "ticklen": 5, "zeroline": False},
        "yaxis": {"title": 'New Score', "ticklen": 5, "zeroline": False},
    }

    fig = {"data": [trace], "layout": layout}
    iplot(fig)