# Visualization of progress in the January TPS competition (aka "Overfitting movie")

This notebook shows how to create a movie (animated GIF) from the 62 public predictions.

The movie visualizes how the predictions gradually converge towards an optimum. To indicate progress, the movie
- displays a growing progress bar
- displays the prediction number and the decreasing public leaderboard (LB) SMAPE of each prediction as text
- continuously changes color from blue to pink

The movie wouldn't have been possible without @[jbomitchell](https://www.kaggle.com/jbomitchell)'s [dataset](https://www.kaggle.com/jbomitchell/tps-submissions-january-2022).


In [None]:
import glob
import imageio
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from IPython.display import display, Image


In [None]:
# Read the data
test_df = pd.read_csv('../input/tabular-playground-series-jan-2022/test.csv', parse_dates=['date'])
submission_pattern = '../input/tps-submissions-january-2022/*.csv'
names = glob.glob(submission_pattern)
if not names:
    # Fail with a clear message instead of an IndexError on names[-1] below
    raise FileNotFoundError(f"No prediction files found for pattern {submission_pattern!r}")
names.sort(reverse=True) # list of file names from worst to best prediction
best_df = pd.read_csv(names[-1]) # best prediction which is marked with crosses in every frame

# Prepare the colormap: movie starts blue and ends pink.
# The colormap is resampled to one color per prediction file.
# plt.get_cmap is used because matplotlib.cm.get_cmap (plt.cm.get_cmap)
# was deprecated in Matplotlib 3.7 and removed in 3.9.
cm = plt.get_cmap('cool', len(names))


In [None]:
def create_movie(quarter, bottom, top, tx, ty):
    """Create an animated GIF which displays the predictions of a selected quarter year

    One PNG frame (``frame_q{quarter}_{i:02d}.png``) is written per entry of the
    module-level list ``names``, in list order. The frames written by this call
    are then combined into ``movie_q{quarter}.gif``, which is displayed inline.
    Every frame shows the predictions of file ``i`` as dots, the predictions of
    ``best_df`` as crosses, a progress bar and a text label.

    Reads the module-level ``names``, ``test_df``, ``best_df`` and ``cm``.

    Parameters:
    -----------
    quarter : quarter of 2019 (1, 2, 3 or 4)
    bottom, top : lower and upper limits of the predictions
                  (which must be the same in every frame of the movie)
    tx, ty : coordinates of the text and the progress bar
    """
    n_frames = len(names)

    # Everything below depends only on `quarter`, not on the individual
    # prediction file, so it is computed once instead of once per frame.
    selection = ((test_df.country == 'Norway') &
                 (test_df.store == 'KaggleRama') &
                 (test_df['product'] == 'Kaggle Hat') &
                 ((test_df.date.dt.month-1) // 3 == quarter-1))
    best_rows = best_df[selection]
    # First test row of each month; the slice keeps the months of this quarter
    # plus the first month of the following quarter (if there is one).
    ticks = test_df.groupby(test_df.date.dt.month).first().iloc[quarter*3-3:quarter*3+1]
    tick_labels = ticks.date.dt.strftime('%b %Y')

    frame_files = []
    for i, name in enumerate(names):
        df = pd.read_csv(name)
        rows = df[selection]
        color = cm(i / n_frames)

        # The six characters following '2022/' in the path are read as an
        # integer and divided by 100000 to obtain the number shown in the label.
        score_start = name.index('2022/') + 5
        score = int(name[score_start:score_start+6]) / 100000

        plt.figure(figsize=(12, 5))
        plt.gca().set_facecolor('k') # background
        plt.scatter(best_rows.row_id, best_rows.num_sold,
                    s=40, marker='+', color=cm((n_frames-1) / n_frames)) # best prediction
        plt.scatter(rows.row_id, rows.num_sold,
                    s=25, color=color) # predictions
        plt.plot([tx+180, tx+180 + i * 10], [ty+15, ty+15],
                 lw=3, color=color) # progress bar
        plt.ylim(bottom, top)
        plt.xticks(ticks=ticks.row_id, labels=tick_labels)
        plt.text(tx, ty, f"{i:2d}: {score:.5f}", color='w')
        plt.title(f"Q{quarter} predictions")

        frame_file = f"frame_q{quarter}_{i:02d}.png"
        plt.savefig(frame_file)
        plt.close()
        frame_files.append(frame_file)

    # Use exactly the frames written above, in the order they were written.
    # Globbing for frame_q{quarter}_*.png would also pick up stale frames left
    # over from an earlier run and would mis-order frames once i needs three digits.
    images = [imageio.imread(frame_file) for frame_file in frame_files]
    imageio.mimsave(f'movie_q{quarter}.gif', images, duration=0.3)
    display(Image(f'movie_q{quarter}.gif'))

# Per-quarter plot layout: y-axis limits (bottom, top) and the position
# (tx, ty) of the text label and progress bar.
_quarter_layouts = {
    1: dict(bottom=900, top=1700, tx=26600, ty=1550),
    2: dict(bottom=800, top=2000, tx=28000, ty=950),
    3: dict(bottom=650, top=1300, tx=29600, ty=700),
    4: dict(bottom=550, top=3000, tx=31250, ty=2700),
}

# Render and display one movie per quarter, Q1 through Q4.
for _quarter, _layout in _quarter_layouts.items():
    create_movie(_quarter, **_layout)
