# ICLR Review Scores Visualization (Google Colab)

This notebook reads ICLR review data from GitHub and creates scatter plots showing Rigor vs Appeal, colored by submission status.

**Data Source**: [ICLR Reviews Data CSV](https://github.com/ktgiahieu/peer-review-pareto-front/blob/master/data/ICLR_reviews_data.csv)


In [None]:
# Install required packages (if not already installed)
# Most packages are pre-installed in Colab, but this ensures compatibility
# NOTE: no versions are pinned here, so the installed versions can differ between environments.
%pip install -q pandas numpy matplotlib seaborn


In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Global plot appearance: seaborn's white-grid theme, then the default
# figure size and font size used by every figure in this notebook.
sns.set_style("whitegrid")
plt.rcParams.update({
    'figure.figsize': (12, 8),
    'font.size': 10,
})


In [None]:
# Load the review table directly from the raw GitHub URL
csv_url = 'https://raw.githubusercontent.com/ktgiahieu/peer-review-pareto-front/master/data/ICLR_reviews_data.csv'
df = pd.read_csv(csv_url)

# Sanity-check what was loaded: row count, schema, a sample, and status counts
n_loaded = len(df)
print(f"Loaded {n_loaded} reviews")

print(f"\nDataFrame Info:")
df.info()

print(f"\nFirst few rows:")
preview = df.head()
print(preview)

print(f"\nStatus distribution:")
status_counts = df['status'].value_counts()
print(status_counts)


## Define Custom Functions for Rigor and Appeal

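You can modify these functions to calculate Rigor and Appeal differently. By default, Rigor is the simple average of the soundness, contribution, and rating scores, and Appeal is the simple average of the presentation, contribution, and rating scores. Missing scores are skipped, and confidence is accepted as an input but not used.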


In [None]:
def calculate_rigor(soundness, presentation, contribution, rating, confidence):
    """
    Compute the Rigor score for a single review.

    Rigor is the arithmetic mean of soundness, contribution, and rating,
    taken over whichever of those three values are not missing.
    ``presentation`` and ``confidence`` are part of the signature but do
    not influence the default result; they are available for custom
    formulas.

    Parameters:
    -----------
    soundness : float or NaN
    presentation : float or NaN
    contribution : float or NaN
    rating : float or NaN
    confidence : float or NaN

    Returns:
    --------
    float : Mean of the non-missing inputs, or NaN if all three are missing
    """
    components = (soundness, contribution, rating)
    present = [value for value in components if not pd.isna(value)]

    # With nothing to average, report the score itself as missing
    return np.mean(present) if present else np.nan


def calculate_appeal(soundness, presentation, contribution, rating, confidence):
    """
    Compute the Appeal score for a single review.

    Appeal is the arithmetic mean of presentation, contribution, and
    rating, taken over whichever of those three values are not missing.
    ``soundness`` and ``confidence`` are part of the signature but do not
    influence the default result; they are available for custom formulas.

    Parameters:
    -----------
    soundness : float or NaN
    presentation : float or NaN
    contribution : float or NaN
    rating : float or NaN
    confidence : float or NaN

    Returns:
    --------
    float : Mean of the non-missing inputs, or NaN if all three are missing
    """
    components = (presentation, contribution, rating)
    present = [value for value in components if not pd.isna(value)]

    # With nothing to average, report the score itself as missing
    return np.mean(present) if present else np.nan


In [None]:
# Score every review with the two calculation functions.
# Both functions take the five raw metrics positionally, in this order.
metric_columns = ['soundness', 'presentation', 'contribution', 'rating', 'confidence']

for target_column, score_fn in (('rigor', calculate_rigor), ('appeal', calculate_appeal)):
    # Bind score_fn as a default argument so each lambda keeps its own function
    df[target_column] = df.apply(
        lambda row, fn=score_fn: fn(*(row[col] for col in metric_columns)),
        axis=1,
    )

# Report the value ranges and how many reviews received a usable score
print(f"Calculated Rigor and Appeal scores")
print(f"Rigor range: {df['rigor'].min():.2f} to {df['rigor'].max():.2f}")
print(f"Appeal range: {df['appeal'].min():.2f} to {df['appeal'].max():.2f}")
print(f"\nReviews with valid Rigor: {df['rigor'].notna().sum()}")
print(f"Reviews with valid Appeal: {df['appeal'].notna().sum()}")


## Create Scatter Plot for Selected Year


In [None]:
# Conference edition to visualize
selected_year = 'ICLR2024'  # Change this to 'ICLR2024' or 'ICLR2025'

# Keep only that edition's reviews; copy so the full table is left untouched
is_selected = df['conference'] == selected_year
df_year = df[is_selected].copy()

# A point needs both coordinates, so discard reviews missing either score
df_plot = df_year.dropna(subset=['rigor', 'appeal'])

print(f"Plotting {len(df_plot)} reviews for {selected_year}")
print(f"\nStatus distribution:")
print(df_plot['status'].value_counts())


In [None]:
# Define custom color map for statuses
def get_status_color(status):
    """
    Map a submission status to a plot color.

    The status is converted to a lowercase string and matched by substring.
    The first matching rule wins:

    - contains 'reject' but not 'desk'  -> 'red'
    - contains 'accept'                 -> 'green'
    - contains 'withdrawn'              -> 'grey'
    - contains both 'desk' and 'reject' -> 'black'
    - anything else                     -> 'blue'
    """
    label = str(status).lower()
    mentions_reject = 'reject' in label
    mentions_desk = 'desk' in label

    if mentions_reject and not mentions_desk:
        return 'red'
    if 'accept' in label:
        return 'green'
    if 'withdrawn' in label:
        return 'grey'
    if mentions_desk and mentions_reject:
        return 'black'

    # Fallback for statuses that match none of the rules above
    return 'blue'

# Assign one color to each status present in the filtered data
statuses = df_plot['status'].unique()
color_map = {}
for status in statuses:
    color_map[status] = get_status_color(status)

# One figure for all statuses; each status is drawn as its own scatter layer
fig, ax = plt.subplots(figsize=(12, 8))

marker_style = dict(alpha=0.6, s=50, edgecolors='black', linewidths=0.5)
for status, color in color_map.items():
    subset = df_plot[df_plot['status'] == status]
    ax.scatter(
        subset['rigor'],
        subset['appeal'],
        label=status,
        color=color,
        **marker_style
    )

# Axis labels, title, and a legend placed outside the plotting area
ax.set_xlabel('Rigor', fontsize=14, fontweight='bold')
ax.set_ylabel('Appeal', fontsize=14, fontweight='bold')
ax.set_title(f'Rigor vs Appeal - {selected_year}\n(Colored by Submission Status)', 
             fontsize=16, fontweight='bold', pad=20)
ax.legend(title='Status', bbox_to_anchor=(1.05, 1), loc='upper left')
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Text recap of what was drawn
print(f"\nPlot created with {len(df_plot)} data points")
print(f"\nColor mapping:")
for status, color in color_map.items():
    print(f"  {status}: {color}")


## Optional: Create Separate Plots for Each Status

This creates one subplot per status for the selected year, arranged in a three-column grid.


In [None]:
# Create one subplot per status, laid out in a fixed three-column grid.
# Missing statuses are dropped first: sorted() cannot compare NaN with strings,
# and a NaN status never matches the equality filter below anyway.
statuses = sorted(df_plot['status'].dropna().unique())
n_statuses = len(statuses)

# Calculate grid dimensions (ceiling division gives the number of rows needed)
n_cols = 3
n_rows = (n_statuses + n_cols - 1) // n_cols

if n_statuses == 0:
    # plt.subplots rejects a zero-row grid, so report the empty selection instead
    print(f"No statuses to plot for {selected_year}")
else:
    # squeeze=False always yields a 2-D array of Axes, so flattening gives a
    # uniform 1-D sequence even when there is a single row (1-3 statuses).
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 5*n_rows), squeeze=False)
    axes = axes.flatten()

    for idx, status in enumerate(statuses):
        ax = axes[idx]
        status_data = df_plot[df_plot['status'] == status]

        ax.scatter(
            status_data['rigor'],
            status_data['appeal'],
            alpha=0.6,
            s=50,
            edgecolors='black',
            linewidths=0.5,
            color=color_map[status]
        )

        ax.set_xlabel('Rigor', fontsize=12)
        ax.set_ylabel('Appeal', fontsize=12)
        ax.set_title(f'{status} (n={len(status_data)})', fontsize=12, fontweight='bold')
        ax.grid(True, alpha=0.3)

    # Hide the grid cells left over when the status count is not a multiple of n_cols
    for idx in range(n_statuses, len(axes)):
        axes[idx].axis('off')

    plt.suptitle(f'Rigor vs Appeal by Status - {selected_year}', 
                 fontsize=16, fontweight='bold', y=1.02)
    plt.tight_layout()
    plt.show()


## Summary Statistics by Status


In [None]:
# Per-status mean and standard deviation for each score column.
# The number of rigor values per status is added once, next to rigor.
stat_columns = ['rigor', 'appeal', 'rating', 'soundness', 'presentation', 'contribution']
aggregations = {column: ['mean', 'std'] for column in stat_columns}
aggregations['rigor'].append('count')

grouped = df_plot.groupby('status')
summary = grouped.agg(aggregations).round(2)

print(f"Summary Statistics for {selected_year}:")
print("="*80)
print(summary)
