In [1]:
import os
import pickle
import pandas as pd
import re
import json
from pydantic import BaseModel
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
from refresh_db import ImmaculateGridResult, ImmaculateGridUtils
import numpy as np
from copy import deepcopy 
from typing import Dict, List, Tuple, Any
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from datetime import datetime


In [2]:
INPUT_GRID_RESULTS_FILE_PATH = './results.csv'
INPUT_PROMPT_DATA_PATH = './immaculate_grid_prompt_data.csv'
COLOR_MAP = {"Sam": "red", "Keith": "blue", "Will": "purple", "Rachel": "green", "Cliff": "orange"}
PDF_FILENAME = "./graphs_output.pdf"

def to_percent(y, position):
    """Convert a decimal to a percentage string."""
    return f"{100 * y:.0f}%"

# Expand user directory and open the pickle file
data = pd.read_csv(INPUT_GRID_RESULTS_FILE_PATH, index_col=False)
texts = ImmaculateGridUtils.df_to_immaculate_grid_objs(data)

# Get the current grid number by selecting the max grid number from all ImmaculateGridResult objects for each person
current_grid_number = max(
    [
        max(
            [ImmaculateGridUtils.extract_grid_number_from_text(result.text) for result in texts[person] if ImmaculateGridUtils.extract_grid_number_from_text(result.text) is not None]
        )
        for person in texts
    ]
)

# Make a dictionary called "reversed_dict" that is the reverse of texts
# Initialize the reversed dictionary
reversed_dict = {}

# Iterate over each name and the list of grid objects
for name, grid_objects in texts.items():
    for grid_obj in grid_objects:
        # Extract the grid number from the text field of the object
        grid_number = ImmaculateGridUtils.extract_grid_number_from_text(grid_obj.text)

        if grid_number is not None:
            # Set up the reversed dictionary so that the grid number points to the player and their result
            reversed_dict.setdefault(grid_number, {})[name] = grid_obj

### --- Make a dataframe for score, correctness, and average_score_of_correct

# Initialize an empty list to store rows
rows = []

# Loop through the texts to gather data
for person, grid_objects in texts.items():
    for grid_obj in grid_objects:
        # Extract the grid number from the text field of the object
        grid_number = ImmaculateGridUtils.extract_grid_number_from_text(grid_obj.text)
        
        if grid_number is not None:
            # Calculate average rarity
            total_score_of_correct_squares = grid_obj.score - (100 * (9 - grid_obj.correct))
            if grid_obj.correct == 0:
                average_score_of_correct_squares = 100
            else:
                average_score_of_correct_squares = total_score_of_correct_squares / grid_obj.correct
            
            # Produce dataset
            row = {
                "grid_number": grid_number,  # Use a colon here
                "name": grid_obj.name,
                "correct": grid_obj.correct,
                "score": grid_obj.score,
                "average_score_of_correct": average_score_of_correct_squares,
                "date": grid_obj.date,
                "matrix": grid_obj.matrix
            }
            rows.append(row)  # Append the row to the list

# Create the DataFrame from the list of rows
analysis_df = pd.DataFrame(rows)

# Ensure the 'date' column is in datetime format
analysis_df['date'] = pd.to_datetime(analysis_df['date'])

In [3]:
# Function to calculate smoothed metrics (score, correct, average_score_of_correct) from analysis_df
def calculate_smoothed_metrics(analysis_df: pd.DataFrame, smoothness: int) -> pd.DataFrame:
    """Generate a DataFrame of smoothed scores, correct values, and average scores over time."""
    metric_table = []

    # Group the data by 'name' to process each person individually
    grouped = analysis_df.groupby('name')

    # Loop through each person
    for name, group in grouped:
        group = group.sort_values(by='date')  # Sort by date to ensure time-based smoothing
        scores = group['score'].tolist()  # Extract scores
        corrects = group['correct'].tolist()  # Extract correct values
        avg_scores = group['average_score_of_correct'].tolist()  # Extract average score of correct
        dates = group['date'].tolist()  # Extract dates

        # Apply smoothing with the specified window size
        for i in range(smoothness, len(scores)):
            # Extract windows of each metric
            score_window = scores[i - smoothness:i]
            correct_window = corrects[i - smoothness:i]
            avg_score_window = avg_scores[i - smoothness:i]
            
            # Calculate smoothed values for each metric
            valid_scores = [score for score in score_window if score is not None]
            valid_corrects = [correct for correct in correct_window if correct is not None]
            valid_avg_scores = [avg_score for avg_score in avg_score_window if avg_score is not None]

            smoothed_score = sum(valid_scores) / len(valid_scores) if valid_scores else None
            smoothed_correct = sum(valid_corrects) / len(valid_corrects) if valid_corrects else None
            smoothed_avg_score = sum(valid_avg_scores) / len(valid_avg_scores) if valid_avg_scores else None
            smoothed_date = dates[i] if i < len(dates) else None

            # Only add rows where there are valid smoothed values
            if smoothed_score is not None and smoothed_correct is not None and smoothed_avg_score is not None:
                metric_table.append({
                    'name': name,
                    'grid_number': i,  # Could be i, or a corresponding column like group['grid_number']
                    'smoothed_score': smoothed_score,
                    'smoothed_correct': smoothed_correct,
                    'smoothed_avg_score': smoothed_avg_score,
                    'date': smoothed_date
                })

    # Create a DataFrame from the smoothed data
    return pd.DataFrame(metric_table, columns=["name", "grid_number", "smoothed_score", "smoothed_correct", "smoothed_avg_score", "date"]).dropna()

# Function to plot smoothed metrics using the smoothed DataFrame
def plot_smoothed_metrics(smoothed_df: pd.DataFrame, metric: str, title: str, ylabel: str) -> None:
    """Plot the smoothed metrics (score, correct, or average score) over time."""
    plt.figure(figsize=(12, 6))

    # Plot smoothed metrics for each person
    for name in smoothed_df['name'].unique():
        person_data = smoothed_df[smoothed_df['name'] == name]
        
        # Plot line with proper date formatting for the selected metric
        plt.plot(person_data['date'], person_data[metric], label=name, color=COLOR_MAP.get(name, 'blue'))

    # Formatting the plot
    plt.legend()
    plt.title(title)
    plt.xlabel("Date")
    plt.ylabel(ylabel)
    plt.xticks(rotation=45)

    # Adjust x-axis date formatting and tick placement
    plt.gca().xaxis.set_major_formatter(plt.matplotlib.dates.DateFormatter('%b %Y'))  # Format to month and year
    plt.gca().xaxis.set_major_locator(plt.MaxNLocator(nbins=10))  # Limit number of x-ticks to avoid clutter

    plt.tight_layout()  # Adjust layout for better display
    plt.show()

# Example Usage:
# Calculate the smoothed metrics from the DataFrame
smoothness = 28
smoothed_metrics_df = calculate_smoothed_metrics(analysis_df, smoothness)

# ---------
def calculate_win_rates(reversed_dict, criterion):
    """
    Calculate win rates based on a given criterion.

    Args:
        reversed_dict (dict): The games data.
        criterion (str): The criterion to calculate win rates ("overall", "correctness", "scores", "last_rate").

    Returns:
        dict: A dictionary of win rates for each person.
    """
    wins = {person: 0 for person in texts}
    for game in reversed_dict.values():
        if criterion == "overall":
            best = max((game[person].correct * 1000) + (1000 - game[person].score) for person in game)
            for person in game:
                effective_score = (game[person].correct * 1000) + (1000 - game[person].score)
                if effective_score == best:
                    wins[person] += 1
        elif criterion == "correctness":
            best = max(game[person].correct for person in game)
            for person in game:
                if game[person].correct == best:
                    wins[person] += 1
        elif criterion == "scores":
            best = min(game[person].score for person in game)
            for person in game:
                if game[person].score == best:
                    wins[person] += 1
        elif criterion == "last_rate":
            best = min((game[person].correct * 1000) + (1000 - game[person].score) for person in game)
            for person in game:
                effective_score = (game[person].correct * 1000) + (1000 - game[person].score)
                if effective_score == best:
                    wins[person] += 1

    for person in wins:
        wins[person] /= len(reversed_dict.values())

    return wins

def plot_win_rates(reversed_dict):
    """Plot win rates based on various criteria."""
    # Set a larger figure size to widen the graphs
    fig, axs = plt.subplots(2, 2, figsize=(12, 8))

    criteria = ["overall", "correctness", "scores", "last_rate"]
    titles = ["Win Rates (Overall)", "Win Rates (Correctness Only)", "Win Rates (Scores Only)", "Last Rate (Overall)"]

    for ax, criterion, title in zip(axs.flat, criteria, titles):
        wins = calculate_win_rates(reversed_dict, criterion)
        ax.bar([person for person in wins], wins.values(), color=[COLOR_MAP[person] for person in wins])
        ax.set_title(title)
        ax.set_yticks([i / 5 for i in range(6)])
        ax.set_ylim(0, 1)
        ax.yaxis.set_major_formatter(FuncFormatter(to_percent))

    # Adjust the layout of the subplots
    plt.subplots_adjust(hspace=0.5, wspace=0.5)
    plt.show()

In [34]:
# Graph number of immaculates
def make_fig_1(texts, COLOR_MAP):
    counts = []
    for person in texts:
        data = [(1 if obj.correct == 9 else 0) for obj in texts[person]]
        counts.append(sum(data))
    plt.bar([person for person in texts], counts, color=[COLOR_MAP[person] for person in texts])
    plt.title("Number of Immaculates")
    plt.show()
    
# Graph distributions
def make_fig_2(texts, COLOR_MAP):
    fig, axs = plt.subplots(3, 2, figsize=(10, 10))  # Create a 3x2 grid for 5 plots
    top_bar = 130
    
    # Flatten the axes array for easier indexing
    axs = axs.flatten()
    
    for i, person in enumerate(texts):
        distribution = [0 for _ in range(0, 10)]
        for row in texts[person]:
            distribution[row.correct] += 1
        
        # Plotting the distribution for each person
        axs[i].bar(range(0, 10), distribution, color=COLOR_MAP[person])
        axs[i].set_xticks(range(0, 10))
        axs[i].set_title(person)
        axs[i].set_ylim(0, 1.2*top_bar)
    
    # Hide the last subplot if it is not used
    if len(texts) < 6:
        axs[5].set_visible(False)
    
    fig.suptitle("Correctness Distribution")
    plt.subplots_adjust(hspace=0.5)
    plt.show()

# Graph average correct
def make_fig_3(analysis_df, COLOR_MAP):
    title = "Average Correct"
    analysis_summary = analysis_df.groupby('name')['correct'].mean().reset_index()
    
    plt.bar(
        analysis_summary.name, 
        analysis_summary.correct, 
        color=[COLOR_MAP[person] for person in analysis_summary.name])
    plt.title(title)
    plt.show()
    
# Graph average score
def make_fig_4(analysis_df, COLOR_MAP):
    title = "Average Score"
    analysis_summary = analysis_df.groupby('name')['score'].mean().reset_index()
    
    plt.bar(
        analysis_summary.name, 
        analysis_summary.score, 
        color=[COLOR_MAP[person] for person in analysis_summary.name])
    plt.title(title)
    plt.show()
    
# Graph average rarity of correct square
def make_fig_5(analysis_df, COLOR_MAP):
    title = "Average Rarity of Correct Square"
    analysis_summary = analysis_df.groupby('name')['average_score_of_correct'].mean().reset_index()
    
    plt.bar(
        analysis_summary.name, 
        analysis_summary.average_score_of_correct, 
        color=[COLOR_MAP[person] for person in analysis_summary.name])
    plt.title(title)
    plt.show()

# Plot each metric separately
def make_fig_6(smoothed_metrics_df):
    plot_smoothed_metrics(smoothed_metrics_df, 'smoothed_score', "Smoothed Scores Over Time", "Smoothed Score")

def make_fig_7(smoothed_metrics_df):
    plot_smoothed_metrics(smoothed_metrics_df, 'smoothed_correct', "Smoothed Correct Over Time", "Smoothed Correct")

def make_fig_8(smoothed_metrics_df):
    plot_smoothed_metrics(smoothed_metrics_df, 'smoothed_avg_score', "Smoothed Avg Score of Correct Over Time", "Smoothed Avg Score of Correct")

def make_fig_9(reversed_dict):
    plot_win_rates(reversed_dict)

# Function to calculate the spaces needed to align the fields
def format_record(rank, name, score, date, game_id, name_width=12, score_width=6, date_width=10, game_id_width=8):
    name_padding = ' ' * (name_width - len(name))
    score_padding = ' ' * (score_width - len(str(score)))
    date_padding = ' ' * (date_width - len(date))
    game_id_padding = ' ' * (game_id_width - len(str(game_id)))
    
    return f'{rank:<4} | {name}{name_padding} | {score}{score_padding} | {date}{date_padding} | {game_id}{game_id_padding}'

def make_fig_10(texts, pdf):
    """
    Creates a summary page in the PDF with the best and worst scores.

    Parameters:
    - texts: Data used for creating score records.
    - pdf: The PdfPages object for saving figures.
    """

    # Prepare score records
    score_records = []
    for person, games in texts.items():
        for game in games:
            grid_id = ImmaculateGridUtils.extract_grid_number_from_text(game.text)
            score_records.append((person, game.score, game.date, grid_id))
    
    # Sort records by score
    sorted_records = sorted(score_records, key=lambda x: x[1])
    
    # Extract best and worst scores
    best_records = sorted_records[:25]
    worst_records = sorted_records[-25:][::-1]
    
    # Create a summary page with results
    plt.figure(figsize=(8.5, 11))
    plt.text(0.5, 0.97, 'Best and Worst Scores', fontsize=25, ha='center', va='top', fontweight='bold')
        
    # Display best scores in a structured format with dynamic spacing
    plt.text(0, 0.85, 'Best Scores:', fontsize=16, ha='left', va='top', fontweight='bold')
    plt.text(0, 0.80, 'Rank | Name        | Score  | Date       | Game ID', fontsize=10, ha='left', va='top')
    
    for i, (name, score, date, game_id) in enumerate(best_records):
        record_text = format_record(i + 1, name, score, date, game_id)
        plt.text(0, 0.75 - i * 0.025, record_text, fontsize=10, ha='left', va='top')
    
    # Worst Scores Section
    plt.text(0.6, 0.85, 'Worst Scores:', fontsize=16, ha='left', va='top', fontweight='bold')
    plt.text(0.6, 0.80, 'Rank | Name        | Score  | Date       | Game ID', fontsize=10, ha='left', va='top')
    
    # Display worst scores in a structured format with dynamic spacing
    for i, (name, score, date, game_id) in enumerate(worst_records):
        record_text = format_record(i + 1, name, score, date, game_id)
        plt.text(0.6, 0.75 - i * 0.025, record_text, fontsize=10, ha='left', va='top')

    
    plt.axis('off')  # Hide axes for the results page
    pdf.savefig()  # Save the results page to the PDF
    plt.close()  # Close the figure

In [35]:
def create_pdf_with_graphs_cover_and_toc(texts, COLOR_MAP, analysis_df, smoothed_metrics_df, reversed_dict, pdf_filename):
    """
    Creates a PDF booklet with a cover page, table of contents, various graphs, 
    and a summary table of best and worst scores based on the provided data.

    Parameters:
    - texts: Data used for creating graphs.
    - COLOR_MAP: Color mapping for the graphs.
    - analysis_df: DataFrame containing analysis metrics.
    - smoothed_metrics_df: DataFrame for smoothed metrics over time.
    - reversed_dict: Dictionary for win rates.
    - pdf_filename: Name of the output PDF file.
    """
    # Use a non-interactive backend to prevent plots from rendering to the screen
    plt.switch_backend('Agg')

    # Get today's date in a readable format
    today_date = datetime.now().strftime('%B %d, %Y')

    # List of graph-making functions with their respective arguments and titles
    graph_functions = [
        (make_fig_1, (texts, COLOR_MAP), "Number of Immaculates"),
        (make_fig_2, (texts, COLOR_MAP), "Correctness Distribution"),
        (make_fig_3, (analysis_df, COLOR_MAP), "Average Correct"),
        (make_fig_4, (analysis_df, COLOR_MAP), "Average Score"),
        (make_fig_5, (analysis_df, COLOR_MAP), "Average Rarity of Correct Square"),
        (make_fig_6, (smoothed_metrics_df,), "Smoothed Scores Over Time"),
        (make_fig_7, (smoothed_metrics_df,), "Smoothed Correct Over Time"),
        (make_fig_8, (smoothed_metrics_df,), "Smoothed Avg Score of Correct Over Time"),
        (make_fig_9, (reversed_dict,), "Win Rates")
    ]

    try:
        # Create a PDF file with multiple pages
        with PdfPages(pdf_filename) as pdf:
            # Create the cover page
            plt.figure(figsize=(8.5, 11))  # Set the page size to standard A4 or letter
            plt.text(0.5, 0.7, 'Immaculate Grid Analysis Results', fontsize=24, ha='center', va='center', fontweight='bold')
            plt.text(0.5, 0.6, f'Date of Analysis: {today_date}', fontsize=16, ha='center', va='center')
            plt.axis('off')  # Hide axes for the cover page
            pdf.savefig()  # Save the cover page to the PDF
            plt.close()  # Close the figure for the cover page

            # Create the Table of Contents page
            plt.figure(figsize=(8.5, 11))
            plt.text(0.5, 0.9, 'Table of Contents', fontsize=20, ha='center', va='top', fontweight='bold')

            # Add the list of graphs to the Table of Contents
            toc_item_y_position = 0.8
            for i, (_, _, title) in enumerate(graph_functions, start=1):
                plt.text(0.1, toc_item_y_position, f'{i}. {title}', fontsize=12, ha='left', va='top')
                toc_item_y_position -= 0.05  # Adjust the position for the next line

            # Add the summary table entry at the end of the TOC
            plt.text(0.1, toc_item_y_position, f'{len(graph_functions) + 1}. Table of Best and Worst Scores', fontsize=12, ha='left', va='top')

            
            plt.axis('off')  # Hide axes for the Table of Contents page
            pdf.savefig()  # Save the Table of Contents page to the PDF
            plt.close()  # Close the figure for the Table of Contents page

            # Add each graph to a new page in the PDF
            for func, args, _ in graph_functions:
                plt.figure()
                func(*args)  # Call the graph-making function with its arguments
                pdf.savefig()  # Save the current figure to the PDF
                plt.close()  # Close the figure to free up memory

            # Create the summary page for best and worst scores
            make_fig_10(texts, pdf)

    except Exception as e:
        print(f"An error occurred: {e}")

    print(f"PDF file '{pdf_filename}' has been created with a cover page, table of contents, and all graphs.")


create_pdf_with_graphs_cover_and_toc(texts, COLOR_MAP, analysis_df, smoothed_metrics_df, reversed_dict, pdf_filename=PDF_FILENAME)

  plt.show()
  plt.show()
  plt.show()
  plt.show()
  plt.show()


PDF file './graphs_output.pdf' has been created with a cover page, table of contents, and all graphs.


  plt.show()


In [6]:
##### Everything below this line incorporates prompt data
#### TODO: Incorporate into PDF

with open(os.path.expanduser(INPUT_PROMPT_DATA_PATH)) as f:
    prompt_df = pd.read_csv(f, header=None)
prompt_df.columns = ["game_id", "00", "01", "02", "10", "11", "12", "20", "21", "22"]
prompt_df = prompt_df.iloc[1:]

new_rows = []
for i, row in prompt_df.iterrows():
    new_row = {}
    for col, val in row.items():
        for char in ["(", "'", ")"]:
            val = val.replace(char, "")
        new_row[col] = val.replace(", ", " + ")
    new_rows.append(new_row)
        

prompt_df = pd.DataFrame(new_rows)
prompt_df['game_id'] = prompt_df['game_id'].astype(int)

In [7]:
team_list = ["Cubs", "Cardinals", "Brewers", "Reds", "Pirates", "Nationals", "Mets", "Marlins", "Phillies", "Braves", "Dodgers", "Diamondbacks", "Rockies", "Giants", "Padres", "Royals", "White Sox", "Twins", "Guardians", "Tigers", "Red Sox", "Yankees", "Blue Jays", "Rays", "Orioles", "Angels", "Athletics", "Astros", "Mariners", "Rangers"]

def is_team(category):
    for team in team_list:
        if team in category:
            return True
    return False

def get_team(category):
    for team in team_list:
        if team in category:
            return team
    return ""

In [8]:
def get_categories(prompt):
    parts = prompt.split(" + ")
    return parts[0].strip(), parts[1].strip()

categories = set()
for person, games in texts.items():
    for game in games:
        id = ImmaculateGridUtils.extract_grid_number_from_text(game.text)
        prompt_rows = prompt_df[prompt_df["game_id"] == id]
        if len(prompt_rows) != 1:
            continue
        prompts = prompt_rows.iloc[0][1:]
        for prompt in prompts:
            part_one, part_two = get_categories(prompt)
            categories.add(part_one)
            categories.add(part_two)

person_to_category = {}
for person, _ in texts.items():
    person_to_category[person] = {cat: [0, 0] for cat in categories}

In [9]:
for person, games in texts.items():
    for game in games:
        id = ImmaculateGridUtils.extract_grid_number_from_text(game.text)
        prompt_rows = prompt_df[prompt_df["game_id"] == id]
        if len(prompt_rows) != 1:
            continue
        prompts = prompt_rows.iloc[0][1:]

        matrix = game.matrix
        for i in range(3):
            for j in range(3):
                part_one, part_two = get_categories(prompts[f"{i}{j}"])
                if matrix[i][j]:
                    person_to_category[person][part_one][0] += 1
                    person_to_category[person][part_two][0] += 1
                person_to_category[person][part_one][1] += 1
                person_to_category[person][part_two][1] += 1

In [10]:
threshold = 25
categories_to_count = {}
for category in categories:
    categories_to_count[category] = []
for _, value in person_to_category.items():
    for category, (correct, total) in value.items():
        categories_to_count[category].append(total)
categories_clearing_threshold = [cat for cat in filter(lambda x: sum(categories_to_count[x]) / len(categories_to_count[x]) > threshold, categories_to_count)]


In [11]:
types = ["Team-Team", "Team-Stat", "Stat-Stat"]
person_to_type = {person: {t: [0, 0] for t in types} for person in person_to_category}

for person, games in texts.items():
    for game in games:
        id = ImmaculateGridUtils.extract_grid_number_from_text(game.text)
        prompt_rows = prompt_df[prompt_df["game_id"] == id]
        if len(prompt_rows) != 1:
            continue
        prompts = prompt_rows.iloc[0][1:]

        matrix = game.matrix
        for i in range(3):
            for j in range(3):
                part_one, part_two = get_categories(prompts[f"{i}{j}"])
                tag = ""
                if is_team(part_one) and is_team(part_two):
                    tag = "Team-Team"
                elif is_team(part_one) != is_team(part_two):
                    tag = "Team-Stat"
                else:
                    tag = "Stat-Stat"
                if matrix[i][j]:
                    person_to_type[person][tag][0] += 1
                person_to_type[person][tag][1] += 1

In [12]:
for person in person_to_type:
    print(person)
    for tag in person_to_type[person]:
        acc = person_to_type[person][tag][0] / person_to_type[person][tag][1]
        print(f"{tag}: {round(100 * acc)}% ({person_to_type[person][tag][1]})")
    print()

Keith
Team-Team: 78% (1635)
Team-Stat: 86% (1920)
Stat-Stat: 93% (576)

Rachel
Team-Team: 42% (1597)
Team-Stat: 81% (1906)
Stat-Stat: 93% (574)

Sam
Team-Team: 62% (1509)
Team-Stat: 90% (1845)
Stat-Stat: 93% (561)

Will
Team-Team: 69% (1626)
Team-Stat: 91% (1944)
Stat-Stat: 96% (597)

Cliff
Team-Team: 63% (1488)
Team-Stat: 90% (1806)
Stat-Stat: 96% (522)



In [13]:
for person, value in person_to_category.items():
    rankings = sorted([(cat, value[cat][0] / value[cat][1], value[cat][1]) for cat in value], key=lambda x: x[1], reverse=True)

    print(f"====={person}=====")
    count = 1
    for i, (category, accuracy, total) in enumerate(rankings):
        if total > threshold:
            print(f"{count}. {category} ({round(accuracy, 2)}) ({total})")
            count += 1
    print("\n\n")

=====Keith=====
1. 40+ WAR Career (1.0) (72)
2. Hall of Fame (0.99) (147)
3. 200+ Wins Career Pitching (0.98) (63)
4. Played Outfield min. 1 game (0.98) (51)
5. Pitched min. 1 game (0.98) (45)
6. New York Mets (0.97) (195)
7. 300+ HR Career Batting (0.97) (78)
8. 30+ HR Season Batting (0.97) (75)
9. 6+ WAR Season (0.97) (69)
10. All Star (0.97) (156)
11. 10+ Win Season Pitching (0.97) (60)
12. 2000+ Hits Career Batting (0.96) (84)
13. 500+ HR Career Batting (0.96) (27)
14. .300+ AVG Season Batting (0.96) (102)
15. Born Outside US 50 States and DC (0.96) (90)
16. New York Yankees (0.95) (216)
17. 2000+ K Career Pitching (0.95) (57)
18. 100+ RBI Season Batting (0.94) (108)
19. 100+ Run Season Batting (0.93) (60)
20. Washington Nationals (0.92) (162)
21. Played First Base min. 1 game (0.91) (57)
22. Only One Team (0.91) (78)
23. Played Catcher min. 1 game (0.9) (63)
24. ≤ 3.00 ERA Season (0.89) (75)
25. San Francisco Giants (0.89) (210)
26. Played In Major Negro Lgs (0.89) (27)
27. Boston

In [14]:
overall = []
for category in categories:
    values = []
    counts = []
    for person in person_to_category:
        values.append(person_to_category[person][category][0] / person_to_category[person][category][1])
        counts.append(person_to_category[person][category][1])
    if is_team(category):
        overall.append((category, sum(values) / len(values)))

print("Consensus Easiest Teams")
overall = sorted(overall, key=lambda x: x[1], reverse=True)
for i, (category, avg) in enumerate(overall):
    print(f"{(i + 1)}. {category} ({round(100 * avg)}%)")

Consensus Easiest Teams
1. New York Yankees (90%)
2. Chicago Cubs (89%)
3. New York Mets (89%)
4. Boston   Red Sox (84%)
5. Los Angeles Dodgers (83%)
6. Washington Nationals (81%)
7. Oakland Athletics (77%)
8. Atlanta Braves (76%)
9. Houston Astros (75%)
10. Texas Rangers (75%)
11. St. Louis Cardinals (75%)
12. San Francisco Giants (74%)
13. Philadelphia Phillies (73%)
14. Cincinnati Reds (71%)
15. Los Angeles Angels (69%)
16. Baltimore Orioles (68%)
17. Cleveland Guardians (68%)
18. Pittsburgh Pirates (68%)
19. Chicago  White Sox (67%)
20. San Diego Padres (66%)
21. Detroit Tigers (65%)
22. Minnesota Twins (64%)
23. Milwaukee Brewers (63%)
24. Toronto  Blue Jays (63%)
25. Seattle Mariners (62%)
26. Kansas City Royals (61%)
27. Arizona Diamondbacks (58%)
28. Miami Marlins (56%)
29. Colorado Rockies (51%)
30. Tampa Bay Rays (51%)


In [15]:
overall = []
for category in categories:
    values = []
    counts = []
    for person in person_to_category:
        values.append(person_to_category[person][category][0] / person_to_category[person][category][1])
        counts.append(person_to_category[person][category][1])
    if is_team(category):
        overall.append((category, np.std(values)))

print("Biggest Team Standard Deviations")
overall = sorted(overall, key=lambda x: x[1], reverse=True)
for i, (category, avg) in enumerate(overall):
    print(f"{(i + 1)}. {category} ({round(100 * avg)}%)")

Biggest Team Standard Deviations
1. Arizona Diamondbacks (17%)
2. Tampa Bay Rays (17%)
3. Miami Marlins (15%)
4. Baltimore Orioles (13%)
5. San Diego Padres (12%)
6. Philadelphia Phillies (12%)
7. Milwaukee Brewers (12%)
8. Minnesota Twins (12%)
9. San Francisco Giants (12%)
10. Pittsburgh Pirates (12%)
11. Seattle Mariners (12%)
12. Cincinnati Reds (11%)
13. Oakland Athletics (11%)
14. Toronto  Blue Jays (10%)
15. Detroit Tigers (10%)
16. Atlanta Braves (10%)
17. Kansas City Royals (10%)
18. Cleveland Guardians (10%)
19. Los Angeles Angels (9%)
20. Los Angeles Dodgers (9%)
21. St. Louis Cardinals (8%)
22. Houston Astros (8%)
23. Texas Rangers (8%)
24. Chicago Cubs (8%)
25. New York Yankees (7%)
26. Colorado Rockies (7%)
27. New York Mets (6%)
28. Chicago  White Sox (6%)
29. Washington Nationals (6%)
30. Boston   Red Sox (6%)


In [16]:
overall = []
for category in filter(is_team, categories):

    max_acc = 0
    for person in person_to_category:
        acc = person_to_category[person][category][0] / person_to_category[person][category][1]
        if acc > max_acc:
            max_acc = acc

    max_people = []
    for person in person_to_category:
        acc = person_to_category[person][category][0] / person_to_category[person][category][1]
        if abs(acc - max_acc) < 0.0001:
            max_people.append(person)
    
    overall.append((category, ", ".join(max_people)))

print("Best Person for Each Team")
for category, people in sorted(overall, key=lambda x: x[0]):
    print(category, "\t" if len(category) > 14 else "\t\t", people)
    


Best Person for Each Team
Arizona Diamondbacks 	 Keith
Atlanta Braves 		 Cliff
Baltimore Orioles 	 Keith
Boston   Red Sox 	 Sam
Chicago  White Sox 	 Will
Chicago Cubs 		 Will
Cincinnati Reds 	 Cliff
Cleveland Guardians 	 Will
Colorado Rockies 	 Keith
Detroit Tigers 		 Keith
Houston Astros 		 Keith
Kansas City Royals 	 Keith
Los Angeles Angels 	 Keith
Los Angeles Dodgers 	 Will
Miami Marlins 		 Keith
Milwaukee Brewers 	 Keith
Minnesota Twins 	 Keith
New York Mets 		 Keith
New York Yankees 	 Cliff
Oakland Athletics 	 Cliff
Philadelphia Phillies 	 Keith
Pittsburgh Pirates 	 Will
San Diego Padres 	 Keith
San Francisco Giants 	 Keith
Seattle Mariners 	 Keith
St. Louis Cardinals 	 Keith
Tampa Bay Rays 		 Keith
Texas Rangers 		 Keith
Toronto  Blue Jays 	 Keith
Washington Nationals 	 Keith


In [17]:
overall = []
for category in filter(is_team, categories):

    min_acc = 101
    for person in person_to_category:
        acc = person_to_category[person][category][0] / person_to_category[person][category][1]
        if acc < min_acc:
            min_acc = acc

    min_people = []
    for person in person_to_category:
        acc = person_to_category[person][category][0] / person_to_category[person][category][1]
        if abs(acc - min_acc) < 0.0001:
            min_people.append(person)
    
    overall.append((category, ", ".join(min_people)))

print("Worst Person for Each Team")
for category, people in sorted(overall, key=lambda x: x[0]):
    print(category, "\t" if len(category) > 14 else "\t\t", people)
    

Worst Person for Each Team
Arizona Diamondbacks 	 Cliff
Atlanta Braves 		 Rachel
Baltimore Orioles 	 Rachel
Boston   Red Sox 	 Rachel
Chicago  White Sox 	 Rachel
Chicago Cubs 		 Cliff
Cincinnati Reds 	 Rachel
Cleveland Guardians 	 Rachel
Colorado Rockies 	 Cliff
Detroit Tigers 		 Rachel
Houston Astros 		 Rachel
Kansas City Royals 	 Rachel
Los Angeles Angels 	 Rachel
Los Angeles Dodgers 	 Rachel
Miami Marlins 		 Cliff
Milwaukee Brewers 	 Rachel
Minnesota Twins 	 Rachel
New York Mets 		 Rachel
New York Yankees 	 Rachel
Oakland Athletics 	 Rachel
Philadelphia Phillies 	 Rachel
Pittsburgh Pirates 	 Rachel
San Diego Padres 	 Rachel
San Francisco Giants 	 Rachel
Seattle Mariners 	 Rachel
St. Louis Cardinals 	 Rachel
Tampa Bay Rays 		 Cliff
Texas Rangers 		 Rachel
Toronto  Blue Jays 	 Rachel
Washington Nationals 	 Rachel


In [18]:
overall = []
for category in filter(lambda x: not is_team(x) and x in categories_clearing_threshold, categories):

    max_acc = 0
    for person in person_to_category:
        acc = person_to_category[person][category][0] / person_to_category[person][category][1]
        if acc > max_acc:
            max_acc = acc

    max_people = []
    for person in person_to_category:
        acc = person_to_category[person][category][0] / person_to_category[person][category][1]
        if abs(acc - max_acc) < 0.0001:
            max_people.append(person)
    
    overall.append((category, ", ".join(max_people)))

print("Best Person for Each Category")
for category, people in sorted(overall, key=lambda x: x[0]):
    print(category, "\t" if len(category) > 14 else "\t\t", people)
    

Best Person for Each Category
.300+ AVG Career Batting 	 Will
.300+ AVG Season Batting 	 Will
10+ Win Season Pitching 	 Will
100+ RBI Season Batting 	 Sam
100+ Run Season Batting 	 Sam
20+ Win Season Pitching 	 Cliff
200+ Hits Season Batting 	 Cliff
200+ K Season Pitching 	 Will
200+ Wins Career Pitching 	 Keith, Will
2000+ Hits Career Batting 	 Will
2000+ K Career Pitching 	 Will
30+ HR Season Batting 	 Rachel, Will
30+ SB Season 		 Will
30+ Save Season Pitching 	 Keith
300+ HR Career Batting 	 Cliff
3000+ Hits Career Batting 	 Will
40+ HR Season Batting 	 Will
40+ WAR Career 		 Keith, Rachel, Sam, Cliff
500+ HR Career Batting 	 Cliff
6+ WAR Season 		 Sam
All Star 		 Will
Born Outside US 50 States and DC 	 Will
Cy Young 		 Cliff
First Round Draft Pick 	 Sam
Gold Glove 		 Cliff
Hall of Fame 		 Keith
MVP 		 Will
Only One Team 		 Sam
Pitched min. 1 game 	 Will
Played Catcher min. 1 game 	 Keith
Played Center Field min. 1 game 	 Sam, Cliff
Played First Base min. 1 game 	 Cliff
Played In M

In [19]:
overall = []
for category in filter(lambda x: not is_team(x) and x in categories_clearing_threshold, categories):

    min_acc = 101
    for person in person_to_category:
        acc = person_to_category[person][category][0] / person_to_category[person][category][1]
        if acc < min_acc:
            min_acc = acc

    min_people = []
    for person in person_to_category:
        acc = person_to_category[person][category][0] / person_to_category[person][category][1]
        if abs(acc - min_acc) < 0.0001:
            min_people.append(person)
    
    overall.append((category, ", ".join(min_people)))

print("Worst Person for Each Category")
for category, people in sorted(overall, key=lambda x: x[0]):
    print(category, "\t" if len(category) > 14 else "\t\t", people)
    

Worst Person for Each Category
.300+ AVG Career Batting 	 Keith
.300+ AVG Season Batting 	 Rachel
10+ Win Season Pitching 	 Rachel
100+ RBI Season Batting 	 Rachel
100+ Run Season Batting 	 Cliff
20+ Win Season Pitching 	 Keith, Rachel
200+ Hits Season Batting 	 Keith
200+ K Season Pitching 	 Rachel
200+ Wins Career Pitching 	 Sam
2000+ Hits Career Batting 	 Rachel
2000+ K Career Pitching 	 Rachel
30+ HR Season Batting 	 Cliff
30+ SB Season 		 Keith
30+ Save Season Pitching 	 Will
300+ HR Career Batting 	 Rachel
3000+ Hits Career Batting 	 Keith
40+ HR Season Batting 	 Rachel
40+ WAR Career 		 Will
500+ HR Career Batting 	 Rachel
6+ WAR Season 		 Rachel
All Star 		 Keith
Born Outside US 50 States and DC 	 Rachel
Cy Young 		 Rachel
First Round Draft Pick 	 Rachel
Gold Glove 		 Keith
Hall of Fame 		 Rachel
MVP 		 Keith
Only One Team 		 Rachel
Pitched min. 1 game 	 Cliff
Played Catcher min. 1 game 	 Rachel
Played Center Field min. 1 game 	 Rachel
Played First Base min. 1 game 	 Sam
Played

In [20]:
hardest_teams = {}

for person, games in texts.items():
    hardest_teams[person] = {team: [0, 0] for team in team_list}
    for game in games:
        id = ImmaculateGridUtils.extract_grid_number_from_text(game.text)
        prompt_rows = prompt_df[prompt_df["game_id"] == id]
        if len(prompt_rows) != 1:
            continue
        prompts = prompt_rows.iloc[0][1:]

        matrix = game.matrix
        for i in range(3):
            for j in range(3):
                part_one, part_two = get_categories(prompts[f"{i}{j}"])
                tag = ""
                if is_team(part_one) and is_team(part_two):
                    team_one = get_team(part_one)
                    team_two = get_team(part_two)
                    if matrix[i][j]:
                        hardest_teams[person][team_one][0] += 1
                        hardest_teams[person][team_two][0] += 1
                    hardest_teams[person][team_one][1] += 1
                    hardest_teams[person][team_two][1] += 1

print("Hardest Team-Team Intersections for Each Person", "\n\n")
for person in hardest_teams:
    print(f"====={person}=====")
    for i, (team, res) in enumerate(sorted(hardest_teams[person].items(), key = lambda x: x[1][0] / x[1][1], reverse=True)):
        print(f"{i + 1}. {team} ({round(100 * res[0] / res[1])}%)")
    print("\n\n\n")

consensus_intersection_difficulty = {}
for team in team_list:
    right = 0
    total = 0
    for person in hardest_teams:
        res = hardest_teams[person][team]
        right += res[0]
        total += res[1]
    consensus_intersection_difficulty[team] = right / total
    
print("=====Consensus=====")
for i, (team, pct) in enumerate(sorted(consensus_intersection_difficulty.items(), key=lambda x: x[1], reverse=True)):
    print(f"{i + 1}. {team} ({round(100 * pct)}%)")


Hardest Team-Team Intersections for Each Person 


=====Keith=====
1. Mets (97%)
2. Yankees (96%)
3. Dodgers (91%)
4. Nationals (90%)
5. Red Sox (90%)
6. Cubs (89%)
7. Cardinals (86%)
8. Giants (86%)
9. Athletics (82%)
10. Phillies (81%)
11. Rangers (80%)
12. Padres (79%)
13. Orioles (78%)
14. Astros (78%)
15. Angels (77%)
16. Marlins (77%)
17. Braves (76%)
18. Mariners (75%)
19. White Sox (73%)
20. Tigers (73%)
21. Brewers (72%)
22. Blue Jays (71%)
23. Diamondbacks (70%)
24. Guardians (70%)
25. Pirates (67%)
26. Rays (66%)
27. Royals (66%)
28. Twins (63%)
29. Reds (59%)
30. Rockies (54%)




=====Rachel=====
1. Cubs (91%)
2. Mets (74%)
3. Nationals (63%)
4. Yankees (60%)
5. Red Sox (60%)
6. Rangers (52%)
7. Dodgers (52%)
8. Athletics (49%)
9. White Sox (47%)
10. Diamondbacks (43%)
11. Cardinals (40%)
12. Astros (40%)
13. Giants (38%)
14. Guardians (37%)
15. Padres (37%)
16. Braves (36%)
17. Rays (36%)
18. Angels (36%)
19. Phillies (35%)
20. Marlins (33%)
21. Blue Jays (33%)
22. Reds (

In [21]:
hardest_team_stats = {}

for person, games in texts.items():
    hardest_team_stats[person] = {team: [0, 0] for team in team_list}
    for game in games:
        id = ImmaculateGridUtils.extract_grid_number_from_text(game.text)
        prompt_rows = prompt_df[prompt_df["game_id"] == id]
        if len(prompt_rows) != 1:
            continue
        prompts = prompt_rows.iloc[0][1:]

        matrix = game.matrix
        for i in range(3):
            for j in range(3):
                part_one, part_two = get_categories(prompts[f"{i}{j}"])
                tag = ""
                if is_team(part_one) and not is_team(part_two):
                    team_one = get_team(part_one)
                    if matrix[i][j]:
                        hardest_team_stats[person][team_one][0] += 1
                    hardest_team_stats[person][team_one][1] += 1
                elif not is_team(part_one) and is_team(part_two):
                    team_two = get_team(part_two)
                    if matrix[i][j]:
                        hardest_team_stats[person][team_two][0] += 1
                    hardest_team_stats[person][team_two][1] += 1

print("Hardest Team-Stats Intersections for Each Person", "\n\n")
for person in hardest_team_stats:
    print(f"====={person}=====")
    for i, (team, res) in enumerate(sorted(hardest_team_stats[person].items(), key = lambda x: x[1][0] / x[1][1], reverse=True)):
        print(f"{i + 1}. {team} ({round(100 * res[0] / res[1])}%)")
    print("\n\n\n")

consensus_intersection_difficulty = {}
for team in team_list:
    right = 0
    total = 0
    for person in hardest_teams:
        res = hardest_team_stats[person][team]
        right += res[0]
        total += res[1]
    consensus_intersection_difficulty[team] = right / total
    
print("=====Consensus=====")
for i, (team, pct) in enumerate(sorted(consensus_intersection_difficulty.items(), key=lambda x: x[1], reverse=True)):
    print(f"{i + 1}. {team} ({round(100 * pct)}%)")

Hardest Team-Stats Intersections for Each Person 


=====Keith=====
1. Mets (99%)
2. Nationals (95%)
3. Giants (95%)
4. Yankees (95%)
5. Twins (93%)
6. Braves (92%)
7. Phillies (92%)
8. Marlins (91%)
9. Astros (89%)
10. Brewers (89%)
11. Padres (89%)
12. Tigers (88%)
13. Red Sox (87%)
14. Mariners (87%)
15. Royals (86%)
16. Reds (86%)
17. Dodgers (86%)
18. Cardinals (86%)
19. Pirates (85%)
20. Cubs (84%)
21. Rangers (83%)
22. Angels (82%)
23. Rays (82%)
24. Diamondbacks (81%)
25. Orioles (80%)
26. Athletics (79%)
27. Rockies (77%)
28. Blue Jays (75%)
29. Guardians (71%)
30. White Sox (65%)




=====Rachel=====
1. Nationals (98%)
2. Yankees (98%)
3. Astros (96%)
4. Mets (95%)
5. Cubs (93%)
6. Red Sox (93%)
7. Braves (90%)
8. Cardinals (89%)
9. Brewers (89%)
10. Mariners (85%)
11. Giants (84%)
12. Dodgers (84%)
13. Tigers (84%)
14. Pirates (83%)
15. Reds (80%)
16. Angels (80%)
17. Phillies (79%)
18. White Sox (78%)
19. Rockies (77%)
20. Athletics (72%)
21. Rangers (70%)
22. Diamondbacks 

In [22]:
most_common_exact_intersections = {}

for game in texts["Sam"]:
    id = ImmaculateGridUtils.extract_grid_number_from_text(game.text)
    prompt_rows = prompt_df[prompt_df["game_id"] == id]
    if len(prompt_rows) != 1:
        continue
    prompts = prompt_rows.iloc[0][1:]
    for i in range(3):
        for j in range(3):
            part_one, part_two = get_categories(prompts[f"{i}{j}"])
            key = " + ".join(sorted([part_one, part_two]))
            if key not in most_common_exact_intersections:
                most_common_exact_intersections[key] = 0
            most_common_exact_intersections[key] += 1

for i, (combo, count) in enumerate(sorted(most_common_exact_intersections.items(), key = lambda x: x[1], reverse=True)):
    if count >= 5:
        print(f"{i + 1}. {combo} ({count})")

1. Houston Astros + New York Mets (7)
2. Cleveland Guardians + Oakland Athletics (7)
3. New York Yankees + San Francisco Giants (7)
4. Boston   Red Sox + Philadelphia Phillies (7)
5. Atlanta Braves + Pittsburgh Pirates (6)
6. Chicago  White Sox + Minnesota Twins (6)
7. Baltimore Orioles + Los Angeles Dodgers (6)
8. Boston   Red Sox + New York Yankees (6)
9. Cleveland Guardians + Toronto  Blue Jays (6)
10. Chicago Cubs + Oakland Athletics (6)
11. Oakland Athletics + Washington Nationals (6)
12. Philadelphia Phillies + Texas Rangers (6)
13. Detroit Tigers + St. Louis Cardinals (6)
14. Boston   Red Sox + Texas Rangers (6)
15. Boston   Red Sox + Cleveland Guardians (6)
16. Los Angeles Dodgers + New York Yankees (6)
17. Detroit Tigers + Minnesota Twins (6)
18. New York Yankees + San Diego Padres (6)
19. Boston   Red Sox + New York Mets (6)
20. San Francisco Giants + Toronto  Blue Jays (6)
21. Boston   Red Sox + Oakland Athletics (6)
22. Atlanta Braves + Chicago Cubs (6)
23. Houston Astros +

In [23]:
team_to_full_names = {}
full_names_to_team = {}
for team in team_list:
    for category in categories:
        if team in category:
            team_to_full_names[team] = category
            full_names_to_team[category] = team
            

missing = 0
present = 0
missing_maps = {}
print("Empty Team-Team Intersections")
for i, team in enumerate(sorted(team_list)):
    for other in sorted(team_list)[i + 1:]:
        key = " + ".join([team_to_full_names[team], team_to_full_names[other]])
        other_key =  " + ".join([team_to_full_names[other], team_to_full_names[team]])
        if key not in most_common_exact_intersections and other_key not in most_common_exact_intersections:
            print(key)
            missing += 1
            if team not in missing_maps:
                missing_maps[team] = 0
            if other not in missing_maps:
                missing_maps[other] = 0
            missing_maps[team] += 1
            missing_maps[other] += 1
        else:
            present += 1    

print("\n\n\n\nTotal Missing")
for i, (team, count) in enumerate(sorted(missing_maps.items(), key=lambda x: x[1], reverse=True)):
    if count > 0:
        print(f"{i + 1}. {team} ({count})")

Empty Team-Team Intersections
Toronto  Blue Jays + Colorado Rockies
Atlanta Braves + Tampa Bay Rays
Milwaukee Brewers + Colorado Rockies
Chicago Cubs + Arizona Diamondbacks
Chicago Cubs + Miami Marlins
Arizona Diamondbacks + Miami Marlins
Arizona Diamondbacks + Colorado Rockies
Miami Marlins + Tampa Bay Rays
Miami Marlins + Colorado Rockies
Baltimore Orioles + Tampa Bay Rays
Tampa Bay Rays + Colorado Rockies




Total Missing
1. Rockies (5)
2. Rays (4)
3. Marlins (4)
4. Diamondbacks (3)
5. Cubs (2)
6. Blue Jays (1)
7. Braves (1)
8. Brewers (1)
9. Orioles (1)


In [24]:
most_common_exact_intersections = {}

for game in texts["Rachel"]:
    id = ImmaculateGridUtils.extract_grid_number_from_text(game.text)
    prompt_rows = prompt_df[prompt_df["game_id"] == id]
    if len(prompt_rows) != 1:
        continue
    prompts = prompt_rows.iloc[0][1:]
    for i in range(3):
        for j in range(3):
            part_one, part_two = get_categories(prompts[f"{i}{j}"])
            teams = [get_team(part_one), get_team(part_two)]
            if "Guardians" in teams and "Athletics" in teams:
                print(game.matrix[i][j])

False
False
False
False
True
True
True


In [25]:
with open("./immaculate_grid_names.txt") as f:
    names = [line.strip() for line in f.readlines()]

FileNotFoundError: [Errno 2] No such file or directory: './immaculate_grid_names.txt'

In [None]:
name_to_count = {}
for name in names:
    name_to_count[name] = name_to_count.get(name, 0) + 1
sorted_names = sorted(name_to_count.items(), key=lambda x: x[1], reverse=True)

In [None]:
for i, (name, count) in enumerate(sorted_names):
    print(f"{i + 1}. {name} ({count})")

In [None]:
def create_variants(existing, all):
    if len(existing) == 9:
        all.add("".join([str(x) for x in existing]))
        return
    create_variants(deepcopy(existing) + [0], all)
    create_variants(deepcopy(existing) + [1], all)
    return
combos = set()
create_variants([], combos)

In [None]:
copied_variants = deepcopy(combos)
full_combos = set()
dates = []
for grid_num in range(current_grid_number):
    for person in texts:
        if grid_num not in texts[person]:
            continue
        obj = texts[person][grid_num]
        current = ""
        for text_row in obj.text.split("\n"):
            for char in text_row:
                if ord(char) == 11036: #"⬜️":
                    current += "0"
                elif ord(char) == 129001: #"🟩":
                    current += "1"
        assert len(current) == 9
        full_combos.add(current)
        if current in copied_variants:
            copied_variants.remove(current)
            dates.append((obj.date, grid_num, person, current, obj.text))

sorted_dates = sorted(dates, key = lambda x: x[1], reverse=True)
for date, _, person, combo, text in sorted_dates[:10]:
    print(person, date, "\n", text, end="\n\n\n")

In [None]:
texts["Keith"][152]