# To run the pipeline, go to the cells under "# RUNNING" at the end of this file.

In [92]:
import pandas as pd
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import List, Dict
import ast
from collections import defaultdict
import pandas as pd
import ast
from collections import defaultdict
import matplotlib.pyplot as plt
from collections import defaultdict
import concurrent.futures
import functools

In [93]:
# Maps the raw arena ID found in the CSV 'arena' column to the arena number
# used by the rest of this notebook. Keys and values are both strings.
# Note that the mapping is not monotonic in the ID
# (e.g. '54000007' -> '10', '54000024' -> '11').
ARENA_ID_TO_NUMBER_MAP = {
    '54000001': '1',
    '54000002': '2',
    '54000003': '3',
    '54000004': '4',
    '54000005': '5',
    '54000006': '6',
    '54000008': '7',
    '54000009': '8',
    '54000010': '9',
    '54000007': '10',
    '54000024': '11',
    '54000011': '12',
    '54000055': '13',
    '54000056': '14',
    '54000012': '15',
    '54000013': '16',
    '54000014': '17',
    '54000015': '18',
    '54000016': '19',
    '54000017': '20',
    '54000018': '21',
    '54000019': '22',
    '54000020': '23',
    '54000031': '24',
}


def convert_data(path):
    """
    Load a battle-log CSV and reshape it to one row per unique player.

    The CSV must contain the columns 'arena', 'players_0_hashtag',
    'players_1_hashtag', 'players_0_spells' and 'players_1_spells'. Both
    players of every battle are stacked into a single long table, rows whose
    arena ID is not in ARENA_ID_TO_NUMBER_MAP are dropped, and only the first
    remaining row per player tag is kept.

    Args:
        path: Anything accepted by pandas.read_csv (file path or buffer).

    Returns:
        pandas.DataFrame: columns 'player_tag', 'card_list' and 'arena'
        (the mapped arena number as a string).
    """
    data = pd.read_csv(path)

    # pandas parses an integer column that contains any missing value as
    # float64, so str() would give '54000001.0' and nothing would match the
    # map. Strip that trailing '.0' so such IDs still resolve.
    arena_ids = data['arena'].astype(str).str.replace(r'\.0$', '', regex=True)
    arena_num = arena_ids.map(ARENA_ID_TO_NUMBER_MAP)

    # Stack player 0 and player 1 of each battle; the arena column is
    # repeated so the three series stay aligned row-for-row.
    new_data = pd.DataFrame({
        'player_tag': pd.concat(
            [data['players_0_hashtag'], data['players_1_hashtag']],
            ignore_index=True,
        ),
        'card_list': pd.concat(
            [data['players_0_spells'], data['players_1_spells']],
            ignore_index=True,
        ),
        'arena': pd.concat([arena_num, arena_num], ignore_index=True),
    })

    # Drop unmapped arenas first, so a player whose first row has an unknown
    # arena can still be represented by a later valid row.
    new_data = new_data.dropna(subset=['arena'])
    return new_data.drop_duplicates(subset='player_tag', keep='first')

In [94]:
def process_dataframe_with_totals(input_df):
    """
    Count card occurrences per arena from a DataFrame.

    Reads the fixed columns 'arena' and 'card_list'. Each 'card_list' cell is
    parsed with ast.literal_eval and is expected to be a list of tuples; the
    first element of every non-empty tuple is counted as the card. Rows that
    cannot be read or parsed are skipped silently (best effort).

    Returns:
        dict: {arena -> {card -> count, ..., 'total_cards': sum of counts}}.
    """
    tallies = {}
    for _, record in input_df.iterrows():
        try:
            arena_key = record['arena']
            parsed = ast.literal_eval(record['card_list'])
            if isinstance(parsed, list):
                for entry in parsed:
                    # Only non-empty tuples carry a card identifier.
                    if not isinstance(entry, tuple) or len(entry) == 0:
                        continue
                    per_arena = tallies.setdefault(arena_key, {})
                    per_arena[entry[0]] = per_arena.get(entry[0], 0) + 1
        except (ValueError, SyntaxError, TypeError, KeyError):
            # Malformed row: ignore it and move on to the next one.
            continue

    # Attach the per-arena grand total under the reserved 'total_cards' key.
    return {
        arena_key: {**per_arena, 'total_cards': sum(per_arena.values())}
        for arena_key, per_arena in tallies.items()
    }

In [95]:
def add_arena_dicts(dict1, dict2):
    """
    Combine two arena card-count dictionaries into a new one.

    Counts for the same arena and card are added together. Any incoming
    'total_cards' entries are ignored and recomputed from the merged counts.
    Neither input is modified.

    Args:
        dict1 (dict): First {arena -> {card -> count}} dictionary.
        dict2 (dict): Second {arena -> {card -> count}} dictionary.

    Returns:
        dict: Merged {arena -> {card -> count, 'total_cards': total}}.
    """
    combined = {}
    for source in (dict1, dict2):
        for arena, cards in source.items():
            for card, count in cards.items():
                if card == 'total_cards':
                    continue
                bucket = combined.setdefault(arena, {})
                bucket[card] = bucket.get(card, 0) + count

    result = {}
    for arena, bucket in combined.items():
        result[arena] = {**bucket, 'total_cards': sum(bucket.values())}
    return result


In [96]:
def calculate_card_percentages(arena_counts):
    """
    Pivot {arena -> card -> count} into {card -> arena -> percentage}.

    A card's percentage in an arena is its count divided by that arena's
    'total_cards' value, times 100, rounded to 2 decimals. Every card gets an
    entry for every arena; combinations with no data stay at 0.0, as do all
    cards for an arena whose total is 0 or missing.

    Args:
        arena_counts (dict): Output shaped like process_dataframe_with_totals.

    Returns:
        dict: {card -> {arena -> percentage}}.
    """
    arena_keys = list(arena_counts)

    # Every card name seen anywhere; 'total_cards' is bookkeeping, not a card.
    card_names = {
        name
        for per_arena in arena_counts.values()
        for name in per_arena
        if name != 'total_cards'
    }

    # Start with a full card x arena grid of zeros.
    usage = {name: dict.fromkeys(arena_keys, 0.0) for name in card_names}

    for arena_key, per_arena in arena_counts.items():
        arena_total = per_arena.get('total_cards', 0)
        # An empty arena would divide by zero; leave its column at 0.0.
        if arena_total != 0:
            for name, tally in per_arena.items():
                if name != 'total_cards':
                    usage[name][arena_key] = round(
                        (tally / arena_total) * 100.0, 2
                    )

    return usage



In [97]:
import pandas as pd
import plotly.graph_objects as go

def plot_all_cards_interactive_smooth(all_data):
    """
    Build, save and display one interactive Plotly line chart for all cards.

    One spline-smoothed trace is added per card (sorted by name). Only one
    trace is visible at a time; a dropdown menu switches between cards and
    updates the title. Arenas absent from a card's data are plotted as 0.0.

    Args:
        all_data (dict): {card_name -> {arena_key -> percentage}}, where
            arena_key is the arena number as a string ('1' .. '24').

    Returns:
        None. Writes "all_cards_usage_plot_smooth.html" and calls fig.show().
        If all_data is empty, prints a message and returns without plotting.
    """

    print("Generating smooth, clean interactive plot for all cards...")

    all_arenas = range(1, 25)
    all_cards = sorted(all_data)

    if not all_cards:
        print("No data to plot.")
        return

    # The x axis uses string labels so Plotly treats arenas as categories.
    arena_labels = [str(arena_num) for arena_num in all_arenas]

    # Derive the range shown in the title from the arenas actually plotted,
    # so the title cannot drift out of sync with the x axis.
    arena_span = f"Arenas {all_arenas[0]}-{all_arenas[-1]}"

    def chart_title(card_name):
        return f'Usage Percentage for "{card_name}" ({arena_span})'

    # --- 1. One trace per card; only the first starts visible ---
    fig = go.Figure()
    for index, card_name in enumerate(all_cards):
        card_data = all_data.get(card_name, {})
        fig.add_trace(
            go.Scatter(
                x=arena_labels,
                y=[card_data.get(label, 0.0) for label in arena_labels],
                name=card_name,
                mode='lines+markers',
                # 'spline' draws smooth curves between the points.
                line=dict(width=2, shape='spline', smoothing=0.8),
                marker=dict(size=6),
                hovertemplate=(
                    f'<b>{card_name}</b><br>'
                    'Arena: %{x}<br>'
                    'Usage: %{y:.1f}%<extra></extra>'
                ),
                visible=(index == 0),
            )
        )

    # --- 2. Dropdown: each button shows exactly one trace and retitles ---
    dropdown_buttons = []
    for index, card_name in enumerate(all_cards):
        visibility = [False] * len(all_cards)
        visibility[index] = True
        dropdown_buttons.append(
            dict(
                label=card_name,
                method="update",
                args=[
                    {"visible": visibility},
                    {"title": chart_title(card_name)},
                ],
            )
        )

    # --- 3. Layout: minimal white theme, light horizontal grid only ---
    fig.update_layout(
        template='simple_white',
        plot_bgcolor='white',
        paper_bgcolor='white',
        title_text=chart_title(all_cards[0]),
        title_x=0.5,
        xaxis_title="Arena Number",
        yaxis_title="Usage Percentage (%)",
        font=dict(family='Arial, sans-serif', color='black'),
        xaxis=dict(
            showgrid=False,
            zeroline=False,
            tickfont=dict(size=12),
        ),
        yaxis=dict(
            gridcolor='#EAEAEA',
            gridwidth=1,
            zeroline=False,
            tickfont=dict(size=12),
            rangemode='tozero',
        ),
        updatemenus=[
            dict(
                active=0,
                buttons=dropdown_buttons,
                direction="down",
                pad={"r": 10, "t": 10},
                showactive=True,
                x=0.5,
                xanchor="center",
                y=1.15,
                yanchor="top",
            )
        ],
        # Figure size in pixels; adjust to change the aspect ratio.
        width=1000,
        height=600,
    )

    # Save the plot as an interactive HTML file
    filename = "all_cards_usage_plot_smooth.html"
    fig.write_html(filename)

    # Also display the plot in the notebook/browser
    fig.show()

    print(f"\nSuccessfully saved interactive plot to: {filename}")

In [98]:
def process_item_to_dict(item):
    """
    Thread-pool worker: turn one input item into an arena count dictionary.

    Chains convert_data and process_dataframe_with_totals. Any exception is
    reported on stdout and swallowed so one bad item does not abort the batch.

    Returns:
        dict | None: the arena dictionary, or None when processing failed.
    """
    try:
        return process_dataframe_with_totals(convert_data(item))
    except Exception as e:
        print(f"Error processing item {item}: {e}")
    # Reached only after a failure; callers filter out None results.
    return None

# RUNNING

In [99]:
# CSV files to process; each one is handled by process_item_to_dict.
input_data = ["/Users/raghava/Downloads/New Folder With Items/preprocessed_battle_log_full_batch-2.csv"]
print(f"Starting parallel processing for {len(input_data)} items...")
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
    # process_item_to_dict returns None for items that failed; drop those.
    converted_data = [
        result
        for result in executor.map(process_item_to_dict, input_data)
        if result is not None
    ]
print(f"Successfully processed {len(converted_data)} items.")
if converted_data:
    # Fold all per-file dictionaries into a single arena -> card -> count dict.
    arens_dict = functools.reduce(add_arena_dicts, converted_data)
    print("Calculating percentages...")
else:
    # reduce() on an empty list without an initial value raises TypeError,
    # so fall back to an empty result instead of crashing the cell.
    print("No data processed. Exiting.")
    arens_dict = {}
card_percentage_dict = calculate_card_percentages(arens_dict)


Starting parallel processing for 1 items...
Successfully processed 1 items.
Calculating percentages...


In [100]:
# Render the interactive per-card usage chart from the percentages computed
# in the previous cell (card_percentage_dict).
plot_all_cards_interactive_smooth(card_percentage_dict)

Generating smooth, clean interactive plot for all cards...



Successfully saved interactive plot to: all_cards_usage_plot_smooth.html
