# Interactive Player Data Explorer

This notebook provides interactive visualizations for exploring War Thunder player performance data.

Graphs can be explored interactively:
- Zoom: Mouse wheel or zoom tools in toolbar
- Pan: Click and drag
- Filter: Click on legend items to show/hide data
- Reset: Double-click to reset zoom
- Details: Hover over a data point, bar, or slice to see extra information

## Setup

Import libraries and dependencies, then add the project root and the notebook's directory to `sys.path` so that project modules can be imported from within the notebook.

In [21]:
# Import standard libraries
import json
import sys
import os
import datetime
from pathlib import Path
from collections import Counter, defaultdict
from typing import Optional

# Import third-party libraries
import pandas as pd
import numpy as np

# Import Plotly for interactive visualizations
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as pyo
from plotly.subplots import make_subplots

# Initialise Plotly's notebook integration. With connected=True the plotly.js bundle is
# loaded from the online CDN instead of being embedded in the notebook, so rendering
# the figures needs network access.
pyo.init_notebook_mode(connected=True)

# Derive the project paths from the current working directory.
# NOTE(review): this assumes the kernel was started in the notebook's own folder, two
# levels below the project root (e.g. <root>/src/replay_data_explorer) — confirm.
notebook_dir = Path.cwd()
project_root = notebook_dir.parent.parent  # Two levels up from the working directory
src_dir = notebook_dir  # The working directory itself (the notebook's folder)

# Prepend each path to sys.path unless it is already present. Every insert goes to
# index 0, so when both are added src_dir ends up ahead of project_root.
for path in [str(project_root), str(src_dir)]:
    if path not in sys.path:
        sys.path.insert(0, path)

# Echo the resolved locations so a wrong working directory is easy to spot.
print(f"Project root: {project_root}")
print(f"Source directory: {src_dir}")
print(f"Current working directory: {Path.cwd()}")

Project root: h:\Apps\warthog
Source directory: h:\Apps\warthog\src\replay_data_explorer
Current working directory: h:\Apps\warthog\src\replay_data_explorer


Import project libraries and initialize services

In [22]:
# Import project modules
from src.common.configuration import get_config
from src.common.utilities import get_root_directory
from src.common.enums import BattleType, Country, VehicleType
from src.common.models.vehicle_models import Vehicle
from src.common.factories import ServiceFactory
from src.replay_data_explorer.enums import BattleRatingTier
from src.replay_data_explorer.services import BattleRatingTierClassifier, DataFilterer, DataLoaders, TitleBuilder
from src.replay_data_explorer.common import hex_to_rgba
from src.replay_data_explorer.configuration.graph_configuration import *

# Initialize configuration
# `config` is the explorer-specific section of the project configuration; later cells
# read `config.player_name` and `config.country_filters` from it.
config = get_config().replay_data_explorer_config

# Initialize replay_data_grabber services
# NOTE(review): the "Game version ... not found in release datetime mapping; skipping"
# messages shown in this cell's output presumably come from one of these services
# loading vehicle data — confirm which call emits them.
service_factory = ServiceFactory()
vehicle_service = service_factory.get_vehicle_service()
replay_manager_service = service_factory.get_replay_manager_service()

# Initialize replay_data_explorer services and utility functions
# These module-level objects are used as globals by the plotting functions below
# (`data_filterer` for outlier removal, `title_builder` for figure titles) and by the
# data-loading cell (`data_loaders`, which is built on top of the replay manager service).
battle_rating_tier_classifier = BattleRatingTierClassifier()
data_filterer = DataFilterer()
data_loaders = DataLoaders(replay_manager_service)
title_builder = TitleBuilder()

Game version 2.47.0.137 from file H:\Apps\warthog\data\vehicle_data\processed_vehicle_data\processed_vehicle_data.2.47.0.137.json not found in release datetime mapping; skipping
Game version 2.47.0.138 from file H:\Apps\warthog\data\vehicle_data\processed_vehicle_data\processed_vehicle_data.2.47.0.138.json not found in release datetime mapping; skipping
Game version 2.47.0.139 from file H:\Apps\warthog\data\vehicle_data\processed_vehicle_data\processed_vehicle_data.2.47.0.139.json not found in release datetime mapping; skipping
Game version 2.47.0.138 from file H:\Apps\warthog\data\vehicle_data\processed_vehicle_data\processed_vehicle_data.2.47.0.138.json not found in release datetime mapping; skipping
Game version 2.47.0.139 from file H:\Apps\warthog\data\vehicle_data\processed_vehicle_data\processed_vehicle_data.2.47.0.139.json not found in release datetime mapping; skipping
Game version 2.47.0.140 from file H:\Apps\warthog\data\vehicle_data\processed_vehicle_data\processed_vehicle_d

## Data Loading

Load replay data and process it for analysis.

In [23]:
# Load the tier data and the performance data, both restricted to the player and
# the countries selected in the explorer configuration.
tier_df = data_loaders.get_tier_data(country_filters=config.country_filters, player_name=config.player_name)
performance_df = data_loaders.get_performance_data(country_filters=config.country_filters, player_name=config.player_name)

# Summarise what was loaded so an over-restrictive filter is obvious before plotting.
print(f"Performance data info: {performance_df.shape}")
if performance_df.empty:
    print("❌ No performance data found with current filters")
else:
    print(f"Battle Rating range: {performance_df['battle_rating'].min():.1f} - {performance_df['battle_rating'].max():.1f}")
    print(f"Score range: {performance_df['score'].min()} - {performance_df['score'].max()}")
    print(f"Date range: {performance_df['timestamp'].min()} to {performance_df['timestamp'].max()}")

Performance data info: (459, 7)
Battle Rating range: 1.0 - 6.7
Score range: 10 - 5380
Date range: 2025-07-10 23:35:40 to 2025-09-13 18:26:01


## Score vs Battle Rating with Tier


### Code

In [55]:
def create_score_vs_br_plot(performance_df: pd.DataFrame, tier_df: pd.DataFrame, *, player_name=None, country_filters=[], std_dev=None):
    """
    Create an interactive Plotly scatter plot of Score vs Battle Rating.

    Points are drawn as one trace per tier status. A dashed overall trend line is
    added on top, followed by one dotted trend line per tier status; the per-tier
    lines start hidden and can be enabled from the legend.

    Args:
        performance_df: DataFrame with performance data. The columns read here are
            'replay_file', 'battle_rating', 'score', 'player_name', 'country' and
            'timestamp'.
        tier_df: DataFrame with tier status data ('replay_file', 'tier_status',
            'br_delta'). May be empty, in which case every replay is labelled
            'Unknown' with a BR delta of 0.0.
        player_name: Optional player name for title
        country_filters: Optional list of countries shown in the title. Each item
            must expose a `.value` attribute. It only affects the title; no rows
            are filtered here. The default list is never mutated.
        std_dev: Optional standard deviation for outlier removal on 'score'

    Returns:
        Plotly figure object, or None when performance_df is empty
    """
    if performance_df.empty:
        print("No performance data available for plotting")
        return None

    # Merge performance data with tier data. The left join keeps replays that have
    # no tier row; they receive neutral placeholder values instead of NaN.
    if not tier_df.empty:
        merged_df = pd.merge(
            performance_df,
            tier_df[['replay_file', 'tier_status', 'br_delta']],
            on='replay_file',
            how='left'
        )
        merged_df['tier_status'] = merged_df['tier_status'].fillna('Unknown')
        merged_df['br_delta'] = merged_df['br_delta'].fillna(0.0)
    else:
        merged_df = performance_df.copy()
        merged_df['tier_status'] = 'Unknown'
        merged_df['br_delta'] = 0.0

    # Remove outliers if specified
    if std_dev is not None:
        merged_df = data_filterer.filter_outliers(merged_df, 'score', std_dev)
        print(f"Data shape after outlier removal: {merged_df.shape}")

    # Create the interactive scatter plot
    fig = go.Figure()

    # Add scatter traces for each tier status.
    # NOTE(review): only statuses listed in PLOTLY_BATTLE_RATING_TIER_STATUS_ORDER get
    # a trace; whether that includes the 'Unknown' placeholder is not visible here.
    for tier_status in PLOTLY_BATTLE_RATING_TIER_STATUS_ORDER:
        tier_data = merged_df[merged_df['tier_status'] == tier_status]
        if not tier_data.empty:
            tier_status_name = BATTLE_RATING_TIER_NAMES[tier_status]
            fig.add_trace(
                go.Scatter(
                    x=tier_data['battle_rating'],
                    y=tier_data['score'],
                    mode='markers',
                    name=tier_status_name,
                    marker=dict(
                        color=PLOTLY_BATTLE_RATING_TIER_STATUS_COLORS[tier_status],
                        size=8,
                        line=dict(width=1, color='white'),
                        opacity=0.7
                    ),
                    customdata=tier_data[['player_name', 'country', 'br_delta', 'timestamp', 'replay_file']],
                    hovertemplate=(
                        '<b>%{customdata[0]}</b><br>' +
                        'Battle Rating: %{x}<br>' +
                        'Score: %{y}<br>' +
                        'Country: %{customdata[1]}<br>' +
                        'Tier Status: ' + tier_status_name + '<br>' +
                        'BR Delta: %{customdata[2]:.2f}<br>' +
                        'Date: %{customdata[3]|%Y-%m-%d %H:%M}<br><extra></extra>'
                    )
                )
            )

    # Add overall trend line (needs at least 2 points). The fit is guarded the same
    # way as the per-tier fits below so that a failed fit only drops this one line
    # instead of aborting the whole figure.
    if len(merged_df) > 1:
        try:
            # Least-squares straight line through all points
            z = np.polyfit(merged_df['battle_rating'], merged_df['score'], 1)
            trend_line = np.poly1d(z)

            # Sample the line across the full BR range
            br_range = np.linspace(merged_df['battle_rating'].min(), merged_df['battle_rating'].max(), 100)
            trend_y = trend_line(br_range)

            fig.add_trace(
                go.Scatter(
                    x=br_range,
                    y=trend_y,
                    mode='lines',
                    name=f'Overall Trend (slope: {z[0]:.1f})',
                    line=dict(color=hex_to_rgba("#000000", PLOTLY_TRENDLINE_OPACITY), width=2, dash='dash'),
                    hovertemplate='Overall Trend<br>BR: %{x}<br>Predicted Score: %{y:.0f}<extra></extra>',
                    showlegend=True
                )
            )
        except Exception as e:
            print(f"Could not calculate overall trend line: {e}")

    # Add per-tier trend lines
    for tier_status in PLOTLY_BATTLE_RATING_TIER_STATUS_ORDER:
        tier_data = merged_df[merged_df['tier_status'] == tier_status]
        tier_status_name = BATTLE_RATING_TIER_NAMES[tier_status]
        if len(tier_data) > 1:  # Need at least 2 points for a trend line
            try:
                # Calculate trend line for this tier
                z_tier = np.polyfit(tier_data['battle_rating'], tier_data['score'], 1)
                trend_line_tier = np.poly1d(z_tier)

                # Create trend line points for this tier's BR range
                tier_br_range = np.linspace(tier_data['battle_rating'].min(), tier_data['battle_rating'].max(), 50)
                tier_trend_y = trend_line_tier(tier_br_range)

                # Use the same color as the tier's scatter points
                tier_color = PLOTLY_BATTLE_RATING_TIER_STATUS_COLORS[tier_status]

                fig.add_trace(
                    go.Scatter(
                        x=tier_br_range,
                        y=tier_trend_y,
                        mode='lines',
                        name=f'{tier_status_name} Trend ({z_tier[0]:.1f})',
                        line=dict(color=hex_to_rgba(tier_color, PLOTLY_TRENDLINE_OPACITY), width=1.5, dash='dot'),
                        hovertemplate=f'{tier_status_name} Trend<br>BR: %{{x}}<br>Predicted Score: %{{y:.0f}}<extra></extra>',
                        showlegend=True,
                        legendgroup=tier_status_name,  # Group with the scatter points
                        visible='legendonly'  # Hidden until enabled from the legend
                    )
                )
            except Exception as e:
                print(f"Could not calculate trend line for {tier_status_name}: {e}")

    # Build the graph's title
    title_filters = {}
    if player_name:
        title_filters["Player"] = player_name
    title_filters["Replays"] = len(merged_df)
    if country_filters:
        # Singular label only for exactly one country, matching the other charts.
        title_filters[f"Countr{'y' if len(country_filters) == 1 else 'ies'}"] = ', '.join([country.value for country in country_filters])
    if std_dev is not None:
        title_filters["σ"] = str(std_dev)
    title = title_builder.build_title("Score vs Battle Rating with Tier", filters=title_filters)

    # Update the graph's layout
    fig.update_layout(
        title={
            'text': title,
            'x': 0.5,
            'xanchor': 'center',
            'font': {'size': 16}
        },
        xaxis=dict(
            title='Battle Rating',
            gridcolor='lightgray',
            gridwidth=1,
            zeroline=False
        ),
        yaxis=dict(
            title='Score',
            gridcolor='lightgray',
            gridwidth=1,
            zeroline=False
        ),
        plot_bgcolor='white',
        width=1000,
        height=600,
        legend=dict(
            orientation="v",
            yanchor="top",
            y=1,
            xanchor="left",
            x=1.02
        ),
        margin=dict(r=150),  # Add right margin for legend
        hovermode='closest'
    )

    return fig

### Output

In [56]:
# The builder returns None when there is no performance data, so only call
# .show() when a figure was actually produced.
score_vs_br_fig = create_score_vs_br_plot(performance_df, tier_df, player_name=config.player_name, country_filters=config.country_filters)
if score_vs_br_fig is not None:
    score_vs_br_fig.show()

## Tier Frequency Analysis

### Code

In [None]:
def create_tier_frequency_pie_chart(tier_df: pd.DataFrame, *, player_name=None, country_filters=[]):
    """
    Build an interactive Plotly donut chart of how often each battle rating tier occurred.

    Tier statuses are taken in the order given by PLOTLY_BATTLE_RATING_TIER_STATUS_ORDER
    and slices with a zero count are left out.

    Args:
        tier_df: DataFrame with tier status data (the 'tier_status' column is counted)
        player_name: Optional player name for title
        country_filters: List of countries to mention in the title (items expose `.value`);
            the data itself is not filtered here

    Returns:
        Plotly figure object, or None when there is nothing to plot
    """
    if tier_df.empty:
        print("No tier data available for plotting")
        return None

    # Occurrences per raw tier status value
    status_counts = tier_df['tier_status'].value_counts()

    # Display name -> count for every known status (0 when the status never occurred)
    counts_by_name = {
        BATTLE_RATING_TIER_NAMES[status]: status_counts.get(status, 0)
        for status in PLOTLY_BATTLE_RATING_TIER_STATUS_ORDER
    }

    # Keep only the slices that actually have battles
    nonzero_counts = {name: n for name, n in counts_by_name.items() if n > 0}
    if not nonzero_counts:
        print("No tier data found after filtering")
        return None

    labels = list(nonzero_counts)
    values = list(nonzero_counts.values())

    # Resolve each label back to the first status carrying that display name,
    # then look up that status' colour.
    first_status_by_name = {}
    for status in PLOTLY_BATTLE_RATING_TIER_STATUS_ORDER:
        first_status_by_name.setdefault(BATTLE_RATING_TIER_NAMES[status], status)
    colors = [
        PLOTLY_BATTLE_RATING_TIER_STATUS_COLORS[first_status_by_name[label]]
        for label in labels
        if label in first_status_by_name
    ]

    # hole=0.3 turns the pie into a donut
    donut = go.Pie(
        labels=labels,
        values=values,
        hole=0.3,
        marker=dict(colors=colors, line=dict(color='white', width=2)),
        textinfo='label+percent+value',
        texttemplate='<b>%{label}</b><br>%{percent}<br>(%{value} battles)',
        hovertemplate=(
            '<b>%{label}</b><br>'
            'Count: %{value}<br>'
            'Percentage: %{percent}<br>'
            '<extra></extra>'
        ),
        textfont=dict(size=12),
    )
    fig = go.Figure(data=[donut])

    # Assemble the title from whichever filters are active
    title_filters = {}
    if player_name:
        title_filters["Player"] = player_name
    title_filters["Battles"] = len(tier_df)
    if country_filters:
        country_label = "Country" if len(country_filters) == 1 else "Countries"
        title_filters[country_label] = ', '.join(country.value for country in country_filters)
    title = title_builder.build_title("Battle Rating Tier Frequency", filters=title_filters)

    fig.update_layout(
        title=dict(text=title, x=0.5, xanchor='center', font=dict(size=16)),
        width=800,
        height=600,
        showlegend=False,
        plot_bgcolor='white',
    )

    return fig


def create_tier_frequency_by_country_bar_chart(performance_df: pd.DataFrame, tier_df: pd.DataFrame, *, player_name=None, country_filters=[]):
    """
    Build an interactive Plotly stacked bar chart of tier frequency (in percent) per country.

    Each country gets one bar; every tier status contributes one segment whose height
    is its share of that country's battles. Segments are labelled with the raw count.

    Args:
        performance_df: DataFrame with performance data ('replay_file', 'country')
        tier_df: DataFrame with tier status data ('replay_file', 'tier_status')
        player_name: Optional player name for title
        country_filters: List of countries to keep (items expose `.value`, which is
            compared against the 'country' column)

    Returns:
        Plotly figure object, or None when there is nothing to plot
    """
    if performance_df.empty or tier_df.empty:
        print("No data available for plotting")
        return None

    # Inner join: only replays present in both frames carry a country and a tier status
    merged_df = pd.merge(
        performance_df[['replay_file', 'country']],
        tier_df[['replay_file', 'tier_status']],
        on='replay_file',
        how='inner'
    )
    if merged_df.empty:
        print("No merged data available for plotting")
        return None

    # Countries present in the data, optionally narrowed to the requested ones
    available_countries = list(merged_df['country'].unique())
    if country_filters:
        wanted_names = [country.value for country in country_filters]
        available_countries = [name for name in available_countries if name in wanted_names]
    if not available_countries:
        print("No countries match the filters")
        return None

    # One record per country holding count/percentage for every tier status name
    country_tier_data = []
    for country_name in sorted(available_countries):
        country_rows = merged_df[merged_df['country'] == country_name]
        status_counts = country_rows['tier_status'].value_counts()
        n_battles = len(country_rows)

        stats_by_tier_name = {}
        for status in PLOTLY_BATTLE_RATING_TIER_STATUS_ORDER:
            tier_name = BATTLE_RATING_TIER_NAMES[status]
            n_in_tier = status_counts.get(status, 0)
            stats_by_tier_name[tier_name] = {
                'percentage': (n_in_tier / n_battles) * 100 if n_battles > 0 else 0,
                'count': n_in_tier,
                'total': n_battles
            }

        country_tier_data.append({
            'country': country_name,
            'tier_percentages': stats_by_tier_name,
            'total_battles': n_battles
        })

    fig = go.Figure()

    # The x positions and per-country totals are the same for every tier segment
    countries = [record['country'] for record in country_tier_data]
    totals = [record['total_battles'] for record in country_tier_data]

    for status in PLOTLY_BATTLE_RATING_TIER_STATUS_ORDER:
        tier_name = BATTLE_RATING_TIER_NAMES[status]
        tier_stats = [record['tier_percentages'][tier_name] for record in country_tier_data]
        percentages = [stats['percentage'] for stats in tier_stats]
        counts = [stats['count'] for stats in tier_stats]

        # Skip tiers that never occurred in any of the shown countries
        if not any(share > 0 for share in percentages):
            continue

        segment_labels = [str(n) if n > 0 else '' for n in counts]
        hover = ''.join([
            f'<b>{tier_name}</b><br>',
            'Country: %{x}<br>',
            'Percentage: %{y:.1f}%<br>',
            'Count: %{customdata[0]}<br>',
            'Total Battles: %{customdata[1]}<br>',
            '<extra></extra>',
        ])
        fig.add_trace(
            go.Bar(
                name=tier_name,
                x=countries,
                y=percentages,
                text=segment_labels,
                textposition='inside',
                textfont=dict(color='white', size=10),
                marker_color=PLOTLY_BATTLE_RATING_TIER_STATUS_COLORS[status],
                customdata=list(zip(counts, totals)),
                hovertemplate=hover
            )
        )

    # Assemble the title from whichever filters are active
    title_filters = {}
    if player_name:
        title_filters["Player"] = player_name
    title_filters["Battles"] = sum(totals)
    if country_filters:
        country_label = "Country" if len(country_filters) == 1 else "Countries"
        title_filters[country_label] = ', '.join(country.value for country in country_filters)
    title = title_builder.build_title("Battle Rating Tier Frequency by Country", filters=title_filters)

    # Tilt the country labels once there are more than five bars
    x_tick_angle = 45 if len(available_countries) > 5 else 0

    fig.update_layout(
        title=dict(text=title, x=0.5, xanchor='center', font=dict(size=16)),
        xaxis=dict(title='Country', tickangle=x_tick_angle),
        yaxis=dict(title='Percentage (%)', range=[0, 100]),
        barmode='stack',
        width=800,
        height=600,
        legend=dict(orientation="v", yanchor="top", y=1, xanchor="left", x=1.02),
        margin=dict(r=150, b=100),  # Room for the legend and the country labels
        plot_bgcolor='white'
    )

    return fig


def create_tier_frequency_by_br_bar_chart(performance_df: pd.DataFrame, tier_df: pd.DataFrame, *, player_name=None, country_filters=[]):
    """
    Build an interactive Plotly stacked bar chart of tier frequency (in percent) per battle rating.

    Each distinct battle rating gets one bar; every tier status contributes one segment
    whose height is its share of the battles at that rating. Segments are labelled with
    the raw count.

    Args:
        performance_df: DataFrame with performance data ('replay_file', 'battle_rating')
        tier_df: DataFrame with tier status data ('replay_file', 'tier_status')
        player_name: Optional player name for title
        country_filters: List of countries to mention in the title (items expose `.value`);
            the data itself is not filtered here

    Returns:
        Plotly figure object, or None when there is nothing to plot
    """
    if performance_df.empty or tier_df.empty:
        print("No data available for plotting")
        return None

    # Inner join: only replays present in both frames carry a battle rating and a tier status
    merged_df = pd.merge(
        performance_df[['replay_file', 'battle_rating']],
        tier_df[['replay_file', 'tier_status']],
        on='replay_file',
        how='inner'
    )
    if merged_df.empty:
        print("No merged data available for plotting")
        return None

    # Distinct battle ratings in ascending order; these become the x positions
    available_brs = sorted(merged_df['battle_rating'].unique())
    if not available_brs:
        print("No battle ratings found in data")
        return None

    # One record per battle rating holding count/percentage for every tier status name
    br_tier_data = []
    for rating in available_brs:
        rating_rows = merged_df[merged_df['battle_rating'] == rating]
        status_counts = rating_rows['tier_status'].value_counts()
        n_battles = len(rating_rows)

        stats_by_tier_name = {}
        for status in PLOTLY_BATTLE_RATING_TIER_STATUS_ORDER:
            tier_name = BATTLE_RATING_TIER_NAMES[status]
            n_in_tier = status_counts.get(status, 0)
            stats_by_tier_name[tier_name] = {
                'percentage': (n_in_tier / n_battles) * 100 if n_battles > 0 else 0,
                'count': n_in_tier,
                'total': n_battles
            }

        br_tier_data.append({
            'battle_rating': rating,
            'tier_percentages': stats_by_tier_name,
            'total_battles': n_battles
        })

    fig = go.Figure()

    # The x positions and per-rating totals are the same for every tier segment
    battle_ratings = [record['battle_rating'] for record in br_tier_data]
    totals = [record['total_battles'] for record in br_tier_data]

    for status in PLOTLY_BATTLE_RATING_TIER_STATUS_ORDER:
        tier_name = BATTLE_RATING_TIER_NAMES[status]
        tier_stats = [record['tier_percentages'][tier_name] for record in br_tier_data]
        percentages = [stats['percentage'] for stats in tier_stats]
        counts = [stats['count'] for stats in tier_stats]

        # Skip tiers that never occurred at any battle rating
        if not any(share > 0 for share in percentages):
            continue

        segment_labels = [str(n) if n > 0 else '' for n in counts]
        hover = ''.join([
            f'<b>{tier_name}</b><br>',
            'Battle Rating: %{x}<br>',
            'Percentage: %{y:.1f}%<br>',
            'Count: %{customdata[0]}<br>',
            'Total Battles: %{customdata[1]}<br>',
            '<extra></extra>',
        ])
        fig.add_trace(
            go.Bar(
                name=tier_name,
                x=battle_ratings,
                y=percentages,
                text=segment_labels,
                textposition='inside',
                textfont=dict(color='white', size=9),
                marker_color=PLOTLY_BATTLE_RATING_TIER_STATUS_COLORS[status],
                customdata=list(zip(counts, totals)),
                hovertemplate=hover
            )
        )

    # Assemble the title from whichever filters are active
    title_filters = {}
    if player_name:
        title_filters["Player"] = player_name
    title_filters["Battles"] = sum(totals)
    if country_filters:
        country_label = "Country" if len(country_filters) == 1 else "Countries"
        title_filters[country_label] = ', '.join(country.value for country in country_filters)
    title = title_builder.build_title("Tier Frequency by Battle Rating", filters=title_filters)

    # Tilt the tick labels once there are more than ten bars
    x_tick_angle = 45 if len(available_brs) > 10 else 0

    fig.update_layout(
        title=dict(text=title, x=0.5, xanchor='center', font=dict(size=16)),
        xaxis=dict(
            title='Battle Rating',
            tickangle=x_tick_angle,
            # Put one tick on every battle rating, shown with one decimal place
            tickvals=available_brs,
            ticktext=[f"{rating:.1f}" for rating in available_brs],
        ),
        yaxis=dict(title='Percentage (%)', range=[0, 100]),
        barmode='stack',
        width=1000,
        height=600,
        legend=dict(orientation="v", yanchor="top", y=1, xanchor="left", x=1.02),
        margin=dict(r=150, b=100),  # Room for the legend and the BR labels
        plot_bgcolor='white'
    )

    return fig

### Output

In [58]:
# Each builder returns None (after printing a message) when it has nothing to plot,
# so build and show the charts one by one and skip the ones that produced no figure.
tier_frequency_charts = (
    (create_tier_frequency_pie_chart, (tier_df,)),
    (create_tier_frequency_by_country_bar_chart, (performance_df, tier_df)),
    (create_tier_frequency_by_br_bar_chart, (performance_df, tier_df)),
)
for build_chart, frames in tier_frequency_charts:
    tier_frequency_fig = build_chart(*frames, player_name=config.player_name, country_filters=config.country_filters)
    if tier_frequency_fig is not None:
        tier_frequency_fig.show()