In [3]:
import duckdb
# Open the two DuckDB files this notebook works with; both live under one project root.
# NOTE(review): the root is an absolute, machine-specific path, whereas later cells use
# paths relative to the working directory.
PROJECT_ROOT = "c:/Code/Local Code/deadlock_match_prediction"

# Main database.
con = duckdb.connect(f"{PROJECT_ROOT}/data/deadlock.db")
# Raw match/player database (a file of the same name is later attached as `staging`).
raw_con = duckdb.connect(f"{PROJECT_ROOT}/match_player_raw.duckdb")

In [30]:
# Remove the derived player tables (a no-op when they are absent) so they can be rebuilt.
for stale_table in ("player_trends", "player_rolling_stats"):
    con.execute(f"DROP TABLE IF EXISTS {stale_table}")

# List the tables that remain, read from the sqlite_master catalog view.
table_listing = con.execute("""
    SELECT name FROM sqlite_master WHERE type='table';
    """)
tables = table_listing.fetchall()
print(f"tables: {tables}\n")

tables: [('hero_trends',), ('matches',), ('player_hero_trends',), ('player_matches',), ('player_matches_history',)]



In [31]:
import services.database_functions as dbf

# Rebuild the derived player tables with the project helpers (defined in
# services.database_functions; their internals are not visible in this notebook).
dbf.create_player_trends_table(con)
dbf.create_player_rolling_stats(con)

# Re-list the tables to confirm that the new ones now exist.
table_listing = con.execute("""
    SELECT name FROM sqlite_master WHERE type='table';
    """)
tables = table_listing.fetchall()
print(f"tables: {tables}\n")

tables: [('hero_trends',), ('matches',), ('player_hero_trends',), ('player_matches',), ('player_matches_history',), ('player_rolling_stats',), ('player_trends',)]



In [14]:
# Release both database handles: the main database first, then the raw one.
for open_handle in (con, raw_con):
    open_handle.close()

In [7]:
# Step 1: reopen the main database and mount the raw match/player file under the alias
# `staging`, so its tables can be referenced as staging.<table>.
# NOTE(review): the ATTACH path is relative and therefore depends on the current working
# directory, while the main database path is absolute — confirm both resolve to the
# same project directory.
main_db_path = "c:/Code/Local Code/deadlock_match_prediction/data/deadlock.db"
con = duckdb.connect(main_db_path)
con.execute("ATTACH DATABASE 'match_player_raw.duckdb' AS staging")

<duckdb.duckdb.DuckDBPyConnection at 0x2161ff1da30>

In [8]:
# Step 2: schema change — add an INTEGER `team` column unless one is already present.
ADD_TEAM_COLUMN_SQL = """
ALTER TABLE player_matches_history
ADD COLUMN IF NOT EXISTS team INTEGER
"""

# Step 3: backfill — copy `team` from staging.staging_cleaned for every history row that
# matches on (account_id, match_id) and has no team yet. Rows that already carry a team
# are left untouched, so re-running only fills the remaining gaps.
BACKFILL_TEAM_SQL = """
UPDATE player_matches_history AS pmh
SET team = sc.team
FROM staging.staging_cleaned AS sc
WHERE pmh.account_id = sc.account_id
  AND pmh.match_id   = sc.match_id
  AND pmh.team IS NULL
"""

con.execute(ADD_TEAM_COLUMN_SQL)
con.execute(BACKFILL_TEAM_SQL)

# Step 4: the staging database is no longer needed once the backfill is done.
con.execute("DETACH DATABASE staging")

<duckdb.duckdb.DuckDBPyConnection at 0x2161ff1da30>

In [12]:
# Clean up `team` values whose text form starts with "(": every run of non-digit
# characters is removed ('g' = replace all occurrences) and the remaining digits are
# cast back to INTEGER. Rows whose text form does not start with "(" are not touched.
# NOTE(review): a matching value that contains no digits would reduce to an empty string
# before the cast — confirm such values cannot occur.
NORMALIZE_TEAM_SQL = """
UPDATE player_matches_history
SET team = CAST(
    REGEXP_REPLACE(CAST(team AS VARCHAR), '[^0-9]+', '', 'g')
  AS INTEGER)
WHERE team::VARCHAR LIKE '(%'
"""
con.execute(NORMALIZE_TEAM_SQL)

<duckdb.duckdb.DuckDBPyConnection at 0x2161ff1da30>

In [13]:
# Spot-check the backfill: print the `team` value of 25 history rows.
# There is no ORDER BY, so which 25 rows come back is not specified.
team_sample_sql = """
    SELECT team FROM player_matches_history
        limit 25;
    """
query = con.execute(team_sample_sql).fetchall()
print(query)

[(1,), (0,), (0,), (1,), (0,), (1,), (0,), (0,), (0,), (0,), (1,), (1,), (1,), (0,), (1,), (1,), (0,), (0,), (0,), (0,), (0,), (0,), (1,), (0,), (1,)]


SQL (generated with Claude) that counts the matches in `matches` where all 12 players have a row in `player_trends` and at least 5 entries in `player_matches_history`.

In [38]:
import duckdb
import pandas as pd

# Count the matches whose full roster has enough data, then preview the newest ones.
#
# A match qualifies when:
#   * it has exactly PLAYERS_PER_MATCH distinct players in player_matches,
#   * every one of those players has a row in player_trends, and
#   * every one of those players has at least MIN_HISTORY_ENTRIES rows in
#     player_matches_history.
PLAYERS_PER_MATCH = 12    # required roster size
MIN_HISTORY_ENTRIES = 5   # minimum player_matches_history rows per player
SAMPLE_SIZE = 10          # number of qualifying matches to preview

# Shared CTE chain ending in `qualifying_matches` (one row per qualifying match_id).
# Both queries below are built on it, so the qualification rule is defined exactly once.
# The thresholds are interpolated with `:d`, so only integers can reach the SQL text.
# NOTE(review): players_with_trends assumes player_trends holds at most one row per
# account_id; duplicate rows would inflate the per-match sum — TODO confirm.
QUALIFYING_MATCHES_CTES = f"""
WITH match_player_counts AS (
    -- Count distinct players per match
    SELECT
        m.match_id,
        COUNT(DISTINCT pm.account_id) AS player_count
    FROM
        matches m
    JOIN
        player_matches pm ON m.match_id = pm.match_id
    GROUP BY
        m.match_id
),
players_with_trends AS (
    -- Flag each player of each match with 1 when a player_trends row exists
    SELECT
        pm.match_id,
        pm.account_id,
        CASE WHEN pt.account_id IS NOT NULL THEN 1 ELSE 0 END AS has_trend
    FROM
        player_matches pm
    LEFT JOIN
        player_trends pt ON pm.account_id = pt.account_id
),
players_with_history AS (
    -- Count history entries per player of each match (0 when the player has none)
    SELECT
        pm.match_id,
        pm.account_id,
        COUNT(pmh.match_id) AS history_count
    FROM
        player_matches pm
    LEFT JOIN
        player_matches_history pmh ON pm.account_id = pmh.account_id
    GROUP BY
        pm.match_id, pm.account_id
),
match_trend_stats AS (
    -- Per match: how many players have trends
    SELECT
        pwt.match_id,
        SUM(pwt.has_trend) AS players_with_trends
    FROM
        players_with_trends pwt
    GROUP BY
        pwt.match_id
),
match_history_stats AS (
    -- Per match: how many players reach the history threshold
    SELECT
        pwh.match_id,
        COUNT(CASE WHEN pwh.history_count >= {MIN_HISTORY_ENTRIES:d} THEN 1 END) AS players_with_enough_history
    FROM
        players_with_history pwh
    GROUP BY
        pwh.match_id
),
qualifying_matches AS (
    -- Matches meeting all three criteria
    SELECT
        mpc.match_id
    FROM
        match_player_counts mpc
    JOIN
        match_trend_stats mts ON mpc.match_id = mts.match_id
    JOIN
        match_history_stats mhs ON mpc.match_id = mhs.match_id
    WHERE
        mpc.player_count = {PLAYERS_PER_MATCH:d}
        AND mts.players_with_trends = {PLAYERS_PER_MATCH:d}
        AND mhs.players_with_enough_history = {PLAYERS_PER_MATCH:d}
)
"""

# Number of qualifying matches next to the total number of matches.
query = QUALIFYING_MATCHES_CTES + """
SELECT
    COUNT(*) AS matching_matches_count,
    (SELECT COUNT(*) FROM matches) AS total_matches_count
FROM
    qualifying_matches
"""

# The SAMPLE_SIZE most recent qualifying matches (newest start_time first).
sample_query = QUALIFYING_MATCHES_CTES + f"""
SELECT
    m.match_id,
    m.start_time,
    m.game_mode,
    m.match_mode
FROM
    matches m
JOIN
    qualifying_matches qm ON m.match_id = qm.match_id
ORDER BY
    m.start_time DESC
LIMIT {SAMPLE_SIZE:d}
"""

# The context manager closes the connection even when one of the queries raises, so a
# failed run does not leave the database file open.
with duckdb.connect("data/deadlock.db") as con:
    result = con.execute(query).fetchdf()
    print(
        f"Matches where all {PLAYERS_PER_MATCH} players have trends and "
        f"{MIN_HISTORY_ENTRIES}+ history entries: {result.iloc[0, 0]} "
        f"out of {result.iloc[0, 1]} total matches"
    )

    sample_matches = con.execute(sample_query).fetchdf()
    if not sample_matches.empty:
        print("\nSample of matches meeting criteria:")
        print(sample_matches)
    else:
        print("\nNo matches found meeting all criteria")

Matches where all 12 players have trends and 5+ history entries: 12690 out of 13407 total matches


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))


Sample of matches meeting criteria:
   match_id          start_time game_mode match_mode
0  35659956 2025-05-11 05:50:38    Normal   Unranked
1  35659817 2025-05-11 05:45:43    Normal   Unranked
2  35659653 2025-05-11 05:39:20    Normal   Unranked
3  35659402 2025-05-11 05:28:31    Normal   Unranked
4  35659009 2025-05-11 05:15:59    Normal   Unranked
5  35658812 2025-05-11 05:08:51    Normal   Unranked
6  35658760 2025-05-11 05:06:48    Normal   Unranked
7  35658665 2025-05-11 05:03:33    Normal   Unranked
8  35658327 2025-05-11 04:52:25    Normal   Unranked
9  35658119 2025-05-11 04:45:02    Normal   Unranked
