Create deadlock.db

In [1]:
import duckdb
import pandas as pd

# Open a connection to the database file deadlock.db; later cells reuse `con` for every statement.
con = duckdb.connect("deadlock.db")

Drop tables for reset

In [None]:
def drop_tables(db_loc="deadlock.db"):
    """
    Drop every table this notebook creates so the schema can be rebuilt.

    Parameters:
    - db_loc: path of the database file. Accepted for backward compatibility
      only; it is not used, because the statements run on the notebook-level
      `con` connection.
    """
    # player_profiles is part of the reset as well; leaving it behind would make
    # a later CREATE TABLE for it fail after a "reset".
    for table_name in (
        "matches",
        "player_matches",
        "player_trends",
        "hero_trends",
        "player_profiles",
    ):
        con.execute(f"DROP TABLE IF EXISTS {table_name}")

Create tables

In [None]:
# Match-level table: one row per match_id.
# IF NOT EXISTS keeps this cell re-runnable against a database file that already
# holds the table. An existing table keeps its current schema until it is dropped.
con.execute("""
CREATE TABLE IF NOT EXISTS matches (
    match_id BIGINT PRIMARY KEY,
    start_time TIMESTAMP,
    game_mode INTEGER,
    match_mode INTEGER,
    match_duration_s INTEGER,
    objectives_mask_team0 BIGINT,
    objectives_mask_team1 BIGINT,
    match_result VARCHAR
)
""")

In [None]:
# Per-player match table: one row per (account_id, match_id).
# IF NOT EXISTS keeps this cell re-runnable against a database file that already
# holds the table. An existing table keeps its current schema until it is dropped.
con.execute("""
CREATE TABLE IF NOT EXISTS player_matches (
    account_id BIGINT,
    match_id BIGINT,
    hero_id INTEGER,
    hero_level INTEGER,
    player_team INTEGER,
    player_kills INTEGER,
    player_deaths INTEGER,
    player_assists INTEGER,
    denies INTEGER,
    net_worth BIGINT,
    last_hits INTEGER,
    team_abandoned BOOLEAN,
    abandoned_time_s INTEGER,
    won BOOLEAN,

    PRIMARY KEY (account_id, match_id)
)
""")

In [None]:
# Per-player trend table: one row per (account_id, match_id, hero_id).
# IF NOT EXISTS keeps this cell re-runnable against a database file that already
# holds the table. An existing table keeps its current schema until it is dropped.
con.execute("""
CREATE TABLE IF NOT EXISTS player_trends (
    account_id BIGINT,
    match_id BIGINT,
    hero_id INTEGER,
    p_win_pct_3 FLOAT,
    p_win_pct_5 FLOAT,
    p_streak_3 VARCHAR,
    p_streak_5 VARCHAR,
    h_win_pct_3 FLOAT,
    h_win_pct_5 FLOAT,
    h_streak_3 VARCHAR,
    h_streak_5 VARCHAR,

    PRIMARY KEY (account_id, match_id, hero_id)
)
""")

In [None]:
# Per-hero trend table, keyed by hero, date range and window length.
# IF NOT EXISTS keeps this cell re-runnable against a database file that already
# holds the table. An existing table keeps its current schema until it is dropped.
con.execute("""
CREATE TABLE IF NOT EXISTS hero_trends (
    hero_id INTEGER,
    trend_start_date DATE,
    trend_end_date DATE,
    trend_date DATE,
    trend_window_days INTEGER,
    pick_rate FLOAT,
    win_rate FLOAT,
    average_kills FLOAT,
    average_deaths FLOAT,
    average_assists FLOAT,

    PRIMARY KEY (hero_id, trend_start_date, trend_end_date, trend_window_days)
)
""")


In [None]:
# Per-account profile table: one row per account_id, holding averaged trend
# columns, a match count, and "norm_"-prefixed counterparts of the averages.
# IF NOT EXISTS keeps this cell re-runnable against a database file that already
# holds the table. An existing table keeps its current schema until it is dropped.
con.execute("""
CREATE TABLE IF NOT EXISTS player_profiles (
    account_id BIGINT PRIMARY KEY,

    avg_p_win_pct_3 FLOAT,
    avg_p_win_pct_5 FLOAT,
    avg_h_win_pct_3 FLOAT,
    avg_h_win_pct_5 FLOAT,

    total_matches INTEGER,

    norm_avg_p_win_pct_3 FLOAT,
    norm_avg_p_win_pct_5 FLOAT,
    norm_avg_h_win_pct_3 FLOAT,
    norm_avg_h_win_pct_5 FLOAT
)
""")

Function to split the full match-history DataFrame and insert it into the matches, player_matches, and player_trends tables

In [2]:
def split_and_insert_all(con, full_df):
    """
    Slice the combined raw DataFrame into one frame per destination table and
    pass the slices to insert_dataframes.

    Parameters:
    - con: connection object forwarded unchanged to insert_dataframes
    - full_df: DataFrame that contains every column listed in `table_columns`

    The match slice is reduced to one row per match_id; the player and trend
    slices are forwarded as plain column selections.
    """
    # Column layout for each destination table
    table_columns = {
        'matches': [
            'match_id', 'start_time', 'game_mode', 'match_mode',
            'match_duration_s', 'objectives_mask_team0',
            'objectives_mask_team1', 'match_result',
        ],
        'player_matches': [
            'account_id', 'match_id', 'hero_id', 'hero_level', 'player_team',
            'player_kills', 'player_deaths', 'player_assists', 'denies',
            'net_worth', 'last_hits', 'team_abandoned', 'abandoned_time_s',
            'won',
        ],
        'player_trends': [
            'account_id', 'match_id', 'hero_id',
            'p_win_pct_3', 'p_win_pct_5', 'p_streak_3', 'p_streak_5',
            'h_win_pct_3', 'h_win_pct_5', 'h_streak_3', 'h_streak_5',
        ],
    }

    # Carve out the per-table slices
    matches_slice = full_df[table_columns['matches']].drop_duplicates(subset=['match_id'])
    players_slice = full_df[table_columns['player_matches']]
    trends_slice = full_df[table_columns['player_trends']]

    # Load all three slices in a single call
    insert_dataframes(
        con,
        match_df=matches_slice,
        player_df=players_slice,
        trends_df=trends_slice,
    )

    # NOTE(review): player_profiles is not populated here; the profile-building
    # step is disabled and no build_player_profiles helper is visible in this
    # notebook.
    print("*DEBUG* - split and insert completed!")


Insert DataFrames into DuckDB

In [5]:
def insert_dataframes(con, match_df=None, player_df=None, trends_df=None, hero_trends_df=None):
    """
    Inserts available DataFrames into their corresponding DuckDB tables.
    Only non-None DataFrames are inserted, and each insert uses INSERT OR IGNORE.

    Parameters:
    - con: active DuckDB connection
    - match_df: DataFrame for 'matches' table
    - player_df: DataFrame for 'player_matches' table
    - trends_df: DataFrame for 'player_trends' table
    - hero_trends_df: DataFrame for 'hero_trends' table

    Notes:
    - The table name in each `SELECT * FROM <name>` is the parameter name of the
      matching DataFrame, so the parameter names must stay in sync with the SQL.
    - `SELECT *` feeds columns by position, so each DataFrame's column order has
      to match the column order of its target table.
    """
    if match_df is not None:
        # The debug print sits inside the guard: it dereferences match_df, which
        # would raise AttributeError for callers that only pass other frames.
        print(f"\n\n*Debug*\n match_df headers are: {match_df.head()} and columns are: {match_df.columns.tolist()}")
        con.execute("INSERT OR IGNORE INTO matches SELECT * FROM match_df")

    if player_df is not None:
        con.execute("INSERT OR IGNORE INTO player_matches SELECT * FROM player_df")

    if trends_df is not None:
        con.execute("INSERT OR IGNORE INTO player_trends SELECT * FROM trends_df")

    if hero_trends_df is not None:
        con.execute("INSERT OR IGNORE INTO hero_trends SELECT * FROM hero_trends_df")

    print("✅ Data inserted successfully!")

Load the .csv file and run it through split_and_insert_all

In [6]:
# Read the raw match-history CSV and convert start_time (read with unit='s') to datetimes.
full_df = pd.read_csv("p_id_match_history.csv").assign(
    start_time=lambda frame: pd.to_datetime(frame['start_time'], unit='s')
)
print(f"\n*DEBUG* headers: {full_df.head()}\n")
print(f"\n*DEBUG* columns: {full_df.columns.tolist()}")

# Split the frame per table and insert it through the open connection.
split_and_insert_all(con, full_df)


*DEBUG* headers:    account_id  match_id  hero_id  hero_level          start_time  game_mode  \
0  1032337409    471321        2          19 2024-07-29 15:20:52          1   
1  1032337409    471609        6          17 2024-07-29 15:52:17          1   
2  1032337409    471883       11          15 2024-07-29 16:21:39          1   
3  1032337409    472119       20          18 2024-07-29 16:49:05          1   
4  1032337409    472400       50          12 2024-07-29 17:22:17          1   

   match_mode  player_team  player_kills  player_deaths  ...  \
0           1            1             2              4  ...   
1           1            0             4              7  ...   
2           1            0             0             11  ...   
3           1            0             2              3  ...   
4           1            0             1              6  ...   

   objectives_mask_team1    won  p_win_pct_3  p_win_pct_5         p_streak_3  \
0                  65121   True          N

NameError: name 'build_player_profiles' is not defined