In [None]:
import psycopg2
from sqlalchemy import create_engine, text
import os

# Database connection parameters, read from the environment so that no
# credentials are stored in the notebook
dbname = os.getenv("DB_NAME")
user = os.getenv("DB_USER")
password = os.getenv("DB_PASSWORD")
host = os.getenv("DB_HOST")
port = '5432'

# Connect to the PostgreSQL database
engine = create_engine(f'postgresql+psycopg2://{user}:{password}@{host}:{port}/{dbname}')

# Strip the 6-character '#eprod' suffix (matched case-insensitively) from the
# player names stored in tbl_placement_data.
# engine.begin() commits when the block exits normally and rolls back if the
# UPDATE raises, so no manual begin()/commit() bookkeeping is needed.
with engine.begin() as conn:

    conn.execute(
        text("""
    UPDATE tbl_placement_data
    SET player_name = SUBSTRING(player_name, 1, LENGTH(player_name) - 6)
    WHERE LOWER(player_name) LIKE '%#eprod';
    """)
    )



In [44]:
import psycopg2
from sqlalchemy import create_engine, text
from Levenshtein import distance
import hashlib


def hash_player_name(name, seed=0):
    """Derive a deterministic numeric id from a player name.

    The name and seed are joined as "<name>-<seed>", hashed with MD5, and the
    digest, read as one big integer, is reduced modulo 100000000. The result
    is therefore an int in the range 0..99999999 (it can have fewer than eight
    digits). Passing a different ``seed`` yields a different id for the same
    name, which is how an id collision can be resolved.
    """
    salted = f"{name}-{seed}".encode()

    # The big-endian integer value of the raw digest is the same number as the
    # hex digest parsed in base 16.
    digest_value = int.from_bytes(hashlib.md5(salted).digest(), "big")
    return digest_value % 100000000

def similar_names(name1, name2):
    """Heuristically decide whether two player names belong to the same player.

    Three rules are tried in order; the first one that matches returns True:

    1. Team-tag rule: the name with more space-separated words has its leading
       tag removed. The tag is the first word, or the first two words when the
       second word is all uppercase and at least one further word follows
       (e.g. "INF HR SuperPino"). If the remainder is longer than 3 characters
       and occurs, ignoring case, inside the other name, the names match.
    2. Containment rule: both names are longer than 3 characters and one
       contains the other, ignoring case.
    3. Typo rule: at least one name has 6 or more characters and the
       Levenshtein distance between the lowercased names is below 2.

    Args:
        name1: First player name.
        name2: Second player name.

    Returns:
        True if any rule matches, otherwise False.
    """
    if len(name1.split(" ")) > len(name2.split(" ")):
        longer_name, other = name1, name2
    else:
        longer_name, other = name2, name1

    longer_name_split = longer_name.split(" ")

    if len(longer_name_split) > 1:

        tag_index = 1

        # A team tag may itself contain a space. Only skip two words when a
        # third one remains: for a two-word name such as "ISG MAIKEL" the
        # uppercase second word is the player name, not part of the tag, and
        # skipping it would leave nothing to compare.
        if len(longer_name_split) > 2 and longer_name_split[1].isupper():
            tag_index = 2

        no_tag = " ".join(longer_name_split[tag_index:])

        if len(no_tag) > 3 and no_tag.lower() in other.lower():
            return True

    lowered1, lowered2 = name1.lower(), name2.lower()

    if len(name1) > 3 and len(name2) > 3 and (lowered1 in lowered2 or lowered2 in lowered1):
        return True

    # Check the cheap length condition first so the edit distance is only
    # computed when it can actually decide the outcome.
    if (len(name1) >= 6 or len(name2) >= 6) and distance(lowered1, lowered2) < 2:
        return True

    return False

# Connection settings come from the environment; only the port is fixed here
dbname, user, password, host = (
    os.getenv(key) for key in ("DB_NAME", "DB_USER", "DB_PASSWORD", "DB_HOST")
)
port = '5432'

# Create the SQLAlchemy engine for the PostgreSQL database (psycopg2 driver)
engine = create_engine(
    f'postgresql+psycopg2://{user}:{password}@{host}:{port}/{dbname}'
)

names = set()

already_there = set()

# Maps player_id -> list of alias names already stored for that player
groups = {}

# Read every distinct player name from the placement and final-standings
# tables, plus the (alias, player_id) pairs that already exist.
# The rows are fetched while the connection is still open; a result object
# should not be read after the connection that produced it has been released.
with engine.connect() as conn:

    placement_rows = conn.execute(
        text("""
    SELECT DISTINCT player_name FROM tbl_placement_data;
    """)
    ).fetchall()

    standings_rows = conn.execute(
        text("""
    SELECT DISTINCT player_name FROM tbl_final_standings;
    """)
    ).fetchall()

    alias_rows = conn.execute(
        text("""
    SELECT DISTINCT player_name, player_id FROM tbl_player_aliases;
    """)
    ).fetchall()

for row in placement_rows:
    names.add(row[0])

for row in standings_rows:
    names.add(row[0])

names = list(names)

for row in alias_rows:
    already_there.add(row[0])
    groups.setdefault(int(row[1]), []).append(row[0])

print(groups)
print(already_there)


# Normalise the raw names (drop the '#eprod' marker), discard NULLs and names
# that already have an alias row, and de-duplicate: "Foo" and "Foo#eprod"
# normalise to the same string and must only be grouped once. Sorting makes the
# grouping, and therefore the ids given to new players, reproducible between
# runs instead of depending on set iteration order.
pending_names = sorted(
    {raw_name.replace('#eprod', '') for raw_name in names if raw_name is not None}
    - already_there
)

for name in pending_names:
    # Attach the name to the first existing group containing a similar name.
    matching_id = next(
        (
            player_id
            for player_id, group_names in groups.items()
            if any(similar_names(name, existing_name) for existing_name in group_names)
        ),
        None,
    )
    if matching_id is not None:
        groups[matching_id].append(name)
        continue

    # No similar player yet: allocate a new id, bumping the seed until the
    # hashed id does not collide with an existing group.
    seed = 0
    candidate_id = hash_player_name(name, seed)
    while candidate_id in groups:
        print(name, groups[candidate_id])
        seed += 1
        candidate_id = hash_player_name(name, seed)
    groups[candidate_id] = [name]

# Groups that received at least one new alias and now hold more than one name;
# these are the merges worth checking by eye.
verify = []

# Bound parameters let the driver quote the values, so names containing
# apostrophes or other SQL metacharacters are stored correctly.
insert_alias = text(
    "INSERT INTO tbl_player_aliases (player_name, player_id) "
    "VALUES (:player_name, :player_id)"
)

# engine.begin() commits all inserts together on success and rolls them back
# if any statement fails.
with engine.begin() as conn:

    for player_id, aliases in groups.items():
        new_aliases = [alias for alias in aliases if alias not in already_there]

        for alias in new_aliases:
            conn.execute(
                insert_alias,
                {"player_name": alias, "player_id": player_id},
            )

        if new_aliases and len(aliases) > 1:
            verify.append(aliases)


verify


{41081567: ['Viktoker'], 67944654: ['Durkzera'], 31449829: ['Eusouolucas', 'eusouolucas', 'eusouolucas#eprod'], 31399259: ['Dr OH'], 69439183: ['KiBi'], 34175457: ['Koala Esbelto#eprod', 'Koala Esbelto'], 15252658: ['brikstn', 'Briks'], 73030110: ['stopteqh'], 99007395: ['ILikeHoboes'], 54858463: ['Quinho'], 66730554: ['Shawn'], 97832094: ['notrpion'], 66232112: ['LiuLi'], 15300237: ['prestivent', 'Prestivent', 'prestivent#eprod', 'VIT prestivent'], 74726477: ['Mistborn', 'Mistborn#eprod'], 83939924: ['小风很帅'], 41191331: ['MSI PSik'], 18771370: ['TSM enaek'], 64059200: ['TexSummers#eprod', 'TexSummers'], 10135535: ['ISG MAIKEL#eprod', 'ISG Maikel', 'MAIKEL', 'ISG MAIKEL'], 48552254: ['Zanlo'], 1373742: ['milala#eprod', 'FNC milala', 'milala'], 47393839: ['TL Rereplay', 'Rereplay', 'Liquid Rereplay#eprod', 'TOR Rereplay', 'Liquid Rereplay'], 73041350: ['Salamander145'], 91906293: ['bertasaurus'], 19446606: ['Stoneweaver'], 85805531: ['KitingisHard'], 59175963: ['Shener Shên'], 89127326: 

[]

In [45]:

# Show only the player ids that ended up with more than one alias
[(player_id, aliases) for player_id, aliases in groups.items() if len(aliases) > 1]


[(31449829, ['Eusouolucas', 'eusouolucas', 'eusouolucas#eprod']),
 (34175457, ['Koala Esbelto#eprod', 'Koala Esbelto']),
 (15252658, ['brikstn', 'Briks']),
 (15300237,
  ['prestivent', 'Prestivent', 'prestivent#eprod', 'VIT prestivent']),
 (74726477, ['Mistborn', 'Mistborn#eprod']),
 (64059200, ['TexSummers#eprod', 'TexSummers']),
 (10135535, ['ISG MAIKEL#eprod', 'ISG Maikel', 'MAIKEL', 'ISG MAIKEL']),
 (1373742, ['milala#eprod', 'FNC milala', 'milala']),
 (47393839,
  ['TL Rereplay',
   'Rereplay',
   'Liquid Rereplay#eprod',
   'TOR Rereplay',
   'Liquid Rereplay']),
 (89127326, ['Barney', 'Lab 010 Barney#eprod', 'Lab 010 Barney']),
 (31593866,
  ['INF HR SuperPino#eprod', 'INF HR SuperPino', 'INF HR Pino', 'SuperPino']),
 (78647039, ['Darth Nub', 'Darth Nub#eprod']),
 (3930687, ['Roontonzorg', 'Roontonzorg#eprod']),
 (35014403, ['LiShao', 'VSPO LiShao']),
 (2716447, ['Hisoka', 'Hisokalol']),
 (71919866, ['Fizz', 'MK Fizz#eprod', 'MK Fizz']),
 (50343287, ['MeanMisterKien', 'MeanMiste

In [43]:
name = "Tacca TFT"
# Named player_id rather than `hash` so the builtin hash() is not shadowed for
# the rest of the kernel session.
player_id = hash_player_name(name, seed=0)

print(player_id)

# Look up every alias stored under that id. The id is passed as a bound
# parameter, and the rows are fetched before the connection is released.
with engine.connect() as conn:

    rows = conn.execute(
        text("""
    SELECT player_name, player_id FROM tbl_player_aliases where player_id=:player_id;
    """),
        {"player_id": player_id},
    ).fetchall()

for row in rows:
    print(row[0], row[1])

46528245
