In [16]:
import json
import pandas as pd
import numpy as np
from os import listdir
from os.path import isfile, join
import os

In [17]:
# Directory holding the exported deck lists.
data_path = '../data/DeckLists/'
# Keep regular files only, skipping any whose name contains 'Identifier'.
deck_files = [
    entry
    for entry in listdir(data_path)
    if 'Identifier' not in entry and isfile(join(data_path, entry))
]

In [20]:
# Parse each deck-list file into column-wise lists (one entry per card line).
# DataFrame columns: deckname (str), cardname (str), card_count (int), sideboard (bool)
deck = {'deckname':[], 'cardname':[], 'card_count':[], 'sideboard':[]}


for file_name in deck_files:
    # Becomes 1 after a 'Companion' header so that the separator closing the
    # companion section is not mistaken for the start of the sideboard.
    companion = 0
    sideboard = False
    # `with` closes the handle even if parsing raises part-way through.
    with open(join(data_path, file_name), 'r') as file:
        for line in file:
            items = line.split(" ")
            if items[0][:4] == 'Deck':
                continue
            if items[0][:9] == 'Companion':
                companion = 1
            elif not line.strip() or items[0][:9] == 'Sideboard':
                # A blank line or an explicit 'Sideboard' header is a section
                # separator.  Testing the stripped line (rather than looking
                # for a bare '\n' token) keeps a card line that merely ends in
                # a space from being read as a separator.
                if companion == 1:
                    companion -= 1
                else:
                    sideboard = True
            else:
                # Convert the count BEFORE appending anything: a failed
                # conversion must not leave the four lists with different
                # lengths, which would make pd.DataFrame(deck) raise.
                try:
                    card_count = int(items[0])
                except ValueError as error:
                    print(f'Unable to process: {error}')
                    print(f'{file_name}: {line!r}')
                    continue
                deck['deckname'].append(file_name)
                deck['cardname'].append(" ".join(items[1:]).strip())
                deck['card_count'].append(card_count)
                deck['sideboard'].append(sideboard)


parsed_df = pd.DataFrame(deck)

In [21]:
# Column dtypes and non-null counts for the parsed deck lists.
parsed_df.info()
# Sum of card_count over the sideboard rows, per deck.
in_sideboard = parsed_df['sideboard'].eq(True)
parsed_df.loc[in_sideboard, ['deckname', 'card_count']].groupby('deckname').sum()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1144 entries, 0 to 1143
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   deckname    1144 non-null   object
 1   cardname    1144 non-null   object
 2   card_count  1144 non-null   int64 
 3   sideboard   1144 non-null   bool  
dtypes: bool(1), int64(1), object(2)
memory usage: 28.1+ KB


Unnamed: 0_level_0,card_count
deckname,Unnamed: 1_level_1
Andrea-Mengucci-Mono-Red-Aggro-Magic-World-Championship-XXVI,15
Autumn-Burchett-Temur-Reclamation-Magic-World-Championship-XXVI,15
Chris-Kvartek-Temur-Reclamation-Magic-World-Championship-XXVI,15
Eli-Loveman-Mono-Red-Aggro-Magic-World-Championship-XXVI,15
Gabriel-Nassif-Jeskai-Fires-Magic-World-Championship-XXVI,15
Javier-Dominguez-Jeskai-Fires-Magic-World-Championship-XXVI,15
Jean-Emmanuel-Depraz-Temur-Reclamation-Magic-World-Championship-XXVI,15
Matias-Leveratto-Temur-Reclamation-Magic-World-Championship-XXVI,15
Márcio-Carvalho-Jeskai-Fires-Magic-World-Championship-XXVI,15
Ondřej-Stráský-Azorius-Control-Magic-World-Championship-XXVI,15


In [158]:
import psycopg2 as psy
from psycopg2 import sql
from io import StringIO
import sqlalchemy
from sqlalchemy import create_engine

def connect(connection_details):
    """Open a PostgreSQL connection and return it.

    Parameters
    ----------
    connection_details : dict
        Keyword arguments forwarded unchanged to ``psy.connect``.

    Returns
    -------
    The connection object returned by ``psy.connect``.

    Raises
    ------
    SystemExit
        With exit code 1 when the connection attempt raises; the error is
        printed first.
    """
    # Local import: the failure path needs `sys`, which the visible import
    # cells never bring into scope (the original raised NameError here).
    import sys

    try:
        print('Connecting to PostgreSQL database...')
        conn = psy.connect(**connection_details)
    except Exception as error:
        print(f'Unable to connect to the database: {error}')
        sys.exit(1)
    print('Connection successful')
    return conn

    
# Connection settings; user and password are read from environment variables.
con_details = dict(
    dbname='pay_to_play',
    user=os.environ['PGSQL_P_USER'],
    password=os.environ['PGSQL_P_PWD'],
    host='localhost',
)

conn = connect(con_details)

Connecting to PostgreSQL database...
Connection successful


In [159]:
# def add_uuid(deck_df, conn, uuid_field, sql_lookup_field, df_lookup_field, table):
#     uuid = sql.Identifier(uuid_field)
#     query = sql.SQL("SELECT {fields} FROM {tbl}").format(
#         fields = sql.SQL(', ').join([sql.Identifier(uuid_field), sql.Identifier(sql_lookup_field)]),
#         tbl = sql.Identifier(table))
#     name_id_df = pd.read_sql(query, conn).groupby(sql_lookup_field).max()
#     return deck_df.merge(name_id_df, how = 'left', left_on=df_lookup_field, right_on=sql_lookup_field)
        
def get_most_recent_uuid_for_card_names(conn):
    """Return one uuid per card name, taken from the card's newest set.

    The query works in three steps: per card name it finds the latest
    ``setreleasedate`` among the sets joined to ``core``; it resolves that
    date back to set codes; and it selects the ``core`` rows whose name and
    set code both match.  If several rows remain for one name, the final
    ``groupby("name").max()`` keeps the largest uuid value.

    Parameters
    ----------
    conn : database connection handed to ``pd.read_sql``.

    Returns
    -------
    DataFrame indexed by ``name`` with a ``uuid`` column.
    """
    query = sql.SQL("""
        WITH newestsetdate AS (
            SELECT name, MAX(setreleasedate) as max_date
            FROM core
            JOIN setdetails ON "setCode" = "setcode"
            GROUP BY name
            ),
            newestset AS (
            SELECT name, setcode
            FROM newestsetdate
            JOIN setdetails ON newestsetdate.max_date = setdetails.setreleasedate
            )

            SELECT core.uuid, newestset.name
            FROM core
            INNER JOIN newestset ON newestset.name = core.name AND newestset.setcode = core."setCode";""")
    newest_printings = pd.read_sql(query, conn)
    return newest_printings.groupby("name").max()

def add_uuid_to_deck(deck_df, conn, df_name_field):
    """Left-merge the per-name uuid lookup onto ``deck_df``.

    Parameters
    ----------
    deck_df : DataFrame of deck rows.
    conn : connection passed to ``get_most_recent_uuid_for_card_names``.
    df_name_field : name of the ``deck_df`` column holding the card name.

    Returns
    -------
    A new DataFrame with every row of ``deck_df``; rows whose card name has
    no entry in the lookup get a null uuid.
    """
    uuid_lookup = get_most_recent_uuid_for_card_names(conn)
    merged = deck_df.merge(
        uuid_lookup,
        left_on=df_name_field,
        right_on="name",
        how='left',
    )
    return merged

    

In [29]:
# Attach a uuid to each parsed deck row by card name; add_uuid_to_deck does a
# left merge, so rows without a match keep a null uuid.
uuid_deck = add_uuid_to_deck(parsed_df, conn, "cardname")

In [30]:
# The non-null count of `uuid` shows how many rows found a match.
uuid_deck.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1144 entries, 0 to 1143
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   deckname    1144 non-null   object
 1   cardname    1144 non-null   object
 2   card_count  1144 non-null   int64 
 3   sideboard   1144 non-null   bool  
 4   uuid        1010 non-null   object
dtypes: bool(1), int64(1), object(3)
memory usage: 45.8+ KB


In [14]:
# Display the merged frame.
uuid_deck

Unnamed: 0,deckname,cardname,card_count,sideboard,uuid
0,Platinum-Mythic-Rank-Player-Traditional-(Bo3)(7),Thrashing Brontodon,2,False,523c83b0-ee94-5f0a-9ad9-f7efb5aad95e
1,Platinum-Mythic-Rank-Player-Traditional-(Bo3)(7),Trail of Crumbs,3,False,2e5c5bd2-ed4a-556f-a920-10ee0ddcb6cc
2,Platinum-Mythic-Rank-Player-Traditional-(Bo3)(7),Kazandu Mammoth,4,False,
3,Platinum-Mythic-Rank-Player-Traditional-(Bo3)(7),"Vorinclex, Monstrous Raider",2,False,f85a6483-b319-54a8-ac93-3dfe883fd166
4,Platinum-Mythic-Rank-Player-Traditional-(Bo3)(7),Lovestruck Beast,4,False,
...,...,...,...,...,...
1139,Platinum-Mythic-Rank-Player-Traditional-(Bo3)(4),"Klothys, God of Destiny",2,True,bed702eb-e2ed-5989-9341-3b1198980180
1140,Platinum-Mythic-Rank-Player-Traditional-(Bo3)(4),"Vivien, Monsters' Advocate",2,True,5e52bd2a-f453-58d1-ab8a-9d42295f445e
1141,Platinum-Mythic-Rank-Player-Traditional-(Bo3)(4),Mystical Dispute,3,True,5651bd95-cb41-5266-8654-93707f0c4f9a
1142,Platinum-Mythic-Rank-Player-Traditional-(Bo3)(4),Ox of Agonas,2,True,82cd6def-fef5-5e08-8af8-934f1be45237


In [160]:
def add_new_card_data(conn_details, card_inf_df, table, conn):
    """Append ``card_inf_df`` to ``table`` through a SQLAlchemy engine.

    Parameters
    ----------
    conn_details : dict
        Must contain ``'user'``, ``'password'`` and ``'dbname'``.  ``'host'``
        and ``'port'`` are used when present and default to ``'localhost'``
        and ``5432``.
    card_inf_df : DataFrame
        Rows to append; the index is not written.
    table : str
        Target table name.
    conn : connection
        Committed on success and rolled back on failure.
        NOTE(review): this is a different connection from the engine that
        performs the write, so it does not control that write — confirm it is
        still needed.

    Returns
    -------
    ``None`` on success, ``1`` if the write raised (the error is printed).
    """
    from urllib.parse import quote_plus

    # Escape the credentials so characters such as '@', ':' or '/' cannot
    # corrupt the URL, and honour the configured host instead of always
    # pointing at localhost.
    host = conn_details.get('host', 'localhost')
    port = conn_details.get('port', 5432)
    engine_path = (
        "postgresql+psycopg2://"
        f"{quote_plus(conn_details['user'])}:{quote_plus(conn_details['password'])}"
        f"@{host}:{port}/{conn_details['dbname']}"
    )
    engine = create_engine(engine_path)
    try:
        card_inf_df.to_sql(table, engine, index=False, if_exists='append')
    except Exception as error:
        print(f'Error: {error}')
        conn.rollback()
        return 1
    finally:
        # Release the engine's pooled connections; a fresh engine is built on
        # every call.
        engine.dispose()
    conn.commit()
    print(f'Successful updating of {table}')
    



In [56]:

# Split the merged rows on whether a uuid was found.
has_uuid = uuid_deck['uuid'].notna()
# Unmatched rows drop the empty uuid column and keep the card name.
missing_uuids = uuid_deck[~has_uuid].drop('uuid', axis=1)
# Matched rows are identified by uuid, so the card name is dropped.
deck_load = uuid_deck[has_uuid].drop('cardname', axis=1)
deck_load.head()

Unnamed: 0,deckname,card_count,sideboard,uuid
0,Platinum-Mythic-Rank-Player-Traditional-(Bo3)(7),2,False,523c83b0-ee94-5f0a-9ad9-f7efb5aad95e
1,Platinum-Mythic-Rank-Player-Traditional-(Bo3)(7),3,False,2e5c5bd2-ed4a-556f-a920-10ee0ddcb6cc
3,Platinum-Mythic-Rank-Player-Traditional-(Bo3)(7),2,False,f85a6483-b319-54a8-ac93-3dfe883fd166
5,Platinum-Mythic-Rank-Player-Traditional-(Bo3)(7),4,False,a12d890a-1619-5545-b2f0-3897d6c61c8b
6,Platinum-Mythic-Rank-Player-Traditional-(Bo3)(7),4,False,3ef6edae-e899-521c-a731-3aeeff8b154e


In [57]:
# Append the rows that received a uuid to the `decklists` table.
add_new_card_data(con_details, deck_load , 'decklists', conn)

Successful updating of decklists


In [58]:
# Append the rows without a uuid to the `nan_uuid_decklists` table.
add_new_card_data(con_details, missing_uuids, 'nan_uuid_decklists', conn)

Successful updating of nan_uuid_decklists


In [133]:
# Tally noted at the time: 45 unmatched, 373 matched.  The unmatched names
# need a looser text scan/match against the master card names.
# Fetch each distinct card name stored in nan_uuid_decklists.
query = sql.SQL("""
    SELECT cardname
    FROM nan_uuid_decklists
    GROUP BY cardname""")
unmatched_df = pd.read_sql(query, conn)




In [134]:
from string import punctuation

def text_rawify(text):
    """Normalise a name for loose matching.

    Lower-cases ``text`` and removes every character listed in
    ``string.punctuation`` as well as every space character.  Other
    whitespace (tabs, newlines) is left in place.
    """
    # One translation table deletes punctuation and spaces in a single pass.
    strip_table = str.maketrans('', '', punctuation + ' ')
    return text.lower().translate(strip_table)


In [135]:
# Add a normalised copy of each card name (text_rawify) for text matching.
unmatched_df['raw_name'] = unmatched_df['cardname'].apply(text_rawify)

In [136]:
#unmatched_df.drop('rawtext', axis=1, inplace=True)
# Preview the card names next to their normalised raw_name values.
unmatched_df.head()

Unnamed: 0,cardname,raw_name
0,Pelakka Predation,pelakkapredation
1,Barkchannel Pathway,barkchannelpathway
2,Brightclimb Pathway,brightclimbpathway
3,"Egon, God of Death",egongodofdeath
4,Darkbore Pathway,darkborepathway


In [143]:
# Manual fix: this raw name ends in "224grn"; reduce it to the bare card name
# so that it can be matched.
unmatched_df.loc[unmatched_df['raw_name'] == 'expansionexplosion224grn','raw_name'] = 'expansionexplosion'

In [144]:
# "Expansion // Explosion 224 (GRN)" needed manual handling: the deck list
# carried the extra text "224 (GRN)" after the card name.
# Limited manual research also suggests that many of the unmatched cards are
# double-faced, with both face names combined in the master `name` field.
# Plan: search for each deck-list raw name inside the master raw names.

# reset_index() turns the `name` index back into a regular column.
name_id_df = get_most_recent_uuid_for_card_names(conn).reset_index()
name_id_df['rawname'] = name_id_df['name'].apply(text_rawify)
name_id_df.head()

Unnamed: 0,name,uuid,rawname
0,Absorb,7d6637d8-1d04-5977-a925-f4187ac3fd1c,absorb
1,Absorb Identity,fa3e1540-8f7c-5fcc-9aaf-19b6b4af1c6b,absorbidentity
2,Acclaimed Contender,7ebde572-911a-5ce4-a607-fe21718e01e0,acclaimedcontender
3,Acolyte of Affliction,3a61e0b3-4c1a-5971-a27d-81ee96d85b4a,acolyteofaffliction
4,Acquisitions Expert,16b73777-ebad-5771-b1c7-0726eeda9f3c,acquisitionsexpert


In [145]:
def find_unmatched_uuids(cardname, unmatched_rawname, core_card_names_df, search_in_col, uuid_col_num):
    """Look up a uuid for a normalised card name in the master name table.

    An exact match on ``search_in_col`` wins.  Only when there is none does
    the search fall back to the first row whose ``search_in_col`` value
    contains ``unmatched_rawname`` as a literal substring.  Without the
    exact-match step a short name would resolve to whichever longer name
    happens to contain it first.

    Parameters
    ----------
    cardname : original card name, returned unchanged in the result.
    unmatched_rawname : str
        Normalised name to search for.
    core_card_names_df : DataFrame
        Master table to search.
    search_in_col : column label of ``core_card_names_df`` to search in.
    uuid_col_num : int
        Positional index of the uuid column in ``core_card_names_df``.

    Returns
    -------
    ``(cardname, unmatched_rawname, uuid)`` for the first matching row, or
    ``None`` when nothing matches or ``unmatched_rawname`` is empty.
    """
    # An empty needle is a substring of every value; treat it as "no match".
    if not unmatched_rawname:
        return None

    searched = core_card_names_df[search_in_col]

    exact = core_card_names_df[searched == unmatched_rawname]
    if not exact.empty:
        return cardname, unmatched_rawname, exact.iloc[0, uuid_col_num]

    partial = core_card_names_df[searched.str.contains(unmatched_rawname, na=False, regex=False)]
    if not partial.empty:
        return cardname, unmatched_rawname, partial.iloc[0, uuid_col_num]

    return None




In [146]:


# Run the raw-name lookup for every unmatched card (column 0 = card name,
# column 1 = raw name) and keep only the lookups that returned a result.
lookups = (
    find_unmatched_uuids(card, raw, name_id_df, 'rawname', 1)
    for card, raw in zip(unmatched_df.iloc[:, 0], unmatched_df.iloc[:, 1])
)
uuid_matches = [hit for hit in lookups if hit is not None]

matched_name = pd.DataFrame(uuid_matches, columns=['cardname', 'rawname', 'uuid'])

In [147]:
# Display the uuids recovered by the raw-name search.
matched_name

Unnamed: 0,cardname,rawname,uuid
0,Pelakka Predation,pelakkapredation,956cb1d0-2beb-51f9-8f33-38cb28280d2b
1,Barkchannel Pathway,barkchannelpathway,c277aee9-4263-5fbe-ad24-57f5533f14da
2,Brightclimb Pathway,brightclimbpathway,7e47679e-24a0-57dc-970f-b29efb7d55ec
3,"Egon, God of Death",egongodofdeath,e7619d66-eed8-5e0d-9b45-26e299de2ac1
4,Darkbore Pathway,darkborepathway,8dbc37a4-d21a-5539-8e45-ef2955a0456d
5,Beanstalk Giant,beanstalkgiant,e78d0b7a-7b47-59b2-a148-c5a130224adb
6,Turntimber Symbiosis,turntimbersymbiosis,f6aeb170-0112-5224-8df5-274ce7558327
7,Blightstep Pathway,blightsteppathway,f77952ce-3925-5753-b2b1-45b332ab893d
8,Branchloft Pathway,branchloftpathway,6072997d-fb42-5bc9-b7ee-b82f00165c4d
9,"Reidane, God of the Worthy",reidanegodoftheworthy,fb2bf34b-1c20-5fc2-97f1-5d5170a94364


In [154]:
# Read back every row stored in nan_uuid_decklists.
query = sql.SQL("""
    SELECT *
    FROM nan_uuid_decklists
    """)
nan_uuid_decklists = pd.read_sql(query, conn)
# Left join on the card name: every stored row is kept, and rows without an
# entry in matched_name get null rawname/uuid values.
to_update = pd.merge(nan_uuid_decklists, matched_name, on='cardname', how='left')
to_update.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 134 entries, 0 to 133
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   cardname    134 non-null    object
 1   card_count  134 non-null    int64 
 2   sideboard   134 non-null    bool  
 3   deckname    134 non-null    object
 4   rawname     134 non-null    object
 5   uuid        134 non-null    object
dtypes: bool(1), int64(1), object(4)
memory usage: 6.4+ KB


In [161]:
# Append the newly matched rows to `decklists`, sending only the four listed columns.
# NOTE(review): to_update comes from a left join, so a card without a match
# would be written with a null uuid — check the .info() non-null counts first.
add_new_card_data(con_details, to_update[['deckname', 'card_count', 'sideboard', 'uuid']], 'decklists', conn)


Successful updating of decklists


In [162]:
#woo hoo!!!!! it worked!