In [46]:
import asyncio, nest_asyncio, re
from datetime import datetime
from pyppeteer import launch
import pandas as pd

nest_asyncio.apply()

In [47]:
# Wall-clock start of the Pokemon run; read later by the elapsed-minutes cell.
t_start = datetime.now()

# Pokemon

In [48]:
def extract_ptcg_rarity_and_card_name(content):
    """Extract the rarity code and the card name from scraped page text.

    Searches for the first occurrence of one of the rarity codes
    ``S-TD``, ``UR``, ``AR``, ``SR`` or ``SAR`` followed by whitespace and a
    run of non-whitespace characters (taken as the card name).

    Args:
        content: Text to search. ``None`` or an empty string is accepted.

    Returns:
        A ``(rarity, card_name)`` tuple, or ``None`` when ``content`` is
        empty or contains no rarity/name pair.
    """
    if not content:
        return None
    # Capture groups instead of ``split(' ')``: the separator is ``\s+``, so it
    # may be a tab or several spaces, which a single-space split mishandles.
    match = re.search(r'(S-TD|UR|AR|SR|SAR)\s+(\S+)', content)
    if match is None:
        return None
    return match.group(1), match.group(2)

def extract_ptcg_card_index(content):
    """Return the first ``NNN/NNN`` token (three digits, slash, three digits).

    Args:
        content: Text to search.

    Returns:
        The matched substring, or ``None`` when the text has no such token.
    """
    found = re.compile(r'\d{3}/\d{3}').search(content)
    return found.group() if found else None

def extract_card_price(content):
    """Extract the first yen price from scraped page text.

    A price is a run of digits, optionally followed by ``,ddd`` thousands
    groups, then a space and the ``円`` sign.

    Args:
        content: Text to search. ``None`` or an empty string is accepted.

    Returns:
        The price as an ``int``, or ``None`` when no price is found.
    """
    if not content:
        return None
    # ``(?<!\d)`` + ``\d+`` keeps the match from starting in the middle of a
    # number, so an ungrouped price like "1280 円" is not truncated to 280.
    match = re.search(r'(?<!\d)(\d+(?:,\d{3})*) 円', content)
    if match is None:
        return None
    return int(match.group(1).replace(',', ''))
    
async def extract_content(tcg_type, card_set, i):
    """Fetch one card page and return the text of its ``.fw-bold`` elements.

    Launches a headless browser, opens
    ``https://yuyu-tei.jp/sell/{tcg_type}/card/{card_set}/{i}``, waits for an
    element matching ``.fw-bold`` and prints the URL once it is present.

    Returns:
        The ``innerText`` of every ``.fw-bold`` element joined by newlines, or
        ``None`` if any step raises (the error is printed). The browser is
        closed in every case.
    """
    browser = await launch(headless=True)
    try:
        tab = await browser.newPage()
        card_url = f'https://yuyu-tei.jp/sell/{tcg_type}/card/{card_set}/{i}'
        await tab.goto(card_url, timeout=60000)
        await tab.waitForSelector('.fw-bold', timeout=60000)
        print(card_url)
        return await tab.evaluate('''() => {
            const boldElements = document.querySelectorAll('.fw-bold');
            return Array.from(boldElements).map(element => element.innerText).join('\\n');
        }''')
    except Exception as err:
        print(f"Error for {card_set}/{i}: {err}")
        return None
    finally:
        await browser.close()
        
async def get_links(vers, rarity):
    """Collect every anchor ``href`` from a Pokemon (``poc``) search page.

    Args:
        vers: String placed directly after ``search_word=`` in the query.
        rarity: Value of the ``rare`` query parameter.

    Returns:
        A list with the ``href`` of every ``<a>`` element on the page, or
        ``None`` if any step raises (the error is printed). The browser is
        closed in every case.
    """
    browser = await launch(headless=True)
    try:
        tab = await browser.newPage()
        search_url = f'https://yuyu-tei.jp/sell/poc/s/search?search_word={vers}&rare={rarity}&type=&kizu=0'
        await tab.goto(search_url, timeout=60000)
        return await tab.evaluate('''() => {
            const links = document.querySelectorAll('a');
            return Array.from(links).map(link => link.href);
        }''')
    except Exception as err:
        print(f"Error: {err}")
        return None
    finally:
        await browser.close()


In [49]:
# Rarity code -> card-set codes searched for that rarity. The set codes are
# sent as repeated `vers[]` query parameters and reused to filter result links.
# UR, SAR, SR and AR share the same set codes; each key gets its own list.
ptcg_rarity_table = {
    **{
        rarity_code: [
            'sv10', 'sv09a', 'sv09', 'sv08a', 'sv08', 'sv07a', 'sv07', 'sv06a',
            'sv06', 'sv05a', 'sv05k', 'sv05m', 'sv04a', 'sv04k', 'sv04m', 'sv03a',
            'sv03', 'sv02a', 'sv02p', 'sv02d', 'sv01a', 'sv01s', 'sv01v', 's12a',
        ]
        for rarity_code in ('UR', 'SAR', 'SR', 'AR')
    },
    'S-TD': ['svg'],
}

In [50]:
# Collect the card-page links for every rarity in ptcg_rarity_table.
# NOTE: get_links is redefined in the One Piece section of this notebook; this
# cell needs the Pokemon definition (the one that queries /sell/poc/).
links = []
for rarity, set_codes in ptcg_rarity_table.items():
    # Each set code becomes one `vers[]` query parameter (%5B%5D is "[]").
    vers_query = ''.join('&vers%5B%5D=' + code for code in set_codes)
    # get_links returns None when the page load fails; treat that as "no links"
    # instead of crashing on iteration.
    all_links = asyncio.run(get_links(vers_query, rarity)) or []
    # Require '/card/' (not just 'card') so every kept URL is safe for the
    # split('/card/') in the sort key below.
    cleaned_links = [
        url for url in all_links
        if '/card/' in url and any(code in url for code in set_codes)
    ]
    links += cleaned_links

links = list(set(links))
# Sort by card set, then numerically by the trailing card id.
sorted_links = sorted(links, key=lambda x: (x.split('/card/')[1].split('/')[0], int(x.split('/')[-1])))
len(sorted_links)

857

# Visit every Pokemon card page and append one row per successfully parsed card.
# The frame is created here so the cell does not depend on leftover kernel state.
pkm_df = pd.DataFrame(columns=['card_set','card_rarity', 'card_name', 'card_index', 'card_price', 'created_time'])

for idx, link in enumerate(sorted_links, start=1):
    # Link shape: .../sell/<tcg_type>/card/<card_set>/<id>
    parts = link.split('/')
    tcg_type, card_set, i = parts[-4], parts[-2], parts[-1]
    print(f'{idx}/{len(sorted_links)}')
    content = asyncio.run(extract_content(tcg_type, card_set, i))
    try:
        card_rarity, card_name = extract_ptcg_rarity_and_card_name(content)
    except Exception as e:
        # Page failed to load or no rarity/name pair was found: skip the card,
        # but say so instead of dropping it silently. `except Exception` (not a
        # bare `except`) lets KeyboardInterrupt stop this long-running loop.
        print(f'Skipped {link}: rarity/name not found ({type(e).__name__})')
        continue
    card_index = extract_ptcg_card_index(content)
    card_price = extract_card_price(content)
    created_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    pkm_df.loc[len(pkm_df)] = [card_set, card_rarity, card_name, card_index, card_price, created_time]

In [51]:
# Write the Pokemon table to a CSV named after today's date (YYYYMMDD), without
# the index column. 'utf-8-sig' prepends a BOM to the file.
pkm_df.to_csv(f'../data/ptcg/{datetime.now().strftime("%Y%m%d")}.csv', index=False, encoding='utf-8-sig')

In [62]:
# Minutes elapsed since t_start. total_seconds() is used because
# timedelta.seconds drops the days component and sub-second precision.
(datetime.now() - t_start).total_seconds() / 60

53.333333333333336

In [60]:
# Display the scraped Pokemon table.
pkm_df

Unnamed: 0,card_set,card_rarity,card_name,card_index,card_price,created_time
0,s12a,AR,ヒスイビリリダマ,173/172,220,2025-04-27 23:51:21
1,s12a,AR,コロトック,174/172,220,2025-04-27 23:51:25
2,s12a,AR,ブーバーン,175/172,320,2025-04-27 23:51:28
3,s12a,AR,オドリドリ,176/172,320,2025-04-27 23:51:31
4,s12a,AR,ラプラス,177/172,500,2025-04-27 23:51:35
...,...,...,...,...,...,...
815,svg,S-TD,リザードンex(RR仕様),009/049,780,2025-04-28 00:46:17
816,svg,S-TD,カメックスex(RR仕様),016/049,420,2025-04-28 00:46:21
817,svg,S-TD,フシギダネ(AR仕様),050/049,6980,2025-04-28 00:46:24
818,svg,S-TD,ヒトカゲ(AR仕様),051/049,7980,2025-04-28 00:46:28


# One Piece

In [53]:
from pyppeteer import launch
import asyncio

def extract_opcg_rarity_and_card_name(content):
    """Extract the rarity code and the card name from scraped page text.

    Searches for the first occurrence of one of ``P-SEC``, ``SEC``, ``P-SR``,
    ``P-R``, ``P-L``, ``SP`` or a lone ``-`` followed by whitespace and a run
    of non-whitespace characters (taken as the card name). A ``-`` rarity is
    reported as ``'DON'``.

    Args:
        content: Text to search. ``None`` or an empty string is accepted.

    Returns:
        A ``(rarity, card_name)`` tuple, or ``None`` when ``content`` is
        empty or contains no rarity/name pair.
    """
    if not content:
        return None
    # Capture groups instead of ``split(' ')``: the separator is ``\s+``, so it
    # may be a tab or several spaces, which a single-space split mishandles.
    match = re.search(r'(P-SEC|SEC|P-SR|P-R|P-L|SP|-)\s+(\S+)', content)
    if match is None:
        return None
    rarity, card_name = match.group(1), match.group(2)
    if rarity == '-':
        return 'DON', card_name
    return rarity, card_name

def extract_ptcg_card_index(content):
    """Return the first ``NNN/NNN`` token (three digits, slash, three digits).

    This repeats the definition from the Pokemon section of the notebook with
    the same behavior.

    Args:
        content: Text to search.

    Returns:
        The matched substring, or ``None`` when the text has no such token.
    """
    hit = re.search(r'\d{3}/\d{3}', content)
    if hit is None:
        return None
    return hit.group()

def extract_opcg_card_index(content):
    """Return the first card index such as ``OP01-001``, ``EB01-006`` or ``ST21-014``.

    The index is ``OP``, ``EB`` or ``ST``, two digits, a hyphen and three digits.

    Args:
        content: Text to search.

    Returns:
        The matched substring, or ``None`` when the text has no such index.
    """
    found = re.compile(r'(?:OP|EB|ST)\d{2}-\d{3}').search(content)
    return found.group() if found else None

async def get_links(search_word, rarity):
    """Collect every anchor ``href`` from a One Piece (``opc``) search page.

    Running this cell rebinds the name ``get_links``, replacing the Pokemon
    version defined earlier in the notebook.

    Args:
        search_word: Value of the ``search_word`` query parameter.
        rarity: Value of the ``rare`` query parameter.

    Returns:
        A list with the ``href`` of every ``<a>`` element on the page, or
        ``None`` if any step raises (the error is printed). The browser is
        closed in every case.
    """
    browser = await launch(headless=True)
    try:
        tab = await browser.newPage()
        search_url = f'https://yuyu-tei.jp/sell/opc/s/search?search_word={search_word}&rare={rarity}&type=&kizu=0'
        await tab.goto(search_url, timeout=60000)
        return await tab.evaluate('''() => {
            const links = document.querySelectorAll('a');
            return Array.from(links).map(link => link.href);
        }''')
    except Exception as err:
        print(f"Error: {err}")
        return None
    finally:
        await browser.close()

# Rarity values sent as the `rare` query parameter, one search per entry.
# They mirror the alternatives in extract_opcg_rarity_and_card_name, which
# reports '-' as 'DON'.
rarities = ['P-SEC', 'SEC', 'P-SR', 'P-R', 'P-L', 'SP', '-']


In [54]:
# Wall-clock start of the One Piece run; read later by the elapsed-minutes cell.
t_start2 = datetime.now()

In [55]:
# Collect the card-page links for every One Piece rarity.
links = []
for rarity in rarities:
    # Only the '-' rarity is searched together with a keyword.
    search_word = 'スーパーパラレル' if rarity == '-' else ''
    # get_links returns None when the page load fails; treat that as "no links"
    # instead of crashing on `+=`.
    links += asyncio.run(get_links(search_word, rarity)) or []

# De-duplicate. Require '/card/' (not just 'card') so every kept URL is safe
# for the split('/card/') in the sort key below.
links = list({url for url in links if '/card/' in url})
# Sort by card set, then numerically by the trailing card id.
sorted_links = sorted(links, key=lambda x: (x.split('/card/')[1].split('/')[0], int(x.split('/')[-1])))
len(sorted_links)

599

In [56]:
# Visit every One Piece card page and append one row per successfully parsed card.
op_df = pd.DataFrame(columns=['card_set','card_rarity', 'card_name', 'card_index', 'card_price', 'created_time'])

for idx, link in enumerate(sorted_links, start=1):
    # Link shape: .../sell/<tcg_type>/card/<card_set>/<id>
    parts = link.split('/')
    tcg_type, card_set, i = parts[-4], parts[-2], parts[-1]
    # Progress is reported against the list actually being iterated.
    print(f'{idx}/{len(sorted_links)}')
    content = asyncio.run(extract_content(tcg_type, card_set, i))
    try:
        card_rarity, card_name = extract_opcg_rarity_and_card_name(content)
    except Exception as e:
        # Page failed to load or no rarity/name pair was found: skip the card,
        # but say so instead of dropping it silently. `except Exception` (not a
        # bare `except`) lets KeyboardInterrupt stop this long-running loop.
        print(f'Skipped {link}: rarity/name not found ({type(e).__name__})')
        continue
    card_index = extract_opcg_card_index(content)
    card_price = extract_card_price(content)
    created_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    op_df.loc[len(op_df)] = [card_set, card_rarity, card_name, card_index, card_price, created_time]


1/599
https://yuyu-tei.jp/sell/opc/card/eb01/10002
2/599
https://yuyu-tei.jp/sell/opc/card/eb01/10005
3/599
https://yuyu-tei.jp/sell/opc/card/eb01/10009
4/599
https://yuyu-tei.jp/sell/opc/card/eb01/10010
5/599
https://yuyu-tei.jp/sell/opc/card/eb01/10017
6/599
https://yuyu-tei.jp/sell/opc/card/eb01/10019
7/599
https://yuyu-tei.jp/sell/opc/card/eb01/10028
8/599
https://yuyu-tei.jp/sell/opc/card/eb01/10030
9/599
https://yuyu-tei.jp/sell/opc/card/eb01/10036
10/599
https://yuyu-tei.jp/sell/opc/card/eb01/10041
11/599
https://yuyu-tei.jp/sell/opc/card/eb01/10045
12/599
https://yuyu-tei.jp/sell/opc/card/eb01/10052
13/599
https://yuyu-tei.jp/sell/opc/card/eb01/10059
14/599
https://yuyu-tei.jp/sell/opc/card/eb01/10062
15/599
https://yuyu-tei.jp/sell/opc/card/eb01/10064
16/599
https://yuyu-tei.jp/sell/opc/card/eb01/10068
17/599
https://yuyu-tei.jp/sell/opc/card/eb01/10073
18/599
https://yuyu-tei.jp/sell/opc/card/eb01/10075
19/599
https://yuyu-tei.jp/sell/opc/card/eb01/10079
20/599
https://yuyu-t

In [57]:
# Write the One Piece table to a CSV named after today's date (YYYYMMDD),
# without the index column. 'utf-8-sig' prepends a BOM to the file.
op_df.to_csv(f'../data/opcg/{datetime.now().strftime("%Y%m%d")}.csv', index=False, encoding='utf-8-sig')

In [58]:
# Display the scraped One Piece table.
op_df

Unnamed: 0,card_set,card_rarity,card_name,card_index,card_price,created_time
0,eb01,P-L,光月おでん(パラレル),EB01-001,980,2025-04-29 00:25:31
1,eb01,P-R,キッド&キラー(パラレル),EB01-003,980,2025-04-29 00:25:34
2,eb01,P-SR,トニートニー・チョッパー(パラレル),EB01-006,1280,2025-04-29 00:25:38
3,eb01,P-SR,トニートニー・チョッパー(パラレル)(スーパーパラレル),EB01-006,59800,2025-04-29 00:25:41
4,eb01,P-SR,キャベンディッシュ(パラレル),EB01-012,1280,2025-04-29 00:25:44
...,...,...,...,...,...,...
558,st21,P-SR,モンキー・D・ルフィ(パラレル),ST21-014,4980,2025-04-29 00:58:59
559,st21,P-SR,ロロノア・ゾロ(パラレル),ST21-015,1980,2025-04-29 00:59:03
560,st22,P-L,ニューゲート＆エース(パラレル),ST22-001,4980,2025-04-29 00:59:06
561,st22,P-SR,イゾウ(パラレル),ST22-002,1980,2025-04-29 00:59:09


In [59]:
# Minutes elapsed since t_start2. total_seconds() is used because
# timedelta.seconds drops the days component and sub-second precision.
(datetime.now() - t_start2).total_seconds() / 60

34.166666666666664