In [1]:
import asyncio, nest_asyncio, re, datetime
from pyppeteer import launch
import pandas as pd

nest_asyncio.apply()

In [129]:
import asyncio
import nest_asyncio
from pyppeteer import launch

def extract_ptcg_rarity_and_card_name(content):
    pattern = r'(UR|AR|SR|SAR)\s+([^\s\n]+)'
    matches = re.findall(pattern, content)
    return matches[0][0], matches[0][1]

def extract_ptcg_card_index(content):
    pattern = r'\d{3}/\d{3}'
    match = re.search(pattern, content)
    if match:
        return match.group()
    return None

def extract_card_price(content):
    pattern = r'\d{1,3}(?:,\d{3})* 円'
    match = re.search(pattern, content)
    if match:
        price_str = match.group()
        return int(price_str.replace(',', '').replace(' 円', ''))
    return None

async def extract_content(tcg_type, card_set, i):
    browser = await launch(headless=True)
    try:
        page = await browser.newPage()
        await page.goto(f'https://yuyu-tei.jp/sell/{tcg_type}/card/{card_set}/{i}', timeout=60000)
        await page.waitForSelector('.fw-bold', timeout=60000)
        fw_bold_texts = await page.evaluate('''() => {
            const boldElements = document.querySelectorAll('.fw-bold');
            return Array.from(boldElements).map(element => element.innerText).join('\\n');
        }''')
        return fw_bold_texts
    except Exception as e:
        print(f"Error for {card_set}/{i}: {e}")
        return None
    finally:
        await browser.close()



# Pokemon

In [18]:
pkm_df = pd.DataFrame(columns=['card_set','card_rarity', 'card_name', 'card_index', 'card_price', 'created_time'])

tcg_type = 'poc'

card_set = 'sv09a'

for i in range(10064,10093):
    content = asyncio.run(extract_content(tcg_type, card_set, i))
    card_rarity, card_name = extract_ptcg_rarity_and_card_name(content)
    card_index = extract_ptcg_card_index(content)
    card_price = extract_card_price(content)
    created_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    pkm_df.loc[len(pkm_df)] = [card_set, card_rarity, card_name, card_index, card_price, created_time]

card_set = 'sv02a'

for i in range(10472,10517):
    content = asyncio.run(extract_content(tcg_type, card_set, i))
    card_rarity, card_name = extract_ptcg_rarity_and_card_name(content)
    card_index = extract_ptcg_card_index(content)
    card_price = extract_card_price(content)
    created_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    pkm_df.loc[len(pkm_df)] = [card_set, card_rarity, card_name, card_index, card_price, created_time]


In [19]:
pkm_df.to_csv(f'../data/ptcg/{datetime.datetime.now().strftime("%Y%m%d")}.csv', index=False)

In [16]:
pkm_df

Unnamed: 0,card_set,card_rarity,card_name,card_index,card_price,created_time
0,sv09a,AR,ヤンヤンマ,064/063,420,2025-03-19 02:36:10
1,sv09a,AR,シロナのロズレイド,065/063,1980,2025-03-19 02:36:14
2,sv09a,AR,シェイミ,066/063,1280,2025-03-19 02:36:18
3,sv09a,AR,イワパレス,067/063,680,2025-03-19 02:36:23
4,sv09a,AR,カミツオロチ,068/063,420,2025-03-19 02:36:26


In [None]:
df = pd.DataFrame(columns=['card_set','card_rarity', 'card_name', 'card_index', 'card_price', 'created_time'])




In [74]:
df.to_csv(f'../data/opcg/{datetime.datetime.now().strftime("%Y%m%d")}.csv', index=False)

# One Piece

In [142]:
from pyppeteer import launch
import asyncio

def extract_opcg_rarity_and_card_name(content):
    pattern = r'(?:P-SEC|SEC|P-SR|P-R|P-L|SP|-)\s+[^\s\n]+'
    matches = re.search(pattern, content).group()
    if matches.split(' ')[0] == '-':
        return 'DON', matches.split(' ')[1]
    return matches.split(' ')[0], matches.split(' ')[1]

def extract_ptcg_card_index(content):
    pattern = r'\d{3}/\d{3}'
    match = re.search(pattern, content)
    if match:
        return match.group()
    return None

def extract_opcg_card_index(content):
    pattern = r'(?:OP|EB|ST)\d{2}-\d{3}'
    match = re.search(pattern, content)
    if match:
        return match.group()
    return None

async def get_links(search_word, rarity):
    browser = await launch(headless=True)
    try:
        page = await browser.newPage()
        await page.goto(f'https://yuyu-tei.jp/sell/opc/s/search?search_word={search_word}&rare={rarity}&type=&kizu=0', timeout=60000)
        hyperlinks = await page.evaluate('''() => {
            const links = document.querySelectorAll('a');
            return Array.from(links).map(link => link.href);
        }''')
        return hyperlinks
    except Exception as e:
        print(f"Error: {e}")
        return None
    finally:
        await browser.close()

rarities = ['P-SEC', 'SEC', 'P-SR', 'P-R', 'P-L', 'SP', '-']

links = []
for rarity in rarities:
    search_word = ''
    if rarity == '-':
        search_word = 'スーパーパラレル'
    links += asyncio.run(get_links(search_word, rarity))
    
links = list(set(sorted([l for l in links if 'card' in l])))

In [138]:
len(links)

598

In [144]:
for i in asyncio.run(get_links('スーパーパラレル', '-' )):
    if 'card' in i:
        print(i)

https://yuyu-tei.jp/sell/opc/card/prb01/10218
https://yuyu-tei.jp/sell/opc/card/prb01/10218
https://yuyu-tei.jp/sell/opc/card/prb01/10221
https://yuyu-tei.jp/sell/opc/card/prb01/10221
https://yuyu-tei.jp/sell/opc/card/prb01/10224
https://yuyu-tei.jp/sell/opc/card/prb01/10224
https://yuyu-tei.jp/sell/opc/card/prb01/10227
https://yuyu-tei.jp/sell/opc/card/prb01/10227
https://yuyu-tei.jp/sell/opc/card/prb01/10230
https://yuyu-tei.jp/sell/opc/card/prb01/10230
https://yuyu-tei.jp/sell/opc/card/prb01/10233
https://yuyu-tei.jp/sell/opc/card/prb01/10233
https://yuyu-tei.jp/sell/opc/card/prb01/10236
https://yuyu-tei.jp/sell/opc/card/prb01/10236
https://yuyu-tei.jp/sell/opc/card/prb01/10239
https://yuyu-tei.jp/sell/opc/card/prb01/10239
https://yuyu-tei.jp/sell/opc/card/prb01/10242
https://yuyu-tei.jp/sell/opc/card/prb01/10242
https://yuyu-tei.jp/sell/opc/card/prb01/10245
https://yuyu-tei.jp/sell/opc/card/prb01/10245
https://yuyu-tei.jp/sell/opc/card/prb01/10248
https://yuyu-tei.jp/sell/opc/card/

In [132]:
op_df = pd.DataFrame(columns=['card_set','card_rarity', 'card_name', 'card_index', 'card_price', 'created_time'])

i=0
for link in links:
    i += 1
    tcg_type = link.split('/')[-4]
    card_set = link.split('/')[-2]
    i = link.split('/')[-1]
    print(tcg_type, card_set, i, link, f'{i}/{len(links)}')
    content = asyncio.run(extract_content(tcg_type, card_set, i))
    try:
        card_rarity, card_name = extract_opcg_rarity_and_card_name(content)
    except:
        continue
    card_index = extract_opcg_card_index(content)
    card_price = extract_card_price(content)
    created_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    op_df.loc[len(op_df)] = [card_set, card_rarity, card_name, card_index, card_price, created_time]


opc prb01 10228 https://yuyu-tei.jp/sell/opc/card/prb01/10228
opc op10 10038 https://yuyu-tei.jp/sell/opc/card/op10/10038
opc prb01 10234 https://yuyu-tei.jp/sell/opc/card/prb01/10234
opc prb01 10302 https://yuyu-tei.jp/sell/opc/card/prb01/10302
opc prb01 10280 https://yuyu-tei.jp/sell/opc/card/prb01/10280
opc op04 10039 https://yuyu-tei.jp/sell/opc/card/op04/10039
opc op07 10089 https://yuyu-tei.jp/sell/opc/card/op07/10089
opc op08 10029 https://yuyu-tei.jp/sell/opc/card/op08/10029
opc promo-op10 10038 https://yuyu-tei.jp/sell/opc/card/promo-op10/10038
opc don 10017 https://yuyu-tei.jp/sell/opc/card/don/10017
opc prb01 10026 https://yuyu-tei.jp/sell/opc/card/prb01/10026
opc st13 10016 https://yuyu-tei.jp/sell/opc/card/st13/10016
opc op05 10011 https://yuyu-tei.jp/sell/opc/card/op05/10011
opc op02 10095 https://yuyu-tei.jp/sell/opc/card/op02/10095
opc op05 10073 https://yuyu-tei.jp/sell/opc/card/op05/10073
opc prb01 10192 https://yuyu-tei.jp/sell/opc/card/prb01/10192
opc op04 10149 htt

In [133]:
op_df.sort_values(by=['card_set', 'card_index'], inplace=True)

In [134]:
op_df.to_csv(f'../data/opcg/{datetime.datetime.now().strftime("%Y%m%d")}.csv', index=False)

In [135]:
op_df[op_df['card_name'].str.contains('スーパーパラレル')].sort_values(by='card_price', ascending=False)

Unnamed: 0,card_set,card_rarity,card_name,card_index,card_price,created_time
372,op09,P-SEC,ゴール・D・ロジャー(パラレル)(スーパーパラレル),OP09-118,498000,2025-03-20 04:23:11
524,op05,P-SEC,モンキー・D・ルフィ(パラレル)(スーパーパラレル),OP05-119,398000,2025-03-20 04:32:51
532,prb01,P-SR,ポートガス・D・エース(パラレル)(スーパーパラレル)(刻印あり),OP02-013,198000,2025-03-20 04:33:18
546,eb02,P-SEC,モンキー・D・ルフィ(パラレル)(スーパーパラレル),EB02-061,128000,2025-03-20 04:34:11
166,op02,P-SR,ポートガス・D・エース(パラレル)(スーパーパラレル)(刻印なし),OP02-013,128000,2025-03-20 04:08:41
354,prb01,P-SEC,サボ(パラレル)(スーパーパラレル)(刻印あり),OP04-083,128000,2025-03-20 04:22:03
386,prb01,P-SEC,シャンクス(パラレル)(スーパーパラレル)(刻印あり),OP01-120,128000,2025-03-20 04:24:07
42,op09,P-SEC,モンキー・D・ルフィ(パラレル)(スーパーパラレル),OP09-119,128000,2025-03-20 04:00:45
38,op09,P-SR,マーシャル・D・ティーチ(パラレル)(スーパーパラレル),OP09-093,99800,2025-03-20 04:00:32
395,prb01,P-SEC,そげキング(パラレル)(スーパーパラレル)(刻印あり),OP03-122,99800,2025-03-20 04:24:39
