In [97]:
import requests
from bs4 import BeautifulSoup

# Ask for the search term, request the Beatport track-search page, and parse the HTML.
query = input("検索語を入力してください: ")
base_url = "https://www.beatport.com/search/tracks"

# Pass the search term through `params` so requests percent-encodes it; building
# the query string by hand breaks on spaces, '&', '#', and non-ASCII input.
# The timeout keeps a stalled connection from hanging the kernel indefinitely.
response = requests.get(
    base_url,
    params={"q": query, "per-page": 150},
    timeout=30,
)
url = response.url  # final request URL, including the encoded query string

if response.status_code == 200:
    print("ページの取得に成功しました")
else:
    print(f"ページの取得に失敗しました: {response.status_code}")

# The body is parsed and printed even on failure so the error page can be inspected.
soup = BeautifulSoup(response.content, "html.parser")

print(soup.prettify())

ページの取得に失敗しました: 403
<!DOCTYPE html>
<meta charset="utf-8"/>
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<title>
 403
</title>
403 Forbidden



In [98]:
# Use this cell when the request without custom headers is answered with HTTP 403.

# Add a header that mimics a desktop browser's User-Agent.
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36"
}

query = input("検索語を入力してください: ")
base_url = "https://www.beatport.com/search/tracks"

# Pass the search term through `params` so requests percent-encodes it; building
# the query string by hand breaks on spaces, '&', '#', and non-ASCII input.
# The timeout keeps a stalled connection from hanging the kernel indefinitely.
response = requests.get(
    base_url,
    params={"q": query, "per-page": 150},
    headers=headers,
    timeout=30,
)
url = response.url  # final request URL, including the encoded query string

if response.status_code == 200:
    print("ページの取得に成功しました")
else:
    print(f"ページの取得に失敗しました: {response.status_code}")

# The body is parsed and printed even on failure so the error page can be inspected.
soup = BeautifulSoup(response.content, "html.parser")

print(soup.prettify())

ページの取得に成功しました
<!DOCTYPE html>
<html lang="en">
 <head>
  <meta charset="utf-8"/>
  <meta content="width=device-width" name="viewport"/>
  <link href="/search/tracks?q=rock&amp;per-page=150" hreflang="en" rel="canonical"/>
  <link href="/de/search/tracks?q=rock&amp;per-page=150" hreflang="de" rel="alternate"/>
  <link href="/fr/search/tracks?q=rock&amp;per-page=150" hreflang="fr" rel="alternate"/>
  <link href="/es/search/tracks?q=rock&amp;per-page=150" hreflang="es" rel="alternate"/>
  <link href="/it/search/tracks?q=rock&amp;per-page=150" hreflang="it" rel="alternate"/>
  <link href="/ja/search/tracks?q=rock&amp;per-page=150" hreflang="ja" rel="alternate"/>
  <link href="/pt/search/tracks?q=rock&amp;per-page=150" hreflang="pt" rel="alternate"/>
  <link href="/nl/search/tracks?q=rock&amp;per-page=150" hreflang="nl" rel="alternate"/>
  <title>
   rock :: Beatport
  </title>
  <meta content="rock :: Beatport" property="og:title"/>
  <meta content="Download and listen to new, exclusive, e

In [99]:
# Locate the <script id="__NEXT_DATA__" type="application/json"> element in the parsed page.
# `find` returns None when no such tag exists.
json_script = soup.find(
    "script",
    attrs={"id": "__NEXT_DATA__", "type": "application/json"},
)


In [100]:
import json
# Reset first so that a failed lookup never leaves a stale `json_data` from an
# earlier run of this cell alive in the kernel.
json_data = None
# `.string` is None when the tag is empty or has several children, so check it
# as well instead of handing None to json.loads.
if json_script is not None and json_script.string:
    json_data = json.loads(json_script.string)  # convert the JSON text into a dict
else:
    print("JSONデータが見つかりませんでした。")

In [102]:
def extract_keys(data, parent_key=''):
    """Collect the key paths found in a nested dict/list structure.

    Dict keys are joined to their parent path with ``.``; list elements
    contribute an ``[index]`` suffix to the path of whatever they contain,
    but the indexed path itself is not emitted.

    Args:
        data: The value to walk. Anything that is neither a dict nor a list
            yields no paths.
        parent_key: Path prefix of ``data``; empty for the root.

    Returns:
        A list of paths in depth-first, insertion order.
    """
    if isinstance(data, dict):
        collected = []
        for name, child in data.items():
            # At the root (empty prefix) the key itself is used as the path.
            path = name if not parent_key else f"{parent_key}.{name}"
            collected.append(path)
            collected += extract_keys(child, path)
        return collected

    if isinstance(data, list):
        # Only descend into the elements; tag each with its index.
        return [
            path
            for index, element in enumerate(data)
            for path in extract_keys(element, f"{parent_key}[{index}]")
        ]

    # Scalars (and any other type) carry no keys.
    return []

In [103]:
# List every key path in the parsed page data to see where the results live.
keys = extract_keys(json_data)
keys

['props',
 'props.pageProps',
 'props.pageProps._sentryTraceData',
 'props.pageProps.anonSession',
 'props.pageProps.anonSession.access_token',
 'props.pageProps.anonSession.expires_in',
 'props.pageProps.anonSession.token_type',
 'props.pageProps.anonSession.scope',
 'props.pageProps.anonSession.create_date',
 'props.pageProps.query',
 'props.pageProps.location',
 'props.pageProps.location.realIp',
 'props.pageProps.location.countryCode',
 'props.pageProps.dehydratedState',
 'props.pageProps.dehydratedState.mutations',
 'props.pageProps.dehydratedState.queries',
 'props.pageProps.dehydratedState.queries[0].state',
 'props.pageProps.dehydratedState.queries[0].state.data',
 'props.pageProps.dehydratedState.queries[0].state.data.data',
 'props.pageProps.dehydratedState.queries[0].state.data.data[0].score',
 'props.pageProps.dehydratedState.queries[0].state.data.data[0].add_date',
 'props.pageProps.dehydratedState.queries[0].state.data.data[0].artists',
 'props.pageProps.dehydratedState.q

In [104]:
# Take the `data` list of the first dehydrated query as the list of result items,
# then show how many items it holds.
itemList = json_data['props']['pageProps']['dehydratedState']['queries'][0]['state']['data']['data']
len(itemList)

150

In [105]:
# Key paths across every item in the list (index-prefixed); the output is long.
extract_keys(itemList)

['[0].score',
 '[0].add_date',
 '[0].artists',
 '[0].artists[0].artist_id',
 '[0].artists[0].artist_name',
 '[0].artists[0].artist_type_name',
 '[0].available_worldwide',
 '[0].bpm',
 '[0].catalog_number',
 '[0].change_date',
 '[0].chord_type_id',
 '[0].current_status',
 '[0].current_status.current_status_id',
 '[0].current_status.current_status_name',
 '[0].enabled',
 '[0].encode_status',
 '[0].exclusive_date',
 '[0].exclusive_period',
 '[0].genre_enabled',
 '[0].guid',
 '[0].is_available_for_streaming',
 '[0].is_classic',
 '[0].isrc',
 '[0].key_id',
 '[0].key_name',
 '[0].label',
 '[0].label.enabled',
 '[0].label.label_id',
 '[0].label.label_name',
 '[0].label_manager',
 '[0].length',
 '[0].mix_name',
 '[0].publish_date',
 '[0].publish_status',
 '[0].release',
 '[0].release.release_id',
 '[0].release.release_name',
 '[0].release.release_image_uri',
 '[0].release.release_image_dynamic_uri',
 '[0].release_date',
 '[0].sale_type',
 '[0].suggest',
 '[0].suggest.input',
 '[0].suggest.weig

In [106]:
# Key paths of a single item (index 1), to inspect the per-item fields.
extract_keys(itemList[1])

['score',
 'add_date',
 'artists',
 'artists[0].artist_id',
 'artists[0].artist_name',
 'artists[0].artist_type_name',
 'available_worldwide',
 'bpm',
 'catalog_number',
 'change_date',
 'chord_type_id',
 'current_status',
 'current_status.current_status_id',
 'current_status.current_status_name',
 'enabled',
 'encode_status',
 'exclusive_period',
 'genre_enabled',
 'guid',
 'is_available_for_streaming',
 'is_classic',
 'isrc',
 'key_id',
 'key_name',
 'label',
 'label.enabled',
 'label.label_id',
 'label.label_name',
 'label_manager',
 'length',
 'mix_name',
 'publish_date',
 'publish_status',
 'release',
 'release.release_id',
 'release.release_name',
 'release.release_image_uri',
 'release.release_image_dynamic_uri',
 'release_date',
 'sale_type',
 'streaming_date',
 'suggest',
 'suggest.input',
 'suggest.weight',
 'supplier_id',
 'track_id',
 'track_name',
 'track_number',
 'update_date',
 'was_ever_exclusive',
 'downloads',
 'plays',
 'price',
 'price.code',
 'price.symbol',
 'pri

In [107]:
def _as_dict(value):
    """Return ``value`` if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _first_dict(value):
    """Return the first element of a non-empty list if it is a dict, otherwise ``{}``."""
    if isinstance(value, list) and value:
        return _as_dict(value[0])
    return {}


# Build one flat record per result item. Missing, null, or empty nested fields
# fall back to "" instead of raising AttributeError/IndexError.
trackList = []
for item in itemList:
    track = {}
    # NOTE(review): 'title' is filled from the release name, not from the item's
    # 'track_name' field — confirm this is intended.
    track['title'] = _as_dict(item.get('release')).get('release_name', "")
    track['mix'] = item.get('mix_name', "")
    # Only the first listed artist is used.
    track['artist'] = _first_dict(item.get('artists')).get('artist_name', "")
    # Placeholder lookup: no source field has been chosen for the remixer yet,
    # so this is None unless an item carries the literal key 'NOT_SET_KEY'.
    track['remixer'] = item.get('NOT_SET_KEY')
    track['label'] = _as_dict(item.get('label')).get('label_name', "")
    # Only the first listed genre is used.
    track['genre'] = _first_dict(item.get('genre')).get('genre_name', "")
    track['key'] = item.get('key_name', "")
    track['released'] = item.get('publish_date', "")
    track['price'] = _as_dict(item.get('price')).get('value', "")
    trackList.append(track)

In [108]:
trackList[0]

{'title': 'Body Rock EP',
 'mix': 'Original Mix',
 'artist': 'Body Rock',
 'remixer': None,
 'label': 'Essential Media Group',
 'genre': 'House',
 'key': 'Eb Major',
 'released': '2010-03-11T00:00:00',
 'price': 1.49}