In [1]:
import json
from typing import Optional

import requests
from bs4 import BeautifulSoup

In [2]:
# Sample Bandcamp pages: an album URL and a track URL.
# Only the track URL is passed to the extractor in the cells below.
bandcamp_url = "https://horsevision.bandcamp.com/album/another-life"
bandcamp_track_url = "https://horsevision.bandcamp.com/track/how-are-we"

In [3]:
def extract_jsonld_from_bandcamp(bandcamp_url: str) -> Optional[dict]:
    """
    Extract JSON-LD structured data from a Bandcamp page.

    Fetches the page, locates the first <script type="application/ld+json">
    tag and parses its contents as JSON. Every failure is reported with
    print() and signalled by returning None; no exception is propagated.

    Args:
        bandcamp_url: Bandcamp track or album URL

    Returns:
        Parsed JSON data as a Python dictionary, or None if the page could
        not be fetched, no JSON-LD script tag was found, the tag was empty,
        or its contents were not valid JSON.
    """
    try:
        # 1. Fetch the page (fail fast on slow hosts and on 4xx/5xx responses)
        response = requests.get(bandcamp_url, timeout=10)
        response.raise_for_status()

        # 2. Parse HTML with BeautifulSoup
        soup = BeautifulSoup(response.content, 'html.parser')

        # 3. Find the script tag with JSON-LD
        # Look for: <script type="application/ld+json">
        jsonld_script = soup.find('script', {'type': 'application/ld+json'})
        if jsonld_script is None:
            print("No JSON-LD script tag found")
            return None

        # 4. Extract the text content from the script tag.
        # .string is None when the tag is empty or has more than one child,
        # so report that case explicitly instead of letting json.loads fail.
        jsonld_text = jsonld_script.string
        if not jsonld_text:
            print("JSON-LD script tag is empty")
            return None

        # 5. Parse the JSON string into a Python object
        return json.loads(jsonld_text)

    except requests.RequestException as e:
        print(f"Error fetching URL: {e}")
        return None
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON: {e}")
        return None
    except Exception as e:
        # Deliberate best-effort: anything else is reported, not raised.
        print(f"Unexpected error: {e}")
        return None

In [4]:
# Fetch and parse the JSON-LD for the sample track page.
# The result is None if fetching or parsing failed (the extractor prints why).
jsonld_data = extract_jsonld_from_bandcamp(bandcamp_track_url)
print(jsonld_data)

{'@type': 'MusicRecording', '@id': 'https://horsevision.bandcamp.com/track/how-are-we', 'additionalProperty': [{'@type': 'PropertyValue', 'name': 'track_id', 'value': 2859991095}, {'@type': 'PropertyValue', 'name': 'art_id', 'value': 3703690842}, {'@type': 'PropertyValue', 'name': 'license_name', 'value': 'all_rights_reserved'}, {'@type': 'PropertyValue', 'name': 'tracknum', 'value': 1}], 'name': 'How Are We', 'creditText': 'w/p by Horse Vision\nmixed by Johan Nilsson\nmastered by Jacob Günther Andersen', 'isrcCode': 'QZNWP2551523', 'duration': 'P00H02M40S', 'dateModified': '08 Mar 2025 16:52:55 GMT', 'datePublished': '07 Mar 2025 00:00:00 GMT', 'inAlbum': {'@type': 'MusicAlbum', 'name': 'Another Life', 'albumRelease': [{'@type': 'MusicRelease', '@id': 'https://horsevision.bandcamp.com/album/another-life', 'additionalProperty': [{'@type': 'PropertyValue', 'name': 'item_type', 'value': 'a'}]}, {'@type': ['MusicRelease', 'Product'], '@id': 'https://horsevision.bandcamp.com/track/how-are-

In [5]:
# Required fields
required_fields = (
    'track_type',
    'track_name',
    'artist',
    'album_name',
    'mix_page',
    'record_label',
    'genre',
    'purchase_link',
)

# Streaming-related fields
streaming_fields = (
    'streaming_platform',
    'streaming_link',
)

# `fields` used to be assigned twice in this cell, which silently discarded
# the required-fields tuple. Both tuples now have their own name; `fields`
# stays bound to the streaming tuple, the value it ended up with before.
fields = streaming_fields

In [None]:
#Pull out relevant info
def generate_bandcamp_meta_data_dictionary(jsonld_data: Optional[dict]) -> dict:
    """
    Build a flat metadata dictionary from Bandcamp JSON-LD data.

    Args:
        jsonld_data: Parsed JSON-LD dictionary, or None (which
            extract_jsonld_from_bandcamp returns on failure).

    Returns:
        Dictionary with the keys 'track_name', 'artist' and 'album'.
        'track_name' comes from the top-level 'name'; 'artist' and 'album'
        come from the 'name' of the nested 'byArtist' and 'inAlbum'
        objects. Any value that is missing is None.
    """
    # Treat None (or any non-dict input) like an empty document so callers
    # can pass the extractor's result straight through without a crash.
    data = jsonld_data if isinstance(jsonld_data, dict) else {}

    def _nested_name(key: str):
        # The nested object may be absent or explicitly null in the JSON.
        entity = data.get(key)
        return entity.get('name') if isinstance(entity, dict) else None

    return {
        'track_name': data.get('name'),
        'artist': _nested_name('byArtist'),
        'album': _nested_name('inAlbum'),
    }

In [8]:
# Spot-check the nested lookup used for the album name: defaulting to {} means
# a missing 'inAlbum' key yields None instead of raising.
x = jsonld_data.get('inAlbum', {}).get('name')
x

'Another Life'

# YouTube stuff

In [1]:
# Sample SoundCloud URL used to try out the platform checks below.
url_string = "https://soundcloud.com/wakinglifeportugal/etape"

In [None]:
def clean_url_string(url_string: str) -> str:
    """
    Return the part of a URL that precedes its first ".com".

    If ".com" does not occur in the input, the input is returned unchanged.

    Args:
        url_string: URL to trim

    Returns:
        Everything before the first ".com", e.g. "https://soundcloud"
    """
    head, _separator, _tail = url_string.partition(".com")
    return head

In [7]:
# Split the URL on every ".com"; element 0 is everything before the first one.
split_url_srting = url_string.split(".com")
split_url_srting

['https://soundcloud', '/wakinglifeportugal/etape']

In [8]:
# Keep only the piece before ".com" (needs split_url_srting from the cell above).
clean_url = split_url_srting[0]
clean_url

'https://soundcloud'

In [11]:
# Platform names that the cells below look for inside a URL.
PLATFORM_CHOICES = ['youtube', 'spotify', 'soundcloud', 'nina', 'bandcamp']

## OPTION 1 - SIMPLE

In [12]:
# Substring check: print "yes" as soon as any known platform name occurs
# inside clean_url, then stop looking.
for choice in PLATFORM_CHOICES:
    if choice in clean_url:
        print("yes")
        break

yes


## OPTION 2 - MORE COMPLEX

In [14]:
# The same substring check as one boolean expression; any() stops at the first match.
any(platform in clean_url for platform in PLATFORM_CHOICES)

True

In [17]:
# Set intersection requires a split piece to EQUAL a platform name exactly.
# A piece such as 'https://soundcloud' is not equal to 'soundcloud', so this
# is False for url_string even though the substring checks above succeed.
es = bool(set(url_string.split(".com")) & set(PLATFORM_CHOICES))
es

False

## TESTING YOUTUBE INTEGRATION

In [2]:
# Sample YouTube watch URL (carries the v, list and index query parameters).
youtube_url = "https://www.youtube.com/watch?v=zYta6v1wZiI&list=LL&index=1"

In [None]:
def get_youtube_metadata(video_id: str) -> dict:
    """
    Fetch title, artist and description for a YouTube video.

    Builds a "youtube" / "v3" client, calls
    videos().list(part="snippet", id=video_id) and reads the fields from the
    snippet of the first returned item.

    NOTE(review): `build`, `settings` and `get_artist_from_channel_title` are
    not imported or defined in the visible part of this notebook. Confirm
    they are in scope before running on a fresh kernel.

    Args:
        video_id: Value sent as the `id` filter of the videos.list request
            (presumably the `v` query parameter of a watch URL; confirm).

    Returns:
        Dictionary with the keys "title", "artist" and "description".

    Raises:
        IndexError: If the response contains no items for `video_id`.
    """
    youtube = build("youtube", "v3", developerKey=settings.YOUTUBE_API_KEY)

    response = youtube.videos().list(
        part="snippet",
        id=video_id
    ).execute()

    # An unknown id yields an empty item list. Fail with a message that names
    # the id instead of a bare "list index out of range" (same exception type).
    items = response["items"]
    if not items:
        raise IndexError(f"No YouTube video found for id {video_id!r}")
    snippet = items[0]["snippet"]

    # Pull the relevant fields out of the snippet
    title = snippet.get("title")
    # The artist is derived from the channel title
    channel_title = snippet.get("channelTitle")
    artist = get_artist_from_channel_title(channel_title)
    description = snippet.get("description")

    return {
        "title": title,
        "artist": artist,
        "description": description,
    }
