Skip to content

Commit

Permalink
Merge pull request from GHSA-q764-g6fm-555v
Browse files Browse the repository at this point in the history
* Improve URL and URI handling

* Back to SpotifyException for backward-compatibility

* fix copy paste typo

* TODO v3 comments

Co-authored-by: Stephane Bruckert <stephane.bruckert@gmail.com>
  • Loading branch information
Shaderbug and stephanebruckert committed Jan 23, 2023
1 parent 262e7a0 commit b1db0b6
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 17 deletions.
8 changes: 6 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## Unreleased

// Add new changes below this line
- Modified docstring for playlist_add_items() to accept "only URIs or URLs",
with intended deprecation for IDs in v3

### Added

- Add alternative module installation instruction to README
- Added Comment to README - Getting Started for user to add URI to app in Spotify Developer Dashboard.
- Added playlist_add_tracks.py to example folder

### Changed

- Modified docstring for playlist_add_items() to accept "only URIs or URLs",
with intended deprecation for IDs in v3

### Fixed

- Path traversal vulnerability that may lead to type confusion in URI handling code
- Update contributing.md

### Removed
Expand Down
62 changes: 47 additions & 15 deletions spotipy/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import json
import logging
import re
import warnings

import requests
Expand Down Expand Up @@ -96,6 +97,29 @@ class Spotify(object):
"US",
"UY"]

# Spotify URI scheme defined in [1], and the ID format as base-62 in [2].
#
# Unfortunately the IANA specification is out of date and doesn't include the new types
# show and episode. Additionally, for the user URI, it does not specify which characters
# are valid for usernames, so the assumption is alphanumeric which coincidentially are also
# the same ones base-62 uses.
# In limited manual exploration this seems to hold true, as newly accounts are assigned an
# identifier that looks like the base-62 of all other IDs, but some older accounts only have
# numbers and even older ones seemed to have been allowed to freely pick this name.
#
# [1] https://www.iana.org/assignments/uri-schemes/prov/spotify
# [2] https://developer.spotify.com/documentation/web-api/#spotify-uris-and-ids
_regex_spotify_uri = r'^spotify:(?P<type>track|artist|album|playlist|show|episode|user):(?P<id>[0-9A-Za-z]+)$'

# Spotify URLs are defined at [1]. The assumption is made that they are all
# pointing to open.spotify.com, so a regex is used to parse them as well,
# instead of a more complex URL parsing function.
#
# [1] https://developer.spotify.com/documentation/web-api/#spotify-uris-and-ids
_regex_spotify_url = r'^(http[s]?:\/\/)?open.spotify.com\/(?P<type>track|artist|album|playlist|show|episode|user)\/(?P<id>[0-9A-Za-z]+)(\?.*)?$'

_regex_base62 = r'^[0-9A-Za-z]+$'

def __init__(
self,
auth=None,
Expand Down Expand Up @@ -1940,20 +1964,28 @@ def _append_device_id(self, path, device_id):
return path

def _get_id(self, type, id):
fields = id.split(":")
if len(fields) >= 3:
if type != fields[-2]:
logger.warning('Expected id of type %s but found type %s %s',
type, fields[-2], id)
return fields[-1].split("?")[0]
fields = id.split("/")
if len(fields) >= 3:
itype = fields[-2]
if type != itype:
logger.warning('Expected id of type %s but found type %s %s',
type, itype, id)
return fields[-1].split("?")[0]
return id
uri_match = re.search(Spotify._regex_spotify_uri, id)
if uri_match is not None:
uri_match_groups = uri_match.groupdict()
if uri_match_groups['type'] != type:
# TODO change to a ValueError in v3

This comment has been minimized.

Copy link
@stephanebruckert

stephanebruckert Jan 24, 2023

Author Member

TODO in #652

raise SpotifyException(400, -1, "Unexpected Spotify URI type.")
return uri_match_groups['id']

url_match = re.search(Spotify._regex_spotify_url, id)
if url_match is not None:
url_match_groups = url_match.groupdict()
if url_match_groups['type'] != type:
raise SpotifyException(400, -1, "Unexpected Spotify URL type.")
# TODO change to a ValueError in v3
return url_match_groups['id']

# Raw identifiers might be passed, ensure they are also base-62
if re.search(Spotify._regex_base62, id) is not None:
return id

# TODO change to a ValueError in v3
raise SpotifyException(400, -1, "Unsupported URL / URI.")

def _get_uri(self, type, id):
if self._is_uri(id):
Expand All @@ -1962,7 +1994,7 @@ def _get_uri(self, type, id):
return "spotify:" + type + ":" + self._get_id(type, id)

def _is_uri(self, uri):
return uri.startswith("spotify:") and len(uri.split(':')) == 3
return re.search(Spotify._regex_spotify_uri, uri) is not None

def _search_multiple_markets(self, q, limit, offset, type, markets, total):
if total and limit > total:
Expand Down

0 comments on commit b1db0b6

Please sign in to comment.