### Origin of the data

The data used for the queries in the following sections come from the dataset:
- [Discogs](https://www.discogs.com/)

#### 1. Retrieve all releases that were released after January 1, 2017.

In [None]:
%%sql
-- Releases dated strictly after 2017-01-01.
-- The comparison is exclusive because the task asks for releases "after"
-- January 1, 2017, so a release dated on that day must not match.
-- Assumes released holds a calendar date without a time part (to be confirmed).
SELECT *
FROM releases
WHERE released > '2017-01-01'

#### 2. Find all tracks with a duration longer than 7 hours. Assume the 'duration' column in the 'tracks' table is in seconds.

In [None]:
%%sql
-- Tracks running longer than seven hours. duration is taken to be in seconds
-- (as stated in the task), so the cutoff is 7 * 60 * 60 = 25200.
SELECT t.*
FROM tracks AS t
WHERE t.duration > 7 * 60 * 60

#### 3. Retrieve the titles of 5 releases along with the names of the artists who released them.

In [None]:
%%sql
-- Five (release title, artist name) pairs.
-- released_by is joined to both artists and releases to pair each release
-- with its artist. No ordering is imposed, so any five rows may come back.
SELECT rel.title, art.name
FROM released_by AS rb
INNER JOIN artists AS art
    ON rb.artist_id = art.artist_id
INNER JOIN releases AS rel
    ON rb.release_id = rel.release_id
LIMIT 5

#### 4. List each genre and the number of releases in that genre.

In [None]:
%%sql
-- One row per distinct genre value, with the number of rows in releases
-- that carry that value.
SELECT r.genre, COUNT(*)
FROM releases AS r
GROUP BY r.genre

#### 5. Identify the top 5 artists who have the most releases.

In [None]:
%%sql
-- Rank artists by their number of released_by rows and keep the five largest.
-- artist_id is part of the grouping so that different artists sharing a name
-- are counted separately.
SELECT art.name, rb.artist_id, COUNT(*)
FROM released_by AS rb
INNER JOIN artists AS art
    ON rb.artist_id = art.artist_id
INNER JOIN releases AS rel
    ON rb.release_id = rel.release_id
GROUP BY rb.artist_id, art.name
ORDER BY COUNT(*) DESC
LIMIT 5

#### 6. Find the artist who has the longest total duration of tracks across all their releases.

In [None]:
%%sql
-- Sum track durations over every release credited to each artist and return
-- the single artist with the largest total.
-- Groups whose total is NULL (every duration missing) are removed first.
-- Without this, a descending sort can place a NULL total at the top on
-- databases that order NULLs first for DESC (PostgreSQL does by default),
-- and LIMIT 1 would then return an artist with no known duration at all.
SELECT artists.name, released_by.artist_id, SUM(tracks.duration)
FROM released_by
JOIN artists ON artists.artist_id = released_by.artist_id
JOIN releases ON releases.release_id = released_by.release_id
JOIN tracks ON releases.release_id = tracks.release_id
GROUP BY artists.name, released_by.artist_id
HAVING SUM(tracks.duration) IS NOT NULL
ORDER BY SUM(tracks.duration) DESC
LIMIT 1

#### 7. Find how many releases have tracks with duplicate titles.

In [None]:
%%sql
-- Count the releases on which at least one track title occurs more than once.
-- The inner query yields one row per (release, title) pair that is repeated;
-- COUNT(DISTINCT ...) then counts a release once even if it has several
-- repeated titles.
-- The derived table carries an alias because PostgreSQL before version 16
-- and MySQL reject an unnamed subquery in FROM.
SELECT COUNT(DISTINCT release_id)
FROM (
    SELECT tracks.release_id, tracks.title
    FROM tracks
    GROUP BY tracks.release_id, tracks.title
    HAVING COUNT(*) > 1
) AS repeated_titles

#### 8. Retrieve the artists with the name of 'Coldplay'.

In [None]:
%%sql
-- Every artists row whose name is exactly Coldplay.
SELECT a.*
FROM artists AS a
WHERE a.name = 'Coldplay'

#### 9. List the titles of all releases by that artist in alphabetical order.

%%sql
-- Distinct release titles credited to Coldplay, sorted A to Z.
SELECT DISTINCT rel.title
FROM artists AS art
INNER JOIN released_by AS rb
    ON rb.artist_id = art.artist_id
INNER JOIN releases AS rel
    ON rel.release_id = rb.release_id
WHERE art.name = 'Coldplay'
ORDER BY rel.title

#### 10. How many tracks from 'Coldplay' have position '1'?

In [None]:
%%sql
-- Number of distinct tracks that sit at position '1' on any release credited
-- to Coldplay. position is compared as text, hence the quoted literal.
SELECT COUNT(DISTINCT tracks.track_id)
FROM tracks
INNER JOIN releases AS rel
    ON rel.release_id = tracks.release_id
INNER JOIN released_by AS rb
    ON rb.release_id = rel.release_id
INNER JOIN artists AS art
    ON art.artist_id = rb.artist_id
WHERE tracks.position = '1'
  AND art.name = 'Coldplay'

#### 11. List the titles of all releases by Coldplay that contain fewer than 2 tracks.

In [None]:
%%sql
-- Titles of Coldplay releases that have zero or one track.
-- tracks is LEFT JOINed so that a release without any track rows still forms
-- a group; its count is then 0 because COUNT skips the NULL track_id.
-- With an inner join such releases would vanish and only releases with
-- exactly one track could ever be returned.
SELECT DISTINCT releases.title
FROM artists
JOIN released_by ON released_by.artist_id = artists.artist_id
JOIN releases ON released_by.release_id = releases.release_id
LEFT JOIN tracks ON tracks.release_id = releases.release_id
WHERE artists.name = 'Coldplay'
GROUP BY releases.release_id, releases.title
HAVING COUNT(DISTINCT tracks.track_id) < 2

#### 12. What is the average track duration?

In [None]:
%%sql
-- Mean of the duration column over all rows of tracks.
SELECT
    AVG(duration)
FROM
    tracks

#### 13. How many artists have released tracks longer than twice the average?

In [None]:
%%sql
-- Number of artists that have at least one track longer than twice the
-- average track duration.
-- avg_duration is a one-row CTE holding the overall average. It is attached
-- with an explicit CROSS JOIN, placed after the keyed joins, instead of a
-- comma join mixed with JOIN clauses, so the join order is unambiguous.
-- An artist qualifies when its longest track exceeds twice the average.
-- The derived table carries an alias because PostgreSQL before version 16
-- and MySQL reject an unnamed subquery in FROM.
WITH avg_duration AS (
    SELECT AVG(duration) AS a
    FROM tracks
)
SELECT COUNT(*)
FROM (
    SELECT artists.artist_id
    FROM artists
    JOIN released_by ON released_by.artist_id = artists.artist_id
    JOIN releases ON released_by.release_id = releases.release_id
    JOIN tracks ON tracks.release_id = releases.release_id
    CROSS JOIN avg_duration
    GROUP BY artists.artist_id, avg_duration.a
    HAVING MAX(tracks.duration) > avg_duration.a * 2
) AS artists_with_long_tracks

#### 14. What is the title of the album by 'Coldplay' with the largest number of tracks?

In [None]:
%%sql
-- Count track rows per Coldplay release and return the title with the
-- highest count, together with that count. Grouping includes release_id so
-- that two releases sharing a title are counted separately.
SELECT rel.title, COUNT(*)
FROM artists AS art
INNER JOIN released_by AS rb
    ON rb.artist_id = art.artist_id
INNER JOIN releases AS rel
    ON rb.release_id = rel.release_id
INNER JOIN tracks AS trk
    ON rel.release_id = trk.release_id
WHERE art.name = 'Coldplay'
GROUP BY rel.release_id, rel.title
ORDER BY COUNT(*) DESC
LIMIT 1

#### 15. What is the name of the first artist, in alphabetical order, with releases in the most genres? Please make sure to exclude "Various Artists".

In [None]:
%%sql
-- For every artist not named 'Various Artists', count the distinct genres of
-- the releases credited to them. The artist with the highest count wins;
-- ties are broken by taking the alphabetically first name.
SELECT
    art.name AS artist_name,
    COUNT(DISTINCT rel.genre) AS number_genres
FROM artists AS art
INNER JOIN released_by AS rb
    ON rb.artist_id = art.artist_id
INNER JOIN releases AS rel
    ON rel.release_id = rb.release_id
WHERE art.name <> 'Various Artists'
GROUP BY art.artist_id, art.name
ORDER BY number_genres DESC, artist_name ASC
LIMIT 1

#### 16. In what year did they (the artist from the previous question) release their first album?

In [None]:
%%sql
-- Step 1 (CTE): pick the artist with releases in the most distinct genres,
-- excluding 'Various Artists' and breaking ties by alphabetical name.
-- Step 2: report the YEAR of that artist's earliest dated release.
-- The task asks for a year, so the year is extracted rather than returning
-- the raw released value. MIN is used instead of ORDER BY with LIMIT 1
-- because MIN skips NULL dates on every database, whereas the position of
-- NULLs in an ascending sort differs between databases.
-- EXTRACT is standard SQL (PostgreSQL, MySQL, DuckDB); assumes released is a
-- date-typed column (to be confirmed). SQLite would need strftime instead.
WITH most_genres_artist AS (
    SELECT artists.name AS artist_name, artists.artist_id AS artist_id
    FROM artists
    JOIN released_by ON released_by.artist_id = artists.artist_id
    JOIN releases ON releases.release_id = released_by.release_id
    WHERE artists.name != 'Various Artists'
    GROUP BY artists.name, artists.artist_id
    ORDER BY COUNT(DISTINCT releases.genre) DESC, artist_name ASC
    LIMIT 1
)

SELECT EXTRACT(YEAR FROM MIN(releases.released)) AS first_release_year
FROM most_genres_artist
JOIN released_by ON released_by.artist_id = most_genres_artist.artist_id
JOIN releases ON releases.release_id = released_by.release_id

#### 17. How many artists have released an album with total track duration above twice the average total track duration?


In [None]:
%%sql
-- Number of artists whose longest album (by summed track duration) exceeds
-- twice the average album duration.
--   per_artist_album         total duration of each (artist, release) pair
--   longest_album_per_artist longest such total for each artist
--   release_totals           total duration of each release found in tracks
--   overall_average          mean of those per-release totals (one row)
WITH per_artist_album AS (
    SELECT
        art.artist_id AS artist_id,
        rel.release_id AS release_id,
        SUM(trk.duration) AS album_duration
    FROM artists AS art
    INNER JOIN released_by AS rb
        ON rb.artist_id = art.artist_id
    INNER JOIN releases AS rel
        ON rel.release_id = rb.release_id
    INNER JOIN tracks AS trk
        ON trk.release_id = rel.release_id
    GROUP BY art.artist_id, rel.release_id
),
longest_album_per_artist AS (
    SELECT artist_id, MAX(album_duration) AS max_duration
    FROM per_artist_album
    GROUP BY artist_id
),
release_totals AS (
    SELECT release_id, SUM(duration) AS total_duration
    FROM tracks
    GROUP BY release_id
),
overall_average AS (
    SELECT AVG(total_duration) AS avg_duration
    FROM release_totals
)

SELECT COUNT(*)
FROM longest_album_per_artist AS la
CROSS JOIN overall_average AS oa
WHERE la.max_duration > 2 * oa.avg_duration


#### 18. Show the artists that have more than 200 releases in total but no releases with the genre 'Pop', in reverse alphabetical order.

In [None]:
%%sql
-- Names of artists with more than 200 released_by rows and no release in the
-- 'Pop' genre, sorted Z to A.
-- First branch: artists with more than 200 releases.
-- Second branch: artists that have at least one 'Pop' release.
-- EXCEPT removes the second set from the first. Both branches carry
-- artist_id next to name so the set difference is taken per artist; when
-- comparing names only, an artist would be dropped whenever a different
-- artist with the same name has a 'Pop' release.
-- The derived table carries an alias because PostgreSQL before version 16
-- and MySQL reject an unnamed subquery in FROM.
SELECT name
FROM (
    SELECT artists.artist_id, artists.name
    FROM artists
    JOIN released_by ON released_by.artist_id = artists.artist_id
    GROUP BY artists.artist_id, artists.name
    HAVING COUNT(*) > 200
    EXCEPT
    SELECT artists.artist_id, artists.name
    FROM artists
    JOIN released_by ON released_by.artist_id = artists.artist_id
    JOIN releases ON releases.release_id = released_by.release_id
    WHERE releases.genre = 'Pop'
) AS prolific_non_pop_artists
ORDER BY name DESC