# Introduction
Top 100 songs of the year 2006 - 2020

## Project description

Here is supposed to be project description

## Importing libraries

In [67]:
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import wget
import glob
import os
from twython import Twython
from credentials import * 
import requests
import json
import matplotlib.pyplot as plt
from math import pi

# Downloading data and scraping HTML websites 
Downloading HTML webpages with top 100 songs by year from www.billboard.com
- url - https://www.billboard.com/charts/year-end/hot-100-songs

## Downloading data

Here download data

## Scraping
- Scrape data from downloaded webpages and transform it into a `dataframe`

In [2]:
def get_webpage_data(start_year, end_year):
    
    df_full = pd.DataFrame()
    
    for i in range(start_year,end_year+1):
        year = i
    
        year = str(year)

        web_page = BeautifulSoup(open('../Top-songs-2006-2020/HTMLs/Top_100_'+year+'.html'), 'html.parser')

        all_titles = web_page.select('.ye-chart-item__title')
        all_artist = web_page.select('.ye-chart-item__artist')
        all_ranks = web_page.select('.ye-chart-item__rank')

        ranks_list = []
        for rank in all_ranks:
            ranks_list.append(rank.get_text().replace('\n', ''))

        titles_list = []
        for title in all_titles:
            titles_list.append(title.get_text().replace('\n', ''))

        artist_list = []
        for artist in all_artist:
            artist_list.append(artist.get_text().replace('\n', ''))

        year_list = []
        for year in web_page.select('.ye-chart-item__primary-row'):
            year_list.append(year['data-date'])

        df = pd.DataFrame(data = [ranks_list, titles_list, artist_list, year_list]).T
        df.columns = ['Rank', 'Title', 'Artist', 'Year']

        df_full = df_full.append(df)
        
    return df_full

In [3]:
full_df = get_webpage_data(2006,2020)
full_df.reset_index(drop = True, inplace = True)
full_df

Unnamed: 0,Rank,Title,Artist,Year
0,1,Bad Day,Daniel Powter,2006
1,2,Temperature,Sean Paul,2006
2,3,Promiscuous,Nelly Furtado Featuring Timbaland,2006
3,4,You're Beautiful,James Blunt,2006
4,5,Hips Don't Lie,Shakira Featuring Wyclef Jean,2006
...,...,...,...,...
1493,96,More Than My Hometown,Morgan Wallen,2020
1494,97,Lovin' On You,Luke Combs,2020
1495,98,Said Sum,Moneybagg Yo,2020
1496,99,Slide,H.E.R. Featuring YG,2020


# Spotify API

## Set up API

In [4]:
%run spotify_api_secret-Copy1.ipynb

In [5]:
auth_url = 'https://accounts.spotify.com/api/token'

auth_response = requests.post(auth_url, {
    'grant_type':'client_credentials',
    'client_id': client_id,
    'client_secret': client_secret,
})

auth_response_data = auth_response.json()
access_token = auth_response_data['access_token']
headers = {'Authorization': 'Bearer {token}'.format(token = access_token)}

## Extracitng data from Spotify API

### Extracting track's ids, etc.

In [6]:
full_df.head(1)

Unnamed: 0,Rank,Title,Artist,Year
0,1,Bad Day,Daniel Powter,2006


In [7]:
track_names_spot = []
track_ids_spot = []
track_duration_spot = []
track_explicit_spot = []
track_popularity_spot = []
artist_name_spot = []

for track_name in full_df.Title:
    
    url = 'https://api.spotify.com/v1/search'

    track = requests.get(url, headers = headers, params = {'q':track_name, 'type':'track'}).json()
    
    track_names_spot.append(track['tracks']['items'][0]['name'])
    track_ids_spot.append(track['tracks']['items'][0]['id'])
    artist_name_spot.append(track['tracks']['items'][0]['album']['artists'][0]['name'])
    track_duration_spot.append(track['tracks']['items'][0]['duration_ms'])
    track_explicit_spot.append(track['tracks']['items'][0]['explicit'])
    track_popularity_spot.append(track['tracks']['items'][0]['popularity'])

In [8]:
df_spotify = pd.DataFrame(data = [track_names_spot, artist_name_spot, track_ids_spot, 
                    track_duration_spot, track_popularity_spot, track_explicit_spot]).T
df_spotify.columns = ['Title_spot', 'Artist_spot', 'Track_id', 'Track_duration_ms', 
                      'Track_popularity', 'Track_explicit']
df_spotify.head(3)

Unnamed: 0,Title_spot,Artist_spot,Track_id,Track_duration_ms,Track_popularity,Track_explicit
0,Bad Day - Single Version,Various Artists,64HJothMOwTKVWDzKxbZdR,145680,0,False
1,Temperature,Sean Paul,0k2GOhqsrxDTAbFFSdNJjT,218573,79,False
2,Promiscuous,Nelly Furtado,2gam98EZKrF9XuOkU13ApN,242293,82,False


### Merging two DataFrames

In [9]:
df_spotify.head(3)

Unnamed: 0,Title_spot,Artist_spot,Track_id,Track_duration_ms,Track_popularity,Track_explicit
0,Bad Day - Single Version,Various Artists,64HJothMOwTKVWDzKxbZdR,145680,0,False
1,Temperature,Sean Paul,0k2GOhqsrxDTAbFFSdNJjT,218573,79,False
2,Promiscuous,Nelly Furtado,2gam98EZKrF9XuOkU13ApN,242293,82,False


In [10]:
full_df.head(3)

Unnamed: 0,Rank,Title,Artist,Year
0,1,Bad Day,Daniel Powter,2006
1,2,Temperature,Sean Paul,2006
2,3,Promiscuous,Nelly Furtado Featuring Timbaland,2006


In [11]:
df = full_df.merge(right = df_spotify, left_index = True, right_index = True)
df

Unnamed: 0,Rank,Title,Artist,Year,Title_spot,Artist_spot,Track_id,Track_duration_ms,Track_popularity,Track_explicit
0,1,Bad Day,Daniel Powter,2006,Bad Day - Single Version,Various Artists,64HJothMOwTKVWDzKxbZdR,145680,0,False
1,2,Temperature,Sean Paul,2006,Temperature,Sean Paul,0k2GOhqsrxDTAbFFSdNJjT,218573,79,False
2,3,Promiscuous,Nelly Furtado Featuring Timbaland,2006,Promiscuous,Nelly Furtado,2gam98EZKrF9XuOkU13ApN,242293,82,False
3,4,You're Beautiful,James Blunt,2006,You're Beautiful,James Blunt,0vg4WnUWvze6pBOJDTq99k,209493,77,True
4,5,Hips Don't Lie,Shakira Featuring Wyclef Jean,2006,Hips Don't Lie (feat. Wyclef Jean),Shakira,3ZFTkvIE7kyPt6Nu3PEa7V,218093,84,False
...,...,...,...,...,...,...,...,...,...,...
1493,96,More Than My Hometown,Morgan Wallen,2020,More Than My Hometown,Morgan Wallen,5OELUCYgOHKFAvCERnAvfS,216573,76,False
1494,97,Lovin' On You,Luke Combs,2020,Lovin' On You,Luke Combs,0nYvjcSlCgjcwogQAwIwNp,194866,72,False
1495,98,Said Sum,Moneybagg Yo,2020,Said Sum,Moneybagg Yo,3sKz6Sd72K0ofPWcJPPk6H,155168,72,True
1496,99,Slide,H.E.R. Featuring YG,2020,Slide,H.E.R.,2rTnVB1bvwxHtaIl4uVu7f,238321,73,True


### Extracting Audio Features of the tracks

In [12]:
requests_list = []

for track_id in df.Track_id:
    
    url = 'https://api.spotify.com/v1/audio-features/'+track_id
    
    requests_list.append(requests.get(url, headers = headers).json())

In [13]:
df_audio_feautures = pd.DataFrame()

i = 0

for i in range(len(requests_list)):
    
    temp_df = pd.DataFrame.from_dict(requests_list[i], orient = 'Index').T
    
    df_audio_feautures = df_audio_feautures.append(temp_df)
    
    i += 1

In [14]:
df_audio_feautures.reset_index(drop = True, inplace = True)
df_audio_feautures

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.676,0.509,11,-6.838,0,0.0423,0.0221,0,0.221,0.313,81.981,audio_features,64HJothMOwTKVWDzKxbZdR,spotify:track:64HJothMOwTKVWDzKxbZdR,https://api.spotify.com/v1/tracks/64HJothMOwTK...,https://api.spotify.com/v1/audio-analysis/64HJ...,145680,4
1,0.951,0.6,0,-4.675,0,0.0685,0.106,0,0.0712,0.822,125.04,audio_features,0k2GOhqsrxDTAbFFSdNJjT,spotify:track:0k2GOhqsrxDTAbFFSdNJjT,https://api.spotify.com/v1/tracks/0k2GOhqsrxDT...,https://api.spotify.com/v1/audio-analysis/0k2G...,218573,4
2,0.808,0.97,10,-6.098,0,0.0506,0.0569,0.000061,0.154,0.868,114.328,audio_features,2gam98EZKrF9XuOkU13ApN,spotify:track:2gam98EZKrF9XuOkU13ApN,https://api.spotify.com/v1/tracks/2gam98EZKrF9...,https://api.spotify.com/v1/audio-analysis/2gam...,242293,4
3,0.675,0.479,0,-9.87,0,0.0278,0.633,0.000018,0.088,0.454,81.998,audio_features,0vg4WnUWvze6pBOJDTq99k,spotify:track:0vg4WnUWvze6pBOJDTq99k,https://api.spotify.com/v1/tracks/0vg4WnUWvze6...,https://api.spotify.com/v1/audio-analysis/0vg4...,209493,4
4,0.778,0.824,10,-5.892,0,0.0707,0.284,0,0.405,0.758,100.024,audio_features,3ZFTkvIE7kyPt6Nu3PEa7V,spotify:track:3ZFTkvIE7kyPt6Nu3PEa7V,https://api.spotify.com/v1/tracks/3ZFTkvIE7kyP...,https://api.spotify.com/v1/audio-analysis/3ZFT...,218093,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1493,0.62,0.869,6,-5.479,1,0.0462,0.615,0,0.131,0.597,126.019,audio_features,5OELUCYgOHKFAvCERnAvfS,spotify:track:5OELUCYgOHKFAvCERnAvfS,https://api.spotify.com/v1/tracks/5OELUCYgOHKF...,https://api.spotify.com/v1/audio-analysis/5OEL...,216573,4
1494,0.572,0.949,4,-4.865,1,0.06,0.00165,0.000195,0.163,0.53,118.974,audio_features,0nYvjcSlCgjcwogQAwIwNp,spotify:track:0nYvjcSlCgjcwogQAwIwNp,https://api.spotify.com/v1/tracks/0nYvjcSlCgjc...,https://api.spotify.com/v1/audio-analysis/0nYv...,194867,4
1495,0.929,0.667,8,-6.789,0,0.353,0.0185,0,0.1,0.274,126.998,audio_features,3sKz6Sd72K0ofPWcJPPk6H,spotify:track:3sKz6Sd72K0ofPWcJPPk6H,https://api.spotify.com/v1/tracks/3sKz6Sd72K0o...,https://api.spotify.com/v1/audio-analysis/3sKz...,155168,4
1496,0.827,0.469,10,-9.141,0,0.341,0.0807,0.000008,0.207,0.196,97.028,audio_features,2rTnVB1bvwxHtaIl4uVu7f,spotify:track:2rTnVB1bvwxHtaIl4uVu7f,https://api.spotify.com/v1/tracks/2rTnVB1bvwxH...,https://api.spotify.com/v1/audio-analysis/2rTn...,238321,4


### Cleaning Spotify data frame

#### Dealing with extraction error (not needed if extraction limit not exceeded)

In [33]:
'''

len(df_audio_feautures)
df_audio_feautures.error.unique()[1:]
len(df_audio_feautures[df_audio_feautures.error == 'API rate limit exceeded'])
len(df_audio_feautures[df_audio_feautures.error == 'analysis not found'])
len(df_audio_feautures[df_audio_feautures.error == '429'])
len(df_audio_feautures[df_audio_feautures.error == '404'])
df_audio_feautures.drop(df_audio_feautures[df_audio_feautures.error == 'API rate limit exceeded'].index, 
                    inplace = True)

df_audio_feautures.drop(df_audio_feautures[df_audio_feautures.error == 'analysis not found'].index, 
                    inplace = True)

len(df_audio_feautures)

'''

"\n\nlen(df_audio_feautures)\ndf_audio_feautures.error.unique()[1:]\nlen(df_audio_feautures[df_audio_feautures.error == 'API rate limit exceeded'])\nlen(df_audio_feautures[df_audio_feautures.error == 'analysis not found'])\nlen(df_audio_feautures[df_audio_feautures.error == '429'])\nlen(df_audio_feautures[df_audio_feautures.error == '404'])\ndf_audio_feautures.drop(df_audio_feautures[df_audio_feautures.error == 'API rate limit exceeded'].index, \n                    inplace = True)\n\ndf_audio_feautures.drop(df_audio_feautures[df_audio_feautures.error == 'analysis not found'].index, \n                    inplace = True)\n\nlen(df_audio_feautures)\n\n"

# Merge dataframes to the final dataframe

In [18]:
df_audio_feautures.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.676,0.509,11,-6.838,0,0.0423,0.0221,0.0,0.221,0.313,81.981,audio_features,64HJothMOwTKVWDzKxbZdR,spotify:track:64HJothMOwTKVWDzKxbZdR,https://api.spotify.com/v1/tracks/64HJothMOwTK...,https://api.spotify.com/v1/audio-analysis/64HJ...,145680,4
1,0.951,0.6,0,-4.675,0,0.0685,0.106,0.0,0.0712,0.822,125.04,audio_features,0k2GOhqsrxDTAbFFSdNJjT,spotify:track:0k2GOhqsrxDTAbFFSdNJjT,https://api.spotify.com/v1/tracks/0k2GOhqsrxDT...,https://api.spotify.com/v1/audio-analysis/0k2G...,218573,4
2,0.808,0.97,10,-6.098,0,0.0506,0.0569,6.1e-05,0.154,0.868,114.328,audio_features,2gam98EZKrF9XuOkU13ApN,spotify:track:2gam98EZKrF9XuOkU13ApN,https://api.spotify.com/v1/tracks/2gam98EZKrF9...,https://api.spotify.com/v1/audio-analysis/2gam...,242293,4
3,0.675,0.479,0,-9.87,0,0.0278,0.633,1.8e-05,0.088,0.454,81.998,audio_features,0vg4WnUWvze6pBOJDTq99k,spotify:track:0vg4WnUWvze6pBOJDTq99k,https://api.spotify.com/v1/tracks/0vg4WnUWvze6...,https://api.spotify.com/v1/audio-analysis/0vg4...,209493,4
4,0.778,0.824,10,-5.892,0,0.0707,0.284,0.0,0.405,0.758,100.024,audio_features,3ZFTkvIE7kyPt6Nu3PEa7V,spotify:track:3ZFTkvIE7kyPt6Nu3PEa7V,https://api.spotify.com/v1/tracks/3ZFTkvIE7kyP...,https://api.spotify.com/v1/audio-analysis/3ZFT...,218093,4


In [19]:
df.head()

Unnamed: 0,Rank,Title,Artist,Year,Title_spot,Artist_spot,Track_id,Track_duration_ms,Track_popularity,Track_explicit
0,1,Bad Day,Daniel Powter,2006,Bad Day - Single Version,Various Artists,64HJothMOwTKVWDzKxbZdR,145680,0,False
1,2,Temperature,Sean Paul,2006,Temperature,Sean Paul,0k2GOhqsrxDTAbFFSdNJjT,218573,79,False
2,3,Promiscuous,Nelly Furtado Featuring Timbaland,2006,Promiscuous,Nelly Furtado,2gam98EZKrF9XuOkU13ApN,242293,82,False
3,4,You're Beautiful,James Blunt,2006,You're Beautiful,James Blunt,0vg4WnUWvze6pBOJDTq99k,209493,77,True
4,5,Hips Don't Lie,Shakira Featuring Wyclef Jean,2006,Hips Don't Lie (feat. Wyclef Jean),Shakira,3ZFTkvIE7kyPt6Nu3PEa7V,218093,84,False


## Checking if both dataframe's indexes share the same track id

In [138]:
number = 1234

In [139]:
df_audio_feautures.iloc[number].id

'6Qn5zhYkTa37e91HC1D7lb'

In [140]:
df.iloc[number].Track_id

'6Qn5zhYkTa37e91HC1D7lb'

## Merging two dataframes

In [63]:
df_merged = df.merge(right = df_audio_feautures, left_index = True, right_index = True)

In [64]:
df_merged.head(3)

Unnamed: 0,Rank,Title,Artist,Year,Title_spot,Artist_spot,Track_id,Track_duration_ms,Track_popularity,Track_explicit,...,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,1,Bad Day,Daniel Powter,2006,Bad Day - Single Version,Various Artists,64HJothMOwTKVWDzKxbZdR,145680,0,False,...,0.221,0.313,81.981,audio_features,64HJothMOwTKVWDzKxbZdR,spotify:track:64HJothMOwTKVWDzKxbZdR,https://api.spotify.com/v1/tracks/64HJothMOwTK...,https://api.spotify.com/v1/audio-analysis/64HJ...,145680,4
1,2,Temperature,Sean Paul,2006,Temperature,Sean Paul,0k2GOhqsrxDTAbFFSdNJjT,218573,79,False,...,0.0712,0.822,125.04,audio_features,0k2GOhqsrxDTAbFFSdNJjT,spotify:track:0k2GOhqsrxDTAbFFSdNJjT,https://api.spotify.com/v1/tracks/0k2GOhqsrxDT...,https://api.spotify.com/v1/audio-analysis/0k2G...,218573,4
2,3,Promiscuous,Nelly Furtado Featuring Timbaland,2006,Promiscuous,Nelly Furtado,2gam98EZKrF9XuOkU13ApN,242293,82,False,...,0.154,0.868,114.328,audio_features,2gam98EZKrF9XuOkU13ApN,spotify:track:2gam98EZKrF9XuOkU13ApN,https://api.spotify.com/v1/tracks/2gam98EZKrF9...,https://api.spotify.com/v1/audio-analysis/2gam...,242293,4


## Cleaning df_tracks data frame

In [65]:
df_merged.columns

Index(['Rank', 'Title', 'Artist', 'Year', 'Title_spot', 'Artist_spot',
       'Track_id', 'Track_duration_ms', 'Track_popularity', 'Track_explicit',
       'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
       'type', 'id', 'uri', 'track_href', 'analysis_url', 'duration_ms',
       'time_signature'],
      dtype='object')

In [66]:
df_merged.drop(columns = ['analysis_url', 'track_href', 'uri', 'id', 'mode', 
                          'Title_spot', 'Artist_spot', 'duration_ms', 'type'], inplace = True)

In [106]:
df_merged

Unnamed: 0,Rank,Title,Artist,Year,Track_id,Track_duration_ms,Track_popularity,Track_explicit,danceability,energy,key,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
0,1,Bad Day,Daniel Powter,2006,64HJothMOwTKVWDzKxbZdR,145680,0,False,0.676,0.509,11,-6.838,0.0423,0.0221,0,0.221,0.313,81.981,4
1,2,Temperature,Sean Paul,2006,0k2GOhqsrxDTAbFFSdNJjT,218573,79,False,0.951,0.6,0,-4.675,0.0685,0.106,0,0.0712,0.822,125.04,4
2,3,Promiscuous,Nelly Furtado Featuring Timbaland,2006,2gam98EZKrF9XuOkU13ApN,242293,82,False,0.808,0.97,10,-6.098,0.0506,0.0569,0.000061,0.154,0.868,114.328,4
3,4,You're Beautiful,James Blunt,2006,0vg4WnUWvze6pBOJDTq99k,209493,77,True,0.675,0.479,0,-9.87,0.0278,0.633,0.000018,0.088,0.454,81.998,4
4,5,Hips Don't Lie,Shakira Featuring Wyclef Jean,2006,3ZFTkvIE7kyPt6Nu3PEa7V,218093,84,False,0.778,0.824,10,-5.892,0.0707,0.284,0,0.405,0.758,100.024,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1493,96,More Than My Hometown,Morgan Wallen,2020,5OELUCYgOHKFAvCERnAvfS,216573,76,False,0.62,0.869,6,-5.479,0.0462,0.615,0,0.131,0.597,126.019,4
1494,97,Lovin' On You,Luke Combs,2020,0nYvjcSlCgjcwogQAwIwNp,194866,72,False,0.572,0.949,4,-4.865,0.06,0.00165,0.000195,0.163,0.53,118.974,4
1495,98,Said Sum,Moneybagg Yo,2020,3sKz6Sd72K0ofPWcJPPk6H,155168,72,True,0.929,0.667,8,-6.789,0.353,0.0185,0,0.1,0.274,126.998,4
1496,99,Slide,H.E.R. Featuring YG,2020,2rTnVB1bvwxHtaIl4uVu7f,238321,73,True,0.827,0.469,10,-9.141,0.341,0.0807,0.000008,0.207,0.196,97.028,4


In [75]:
df_merged.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1498 entries, 0 to 1497
Data columns (total 19 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Rank               1498 non-null   object
 1   Title              1498 non-null   object
 2   Artist             1498 non-null   object
 3   Year               1498 non-null   object
 4   Track_id           1498 non-null   object
 5   Track_duration_ms  1498 non-null   object
 6   Track_popularity   1498 non-null   object
 7   Track_explicit     1498 non-null   object
 8   danceability       1498 non-null   object
 9   energy             1498 non-null   object
 10  key                1498 non-null   object
 11  loudness           1498 non-null   object
 12  speechiness        1498 non-null   object
 13  acousticness       1498 non-null   object
 14  instrumentalness   1498 non-null   object
 15  liveness           1498 non-null   object
 16  valence            1498 non-null   object


In [117]:
df_merged.Rank = pd.to_numeric(df_merged.Rank)


In [118]:
df_merged.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1498 entries, 0 to 1497
Data columns (total 19 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Rank               1498 non-null   int64 
 1   Title              1498 non-null   object
 2   Artist             1498 non-null   object
 3   Year               1498 non-null   object
 4   Track_id           1498 non-null   object
 5   Track_duration_ms  1498 non-null   object
 6   Track_popularity   1498 non-null   object
 7   Track_explicit     1498 non-null   object
 8   danceability       1498 non-null   object
 9   energy             1498 non-null   object
 10  key                1498 non-null   object
 11  loudness           1498 non-null   object
 12  speechiness        1498 non-null   object
 13  acousticness       1498 non-null   object
 14  instrumentalness   1498 non-null   object
 15  liveness           1498 non-null   object
 16  valence            1498 non-null   object


In [125]:
for col in ['Rank','Track_duration_ms','Track_popularity','danceability','energy', 'key', 'loudness', 
            'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 
            'time_signature']:
    df_merged[col] = pd.to_numeric(df_merged[col],errors='coerce').fillna(0)

In [128]:
df_merged.Year = pd.to_datetime(df_merged.Year)

In [129]:
df_merged.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1498 entries, 0 to 1497
Data columns (total 19 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Rank               1498 non-null   int64         
 1   Title              1498 non-null   object        
 2   Artist             1498 non-null   object        
 3   Year               1498 non-null   datetime64[ns]
 4   Track_id           1498 non-null   object        
 5   Track_duration_ms  1498 non-null   int64         
 6   Track_popularity   1498 non-null   int64         
 7   Track_explicit     1498 non-null   object        
 8   danceability       1498 non-null   float64       
 9   energy             1498 non-null   float64       
 10  key                1498 non-null   int64         
 11  loudness           1498 non-null   float64       
 12  speechiness        1498 non-null   float64       
 13  acousticness       1498 non-null   float64       
 14  instrume

In [141]:
df_merged

Unnamed: 0,Rank,Title,Artist,Year,Track_id,Track_duration_ms,Track_popularity,Track_explicit,danceability,energy,key,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
0,1,Bad Day,Daniel Powter,2006-01-01,64HJothMOwTKVWDzKxbZdR,145680,0,False,0.676,0.509,11,-6.838,0.0423,0.02210,0.000000,0.2210,0.313,81.981,4
1,2,Temperature,Sean Paul,2006-01-01,0k2GOhqsrxDTAbFFSdNJjT,218573,79,False,0.951,0.600,0,-4.675,0.0685,0.10600,0.000000,0.0712,0.822,125.040,4
2,3,Promiscuous,Nelly Furtado Featuring Timbaland,2006-01-01,2gam98EZKrF9XuOkU13ApN,242293,82,False,0.808,0.970,10,-6.098,0.0506,0.05690,0.000061,0.1540,0.868,114.328,4
3,4,You're Beautiful,James Blunt,2006-01-01,0vg4WnUWvze6pBOJDTq99k,209493,77,True,0.675,0.479,0,-9.870,0.0278,0.63300,0.000018,0.0880,0.454,81.998,4
4,5,Hips Don't Lie,Shakira Featuring Wyclef Jean,2006-01-01,3ZFTkvIE7kyPt6Nu3PEa7V,218093,84,False,0.778,0.824,10,-5.892,0.0707,0.28400,0.000000,0.4050,0.758,100.024,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1493,96,More Than My Hometown,Morgan Wallen,2020-01-01,5OELUCYgOHKFAvCERnAvfS,216573,76,False,0.620,0.869,6,-5.479,0.0462,0.61500,0.000000,0.1310,0.597,126.019,4
1494,97,Lovin' On You,Luke Combs,2020-01-01,0nYvjcSlCgjcwogQAwIwNp,194866,72,False,0.572,0.949,4,-4.865,0.0600,0.00165,0.000195,0.1630,0.530,118.974,4
1495,98,Said Sum,Moneybagg Yo,2020-01-01,3sKz6Sd72K0ofPWcJPPk6H,155168,72,True,0.929,0.667,8,-6.789,0.3530,0.01850,0.000000,0.1000,0.274,126.998,4
1496,99,Slide,H.E.R. Featuring YG,2020-01-01,2rTnVB1bvwxHtaIl4uVu7f,238321,73,True,0.827,0.469,10,-9.141,0.3410,0.08070,0.000008,0.2070,0.196,97.028,4


### What to extract from spotify:
- release_date
- track id
- artist id
- name (of the artist / band)
- type (artist/band)
- duration (seconds)
- genres
- popularity?
- 

# Visualisation

## rough notebook

In [None]:
'''
audio_features=[]
for track_id in artist_track_id:
    audio_url= 'https://api.spotify.com/v1/audio-features/'+track_id
    res=requests.get(audio_url,headers=headers)
    audio_info=res.json()
    audio_features.append(audio_info)
'''

In [None]:
artist_name = 'Depeche Mode'
url = 'https://api.spotify.com/v1/search'

In [None]:
artist = requests.get(url, params = {'q':artist_name, 'type':'artist', 'market':'DK'}, headers = headers)

In [None]:
artist

In [None]:
url = 'https://api.spotify.com/v1/artists'
requests.get(url, headers = headers, params = {'ids':'762310PdDnwsDxAQxzQkfX'}).json()

In [None]:
# extracting main info from the songs
'''
track_name = "More Than My Hometown"
url = 'https://api.spotify.com/v1/search'

track = requests.get(url, headers = headers, params = {'q':track_name, 'type':'track'}).json()

track_name = track['tracks']['items'][0]['name']
artist_name = track['tracks']['items'][0]['album']['artists'][0]['name']
track_id = track['tracks']['items'][0]['id']
track_duration_ms = track['tracks']['items'][0]['duration_ms']
track_explicit = track['tracks']['items'][0]['explicit']
track_popularity = track['tracks']['items'][0]['popularity']
'''

In [None]:
'''
    track_name = track['tracks']['items'][0]['name']
    artist_name = track['tracks']['items'][0]['album']['artists'][0]['name']
    track_id = track['tracks']['items'][0]['id']
    track_duration_ms = track['tracks']['items'][0]['duration_ms']
    track_explicit = track['tracks']['items'][0]['explicit']
    track_popularity = track['tracks']['items'][0]['popularity']
    '''

In [None]:
'''

url = 'https://api.spotify.com/v1/audio-features'

danceability = [] #555
energy = [] #461
key = []
loudness = []
mode = []
speechiness = []
acousticness = []
instrumentalness = []
liveness = []
valence = []
tempo = []

for track_id in df.Track_id:
    track_request = requests.get(url, headers = headers, params = {'ids':track_id}).json()
    
    danceability.append(track_request['audio_features'][0]['danceability'])
    energy.append(track_request['audio_features'][0]['energy'])
    
'''

In [None]:
'''
# sito nereikia
danceability = []
energy = []
key = []
loudness = []
mode = []
speechiness = []
acousticness = []
instrumentalness = []
liveness = []
valence = []
tempo = []

for track_data in requests_list:
    
    i = 0
    
    for i in range(len(requests_list)):
        
        danceability.append(requests_list[i]['danceability'])
        energy.append(requests_list[i]['energy'])
        key.append(requests_list[i]['key'])
        loudness.append(requests_list[i]['loudness'])
        mode.append(requests_list[i]['mode'])
        speechiness.append(requests_list[i]['speechiness'])
        acousticness.append(requests_list[i]['acousticness'])
        instrumentalness.append(requests_list[i]['instrumentalness'])
        liveness.append(requests_list[i]['liveness'])
        valence.append(requests_list[i]['valence'])
        tempo.append(requests_list[i]['tempo'])
        
        i += 1
        
'''

# Notes
- add spotify API key and secret to different jupyter notebook and run it form here