In [1]:
import pandas as pd
from getpass import getpass
import sqlalchemy as db
import argparse
import pprint
import sys
import os
import subprocess
import json

# Database

## Functions

### Clean Artist's names 

In [309]:
def clean_artist_name(df:pd.DataFrame, chart_col:str, track_col:str='None')-> pd.DataFrame:
    '''
    This function cleans the artist's name column of a dataframe and 
    returns a new dataframe containing all the other (featuring / collaboration) artists' names. 
    It also drops duplicated aritsts' names.
    
    it needs the input dataframe chart's and track's column names.

    example:
    - chart_col = 'chart'
    - track_col = 'track'
    '''
    
    import re
    
    other_artists = []
    charts = []
    tracks = []
    
    df = df.reset_index(drop=True)

    for i in range(df.shape[0]):
        string = df.loc[i,'artist']
        pattern ='(?:Feat(?:\. |uring )|, |\/|\+|&| [Xx] | \(|\)| [Ww]ith | And | y )'
        artists = re.split(pattern, string)
        df.loc[i, 'artist'] = artists[0]
        
        for j in range(1, len(artists)):
            if artists[j] != '':
                
                if track_col == 'None':
                    other_artists.append(artists[j])
                    charts.append(df.loc[i, chart_col])
                else:
                    other_artists.append(artists[j])
                    tracks.append(df.loc[i, track_col])
                    charts.append(df.loc[i, chart_col])
            else:
                pass
 
    if track_col == 'None':
        other_artists_df = pd.DataFrame({'artist': other_artists,
                                         'chart': charts})
        final_df = pd.concat([df, other_artists_df], axis=0).drop_duplicates(subset='artist').reset_index(drop=True)
    else: 
        other_artists_df = pd.DataFrame({'track': tracks,
                                         'artist': other_artists,
                                         'chart': charts})
        final_df = pd.concat([df, other_artists_df], axis=0).drop_duplicates(subset='track').reset_index(drop=True)    


    return final_df

## Billboard API

The first step is to create a database containing the top ranked billboard artists and songs, in order to find features for each one of them (through the Spotify API).

In [2]:
# !pip3 install billboard.py
import billboard

### Data Gathering

A chart entry (typically a single track) is of type ChartEntry. Each of these track instances have the following attributes:

- `title` – The title of the track.
- `artist` – The name of the artist, as formatted on Billboard.com.
- `image` – The URL of the image for the track.
- `peakPos` – The track's peak position on the chart as of the chart date, as an int (or None if the chart does not include this information).
- `lastPos` – The track's position on the previous week's chart, as an int (or None if the chart does not include this information). This value is 0 if the track was not on the previous week's chart.
- `weeks` – The number of weeks the track has been or was on the chart, including future dates (up until the present time).
- `rank` – The track's current position on the chart.
- `isNew` – Whether the track is new to the chart.

**For this dataset only 4 of them will be used:** 
*'title', 'artist', 'image', 'weeks'*

In [3]:
chart_types = billboard.charts()
chart_types

['hot-100',
 'billboard-200',
 'artist-100',
 'social-50',
 'streaming-songs',
 'radio-songs',
 'digital-song-sales',
 'on-demand-songs',
 'top-album-sales',
 'current-albums',
 'catalog-albums',
 'independent-albums',
 'soundtracks',
 'vinyl-albums',
 'greatest-billboard-200-albums',
 'greatest-billboard-200-artists',
 'greatest-hot-100-singles',
 'greatest-hot-100-artists',
 'greatest-hot-100-songs-by-women',
 'greatest-hot-100-women-artists',
 'greatest-billboard-200-albums-by-women',
 'greatest-billboard-200-women-artists',
 'greatest-billboards-top-songs-80s',
 'greatest-billboards-top-songs-90s',
 'greatest-of-all-time-pop-songs',
 'greatest-of-all-time-pop-songs-artists',
 'greatest-adult-pop-songs',
 'greatest-adult-pop-artists',
 'greatest-country-songs',
 'greatest-country-albums',
 'greatest-country-artists',
 'greatest-hot-latin-songs',
 'greatest-hot-latin-songs-artists',
 'greatest-top-dance-club-artists',
 'greatest-r-b-hip-hop-songs',
 'greatest-r-b-hip-hop-albums',
 'g

In [226]:
# Creating a dataset with all the artists from all charts from billboard
#df=pd.DataFrame()
#title_list=[]
#artist_list=[]
#image_list=[]
#weeks_list=[]
#chart_name=[]
#
#for chart_type in chart_types:
#    chart = billboard.ChartData(chart_type)
#    for song in chart:
#        title_list.append(song.title)
#        artist_list.append(song.artist)
#        image_list.append(song.image)
#        weeks_list.append(song.weeks)
#        chart_name.append(chart.name)
#        
#df = pd.DataFrame({'title':title_list, 
#                   'artist':artist_list, 
#                   'image':image_list,
#                   'weeks':weeks_list,
#                   'chart':chart_name})

### Data Cleaning

In [228]:
#df.shape

(35621020, 5)

In [229]:
#df.head()

Unnamed: 0,title,artist,image,weeks,chart
0,Say So,Doja Cat Featuring Nicki Minaj,,18.0,hot-100
0,Say So,Doja Cat Featuring Nicki Minaj,,18.0,hot-100
1,Savage,Megan Thee Stallion Featuring Beyonce,,8.0,hot-100
0,Say So,Doja Cat Featuring Nicki Minaj,,18.0,hot-100
1,Savage,Megan Thee Stallion Featuring Beyonce,,8.0,hot-100


In [234]:
#df=df.drop_duplicates()
#df.isna().sum()

In [236]:
df.shape

(8440, 5)

In [238]:
# df.to_csv('billboard_artists.csv')

In [2]:
bb = pd.read_csv('data/raw/billboard_artists.csv')

In [6]:
bb.head()

Unnamed: 0,title,artist,image,weeks,chart
0,Say So,Doja Cat Featuring Nicki Minaj,,18.0,hot-100
1,Savage,Megan Thee Stallion Featuring Beyonce,,8.0,hot-100
2,Blinding Lights,The Weeknd,,23.0,hot-100
3,Toosie Slide,Drake,,5.0,hot-100
4,The Box,Roddy Ricch,,22.0,hot-100


#### Artists' Data base

In [10]:
# Separating into 2 datasets: TOP ARTISTS LIST & TOP SONGS

In [11]:
bb_artist = bb.copy()

In [13]:
bb_artist=bb_artist.drop_duplicates(subset='artist')

In [14]:
bb_artist

Unnamed: 0,title,artist,image,weeks,chart
0,Say So,Doja Cat Featuring Nicki Minaj,,18.0,hot-100
1,Savage,Megan Thee Stallion Featuring Beyonce,,8.0,hot-100
2,Blinding Lights,The Weeknd,,23.0,hot-100
3,Toosie Slide,Drake,,5.0,hot-100
4,The Box,Roddy Ricch,,22.0,hot-100
...,...,...,...,...,...
8426,Bella Ciao,Manu Pilas,https://www.billboard.com/assets/1588965272/im...,5.0,world-digital-song-sales
8433,Nominate,Stonebwoy & Keri Hilson,https://charts-static.billboard.com/img/1840/1...,1.0,world-digital-song-sales
8434,Du Hast,Rammstein,https://charts-static.billboard.com/img/2015/1...,215.0,world-digital-song-sales
8435,Oh Nanana,dj 6rb & Bonde R300 Featuring XANG & Mayklove,https://www.billboard.com/assets/1588965272/im...,9.0,world-digital-song-sales


In [16]:
bb_artist.isna().sum()

title      281
artist       0
image      204
weeks     1162
chart        0
dtype: int64

In [18]:
bb_artist=bb_artist.drop(columns=['weeks','image','title'])
bb_artist

Unnamed: 0,artist,chart
0,Doja Cat Featuring Nicki Minaj,hot-100
1,Megan Thee Stallion Featuring Beyonce,hot-100
2,The Weeknd,hot-100
3,Drake,hot-100
4,Roddy Ricch,hot-100
...,...,...
8426,Manu Pilas,world-digital-song-sales
8433,Stonebwoy & Keri Hilson,world-digital-song-sales
8434,Rammstein,world-digital-song-sales
8435,dj 6rb & Bonde R300 Featuring XANG & Mayklove,world-digital-song-sales


In [29]:
bb_artist=bb_artist.reset_index(drop=True)

In [38]:
bb_artist.to_csv('data/processed/bb_artists_unique.csv', index=False)

In [208]:
bb_artist = pd.read_csv('data/processed/bb_artists_unique.csv')

In [109]:
bb_artist.shape, bb_artist.head(10)

((2607, 2),
                                   artist    chart
 0         Doja Cat Featuring Nicki Minaj  hot-100
 1  Megan Thee Stallion Featuring Beyonce  hot-100
 2                             The Weeknd  hot-100
 3                                  Drake  hot-100
 4                            Roddy Ricch  hot-100
 5                               Dua Lipa  hot-100
 6          Drake Featuring Playboi Carti  hot-100
 7                            Post Malone  hot-100
 8           DaBaby Featuring Roddy Ricch  hot-100
 9          Justin Bieber Featuring Quavo  hot-100)

In [None]:
# clean dataset with clean_artist_name function 

In [315]:
bb_artist_clean=clean_artist_name(bb_artist, 'chart')

In [316]:
bb_artist_clean.shape

(3021, 2)

In [317]:
bb_artist_clean.head()

Unnamed: 0,artist,chart
0,Doja Cat,hot-100
1,Megan Thee Stallion,hot-100
2,The Weeknd,hot-100
3,Drake,hot-100
4,Roddy Ricch,hot-100


In [318]:
bb_artist_clean.to_csv('data/processed/bb_artists_unique_clean.csv')

#### Songs dataset

In [33]:
# The Top songs dataset

In [211]:
bb_songs = bb.copy()

In [212]:
bb_songs=bb_songs.drop_duplicates(subset='title')

In [213]:
bb_songs.head()

Unnamed: 0,title,artist,image,weeks,chart
0,Say So,Doja Cat Featuring Nicki Minaj,,18.0,hot-100
1,Savage,Megan Thee Stallion Featuring Beyonce,,8.0,hot-100
2,Blinding Lights,The Weeknd,,23.0,hot-100
3,Toosie Slide,Drake,,5.0,hot-100
4,The Box,Roddy Ricch,,22.0,hot-100


In [214]:
bb_songs.isna().sum()

title        1
artist       0
image      292
weeks     1963
chart        0
dtype: int64

In [215]:
bb_songs=bb_songs.drop(columns=['image','weeks'])

In [216]:
bb_songs.isna().sum()

title     1
artist    0
chart     0
dtype: int64

In [217]:
bb_songs.loc[bb_songs.title.isna(),:]

Unnamed: 0,title,artist,chart
300,,Kenny Chesney,artist-100


In [218]:
bb_songs=bb_songs.dropna().reset_index(drop=True)

In [219]:
bb_songs.head(), bb_songs.shape

(             title                                 artist    chart
 0           Say So         Doja Cat Featuring Nicki Minaj  hot-100
 1           Savage  Megan Thee Stallion Featuring Beyonce  hot-100
 2  Blinding Lights                             The Weeknd  hot-100
 3     Toosie Slide                                  Drake  hot-100
 4          The Box                            Roddy Ricch  hot-100,
 (4082, 3))

In [220]:
bb_songs.columns = ['track','artist','chart']

In [221]:
bb_songs.head()

Unnamed: 0,track,artist,chart
0,Say So,Doja Cat Featuring Nicki Minaj,hot-100
1,Savage,Megan Thee Stallion Featuring Beyonce,hot-100
2,Blinding Lights,The Weeknd,hot-100
3,Toosie Slide,Drake,hot-100
4,The Box,Roddy Ricch,hot-100


In [139]:
bb_songs.to_csv('data/processed/bb_songs_unique.csv')

In [319]:
bb_songs = pd.read_csv('data/processed/bb_songs_unique.csv').drop(columns='Unnamed: 0')

In [321]:
bb_songs_unique_clean = clean_artist_name(bb_songs, 'chart', 'track')

In [322]:
bb_songs_unique_clean.to_csv('data/processed/bb_songs_unique_clean.csv')

## Spotify API

### Retrieving Spotify token

In [323]:
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials

In [324]:
client_id= '1337a27731b840f98acd0c79f0d011f3'
client_secret = '98f2df2fdde44f40ad768a8142b20074'
response_type = 'code'
redirect_uri = 'https://aceworld.com.br/'
state= 'https%3A%2F%2Faceworld.com.br%2F'
scope = 'playlist-read-private scope'

In [325]:
username='dnekdiptrsu4rr1heu145y07j'
playlist = 'spotify:playlist:2QDyvRV0Ct7bGg3AREkL6P'

In [326]:
#Accessing with token
scope = 'user-library-read playlist-read-private'

try:
    token = util.prompt_for_user_token(username, scope,client_id=client_id, 
                                       client_secret=client_secret, 
                                       redirect_uri=redirect_uri)
    sp=spotipy.Spotify(auth= token)
except:
    print('Token is not accesible for ' + username)

In [327]:
token

'BQAOuszkD-iUPOLfnHm-9WKXp2T8vGV0qAraU4zMj0VJaaUzrpar3eHxEZZzP6M3iD2GxBxkVtUNgkriPZBpCx134KHAP6hCAHT9wD1Nrl33YS9V7m2TWrScHH9g8cCdlB08zvPNSCrleFxLVL9j0lzRLmFInUZzi3cOENo7J1nCw7dERocGQzAADBD8ybTzHjillQi0q1EH-jfPbIDG'

In [59]:
#Accessing without token
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, 
                                                      client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

### Get Billboard Artists' Spotify URI

In [328]:
# Finding track uri

artist = 'justin bieber'
track = 'yummy'
q = f'artist:{artist} track:{track}'
results = sp.search(q=q, limit=1, type='track')
print(results['tracks']['items'][0]['uri'])

spotify:track:41L3O37CECZt3N7ziG2z7l


In [66]:
uris = [] 
for i in range(len(sp.search(q='artist:clément leroux', type='track')['tracks']['items'])):
    uris.append(sp.search(q='artist:clément leroux', type='track')['tracks']['items'][i]['uri'])

In [323]:
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials

In [324]:
client_id= '1337a27731b840f98acd0c79f0d011f3'
client_secret = '98f2df2fdde44f40ad768a8142b20074'
response_type = 'code'
redirect_uri = 'https://aceworld.com.br/'
state= 'https%3A%2F%2Faceworld.com.br%2F'
scope = 'playlist-read-private scope'

In [325]:
username='dnekdiptrsu4rr1heu145y07j'
playlist = 'spotify:playlist:2QDyvRV0Ct7bGg3AREkL6P'

In [326]:
#Accessing with token
scope = 'user-library-read playlist-read-private'

try:
    token = util.prompt_for_user_token(username, scope,client_id=client_id, 
                                       client_secret=client_secret, 
                                       redirect_uri=redirect_uri)
    sp=spotipy.Spotify(auth= token)
except:
    print('Token is not accesible for ' + username)

In [327]:
token

'BQAOuszkD-iUPOLfnHm-9WKXp2T8vGV0qAraU4zMj0VJaaUzrpar3eHxEZZzP6M3iD2GxBxkVtUNgkriPZBpCx134KHAP6hCAHT9wD1Nrl33YS9V7m2TWrScHH9g8cCdlB08zvPNSCrleFxLVL9j0lzRLmFInUZzi3cOENo7J1nCw7dERocGQzAADBD8ybTzHjillQi0q1EH-jfPbIDG'

In [59]:
#Accessing without token
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, 
                                                      client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [328]:
# Finding track uri

artist = 'justin bieber'
track = 'yummy'
q = f'artist:{artist} track:{track}'
results = sp.search(q=q, limit=1, type='track')
print(results['tracks']['items'][0]['uri'])

spotify:track:41L3O37CECZt3N7ziG2z7l


In [85]:
# Finding Audio Features
sp.audio_features(uris[0]) 

[{'danceability': 0.698,
  'energy': 0.751,
  'key': 6,
  'loudness': -11.054,
  'mode': 1,
  'speechiness': 0.0712,
  'acousticness': 0.51,
  'instrumentalness': 0.887,
  'liveness': 0.121,
  'valence': 0.135,
  'tempo': 119.976,
  'type': 'audio_features',
  'id': '5u4dNVgfChjREYJ6KeNuJ4',
  'uri': 'spotify:track:5u4dNVgfChjREYJ6KeNuJ4',
  'track_href': 'https://api.spotify.com/v1/tracks/5u4dNVgfChjREYJ6KeNuJ4',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/5u4dNVgfChjREYJ6KeNuJ4',
  'duration_ms': 238000,
  'time_signature': 4}]

In [118]:
# Track attributes are Tuneable

sp.recommendations(seed_tracks='1HrMWH5GUdK6Yi94rbANJA', min_)

HTTP Error for GET to https://api.spotify.com/v1/recommendations returned 400 due to invalid request


SpotifyException: http status: 400, code:-1 - https://api.spotify.com/v1/recommendations?limit=20&seed_tracks=1%2CH%2Cr%2CM%2CW%2CH%2C5%2CG%2CU%2Cd%2CK%2C6%2CY%2Ci%2C9%2C4%2Cr%2Cb%2CA%2CN%2CJ%2CA:
 invalid request

## POSTGRE

In [329]:
# FUNCTION

def get_spotify_uris(df):
    '''
    this function receives a dataframe with columns of artist and track
    and returns a new df containing spotify unique IDs (uri) for each of the features:
        - track
        - artist
        - album
    '''
    import tqdm
    
    artists_col = []    
    track_col = []    
    chart_col = []        
    tracks_uris = []
    artists_uris = []
    albums_uris = []
    err_tracks=[]
    
    artist_names = df['artist'].values
    track_names = df['track'].values
    chart_names = df['chart'].values
    
    for i in range(len(artist_names)):
        artist = artist_names[i]
        track = track_names[i]
    
        try:
            q = f'artist:{artist} track: {track}'
            results = sp.search(q=q, limit=1, type='track')

            tracks_uris.append(results['tracks']['items'][0]['uri'])
            artists_uris.append(results['tracks']['items'][0]['artists'][0]['uri'])
            albums_uris.append(results['tracks']['items'][0]['album']['uri'])
            artists_col.append(artist)
            track_col.append(track)
            chart_col.append(chart_names[i])
            
        except:
            err_tracks.append([artist,track,chart_names[i]])
            pass
    
    df=pd.DataFrame({'artist': artists_col, 
                    'track': track_col, 
                    'chart': chart_col, 
                    'tracks_uri': tracks_uris, 
                    'artists_uris': artists_uris, 
                    'albums_uris': albums_uris})

    return df


### Extracting audio features

In [None]:
def get_audio_features(saved_uris):
    artist = []
    track = []
    
    danceability = []
    energy = []
    key = []
    loudness = []
    mode = []
    speechiness = []
    acousticness = []
    instrumentalness = []
    liveness = []
    valence = []
    tempo = []
    duration_ms = []

    for uri in saved_uris:
        x = sp.audio_features(uri)
        y = sp.track(uri)
        for audio_features in x:
            danceability.append(audio_features['danceability'])
            energy.append(audio_features['energy'])
            key.append(audio_features['key'])
            loudness.append(audio_features['loudness'])
            mode.append(audio_features['mode'])
            speechiness.append(audio_features['speechiness'])
            acousticness.append(audio_features['acousticness'])
            instrumentalness.append(audio_features['instrumentalness'])
            liveness.append(audio_features['liveness'])
            valence.append(audio_features['valence'])
            tempo.append(audio_features['tempo'])
            duration_ms.append(audio_features['duration_ms'])
            print('append')
      
        artist.append(y['album']['artists'][0]['name'])
        track.append(y['name'])

    df = pd.DataFrame()
    df['artist'] = artist
    df['track'] = track
    df['danceability'] = danceability
    df['energy'] = energy
    df['key'] = key
    df['loudness'] = loudness
    df['mode'] = mode
    df['speechiness'] = speechiness
    df['acousticness'] = acousticness
    df['instrumentalness'] = instrumentalness
    df['liveness'] = liveness
    df['valence'] = valence
    df['tempo'] = tempo
    df['duration_ms'] = duration_ms
    
    df.to_csv('data/OnelateAudioFeatures.csv')
    
    return df

In [None]:
# create the engine
engine = db.create_engine('postgresql+psycopg2://postgres:admin@localhost/aula_conexao')

# localhost = 127.0.0.1

# open the connection
conn = engine.connect()

### Extracting attributes from Artists

**From `GET SEVERAL ARTISTS` ENDPOINT**
- `genres`
- `followers` | total
- `popularity`

In [40]:
for item in results['items']:
    track = item['track']
    print(track['name'] + ' - ' + track['artists'][0]['name'])

In [131]:
db_server = 'postgresql'
user = 'postgres'
password = 'g0disl0vee'
ip = 'localhost'
db_name = 'postgres'

# create the engine
engine = db.create_engine(f'{db_server}://{user}:{password}@{ip}/{db_name}')

# open the connection
conn = engine.connect()

In [133]:
!pwd

/Users/serachung/Desktop/Data_Analytics/Course/Entregas/Projetos/grand-finale


In [140]:
artist = pd.read_table('data/mbdump-derived/mbdump/artist_tag')
artist

Unnamed: 0,468800,29,2,2011-05-16 14:57:06.530063+00
0,522545,63294,1,2017-05-15 20:35:58.480733+00
1,31390,173,1,2011-05-16 14:57:06.530063+00
2,108404,271,1,2011-05-16 14:57:06.530063+00
3,108404,7,1,2011-05-16 14:57:06.530063+00
4,437100,40881,1,2011-10-30 14:16:25.405823+00
...,...,...,...,...
348894,1885810,369,1,2020-02-04 11:39:45.901086+00
348895,1885810,1992,1,2020-02-04 11:39:46.004888+00
348896,1885810,1405,1,2020-02-04 11:39:46.056913+00
348897,1885810,54816,1,2020-02-04 11:39:46.104693+00


In [87]:
sp.recommendation_genre_seeds()

{'genres': ['acoustic',
  'afrobeat',
  'alt-rock',
  'alternative',
  'ambient',
  'anime',
  'black-metal',
  'bluegrass',
  'blues',
  'bossanova',
  'brazil',
  'breakbeat',
  'british',
  'cantopop',
  'chicago-house',
  'children',
  'chill',
  'classical',
  'club',
  'comedy',
  'country',
  'dance',
  'dancehall',
  'death-metal',
  'deep-house',
  'detroit-techno',
  'disco',
  'disney',
  'drum-and-bass',
  'dub',
  'dubstep',
  'edm',
  'electro',
  'electronic',
  'emo',
  'folk',
  'forro',
  'french',
  'funk',
  'garage',
  'german',
  'gospel',
  'goth',
  'grindcore',
  'groove',
  'grunge',
  'guitar',
  'happy',
  'hard-rock',
  'hardcore',
  'hardstyle',
  'heavy-metal',
  'hip-hop',
  'holidays',
  'honky-tonk',
  'house',
  'idm',
  'indian',
  'indie',
  'indie-pop',
  'industrial',
  'iranian',
  'j-dance',
  'j-idol',
  'j-pop',
  'j-rock',
  'jazz',
  'k-pop',
  'kids',
  'latin',
  'latino',
  'malay',
  'mandopop',
  'metal',
  'metal-misc',
  'metalcore',


In [141]:
artist.to_sql('artist_tag', con=conn, if_exists='replace', index=False)

In [144]:
# READING TABLE via python
pd.read_sql_table('artist_tag', con=conn)

Unnamed: 0,468800,29,2,2011-05-16 14:57:06.530063+00
0,522545,63294,1,2017-05-15 20:35:58.480733+00
1,31390,173,1,2011-05-16 14:57:06.530063+00
2,108404,271,1,2011-05-16 14:57:06.530063+00
3,108404,7,1,2011-05-16 14:57:06.530063+00
4,437100,40881,1,2011-10-30 14:16:25.405823+00
...,...,...,...,...
348894,1885810,369,1,2020-02-04 11:39:45.901086+00
348895,1885810,1992,1,2020-02-04 11:39:46.004888+00
348896,1885810,1405,1,2020-02-04 11:39:46.056913+00
348897,1885810,54816,1,2020-02-04 11:39:46.104693+00


# Algorithm

# Deploy

Examples:
- https://semiconductor.withgoogle.com

typeform.com

## Speech Recognition

In [2]:
import speech_recognition as sr

In [6]:
r = sr.Recognizer()

In [15]:
with sr.Microphone() as source:
    # read the audio data from the default microphone
    print("Recognizing...")
    audio_data = r.record(source, duration=5)
    # convert speech to text
    text = r.recognize_google(audio_data, language="pt-PT")
    print(text)

Recognizing...
alô olá tudo bem como vocês


# Sources and References

## Databases & APIs

> `Spotify`: 
- Official Documentation: https://developer.spotify.com/documentation/web-api/reference/
- spotipy: Python module for Spotify's API (https://pypi.org/project/spotipy/)

> `Billboard`
- billboard.py: Python module for Billboard's API (https://pypi.org/project/billboard.py/) 
- github: (https://github.com/guoguo12/billboard-charts)

## The algorithm

https://musicbrainz.org/doc/MusicBrainz_Database

https://hackernoon.com/spotifys-discover-weekly-how-machine-learning-finds-your-new-music-19a41ab76efe

- Building Music Playlists Recommendation System
https://towardsdatascience.com/building-music-playlists-recommendation-system-564a3e63ef64

https://natural-language-understanding-demo.ng.bluemix.net

## Deploy

How To Convert Speech to Text in Python

https://hackernoon.com/how-to-convert-speech-to-text-in-python-844e3y4l

- Every noise at once: http://everynoise.com/#otherthings


https://blog.chartmetric.com/genre-specific-music-recommendations/

## Requirements

In [None]:
def get_imports():
    for name, val in globals().items():
        if isinstance(val, types.ModuleType):
            name = val.__name__.split(".")[0]
        elif isinstance(val, type):
            name = val.__module__.split(".")[0]
        poorly_named_packages = {
            "PIL": "Pillow",
            "sklearn": "scikit-learn"
        }
        if name in poorly_named_packages.keys():
            name = poorly_named_packages[name]
        yield name
        
imports = list(set(get_imports()))
requirements = []
for m in pkg_resources.working_set:
    if m.project_name in imports and m.project_name!="pip":
        requirements.append((m.project_name, m.version))
for r in requirements:
    print("{}=={}".format(*r))