<a href="https://colab.research.google.com/github/sanchitgulati/spotipy_journey_of_the_music/blob/main/spotipy_journey_of_the_music.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Installation

In [1]:
import pip

def install(package):
    """Install `package` into the environment of the running kernel.

    pip does not support being driven through its Python modules
    (`pip.main` / `pip._internal.main`), so pip is run as a subprocess of the
    current interpreter instead, which is the supported way to call it from a
    program.

    Args:
        package: requirement specifier passed verbatim to `pip install`.

    Returns:
        None. A failed install is reported on stderr rather than raised, so a
        single bad package does not abort the notebook run.
    """
    import subprocess
    import sys

    # sys.executable guarantees the package lands in the kernel's own environment.
    result = subprocess.run(
        [sys.executable, '-m', 'pip', 'install', package],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
    )
    # Re-emit pip's log so it shows up in the cell (or is swallowed by %%capture).
    if result.stdout:
        print(result.stdout)
    if result.returncode != 0:
        print(
            "pip install {} failed with exit code {}".format(package, result.returncode),
            file=sys.stderr,
        )



In [2]:
# %%capture captures the cell output, so need less things don't populate the cell 
%%capture
install('spotipy')

In [3]:
%%capture
install('panda')

In [4]:
%%capture
install('sklearn')

In [5]:
%%capture
# Install matplotlib; the cell output is captured so the install log stays hidden.
install('matplotlib')

In [6]:
%%capture
# Install seaborn; the cell output is captured so the install log stays hidden.
install('seaborn')

In [7]:
from google.colab import drive
# Mount Google Drive at /content/drive; later cells copy files to and from
# "drive/My Drive/spotipy/".
drive.mount('/content/drive')

Mounted at /content/drive


# Imports

In [8]:
import pandas as pd
import datetime
import time

# Secret Key

In [9]:
# Placeholder credentials: substitute the real client id and client secret
# here before persisting them.
secrets = {
    'CLIENT_ID': "<CLIENT-ID-HERE>",
    'CLIENT_SECRET': "<CLIENT-SECRET-HERE>",
}

In [10]:
# Used once to store your APP secrets

# import pickle
# with open('secrets.dictionary', 'wb') as secret_dictionary_file:
#   pickle.dump(secrets, secret_dictionary_file)
# !cp 'secrets.dictionary' "drive/My Drive/spotipy/secrets.dictionary" 

In [11]:
import pickle
# Copy the pickled credentials dictionary from the mounted Drive folder into
# the working directory.
!cp "drive/My Drive/spotipy/secrets.dictionary" 'secrets.dictionary' 
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load a secrets file that you created yourself.
with open('secrets.dictionary', 'rb') as secret_dictionary_file:
    # Replaces the placeholder `secrets` dict with the stored one.
    secrets = pickle.load(secret_dictionary_file)

# Imports

In [12]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
# Build the credentials manager from the stored app id/secret, then create
# the shared Spotify client `sp` used by the helper functions below.
client_credentials_manager = SpotifyClientCredentials(
    client_id=secrets['CLIENT_ID'],
    client_secret=secrets['CLIENT_SECRET'],
)
sp = spotipy.Spotify(
    client_credentials_manager=client_credentials_manager,
)

# Functions

In [13]:
def GetNewResults(market,popularity = 70):
  """Collect detail rows for albums tagged as new in `market`.

  Args:
    market: market code forwarded to the Spotify search call.
    popularity: cut-off; only rows whose last element (the album popularity
      as returned by GetAlbumResults) is strictly greater are kept.
      Defaults to 70.

  Returns:
    A list of rows in the format produced by GetAlbumResults, in the order
    the search returned the albums.
  """
  # One search request returns at most 50 albums (the per-call limit used here).
  found = sp.search(q='tag:new', market=market,type='album', limit=50)
  # The search items are only summaries; fetch the full record for each album.
  detailed_rows = (GetAlbumResults(entry['id']) for entry in found['albums']['items'])
  return [row for row in detailed_rows if row[-1] > popularity]

In [14]:
def GetAlbumResults(album_id):
  """Fetch one album from Spotify and flatten it into a row.

  Args:
    album_id: Spotify album id passed to `sp.album`.

  Returns:
    [id, name, image_url, release_date, days_since_release, popularity]
    where `release_date` is the string exactly as returned by the API and
    `days_since_release` is counted up to today's date. `image_url` is the
    second image when available, otherwise the first, otherwise None.
  """
  album = sp.album(album_id)
  release_date = album['release_date']
  # Release dates can be given with year or month precision only ('YYYY' or
  # 'YYYY-MM'); pick the matching format instead of failing on them. Missing
  # parts default to the first month / first day.
  date_format = {4: '%Y', 7: '%Y-%m'}.get(len(release_date), '%Y-%m-%d')
  released_on = datetime.datetime.strptime(release_date, date_format).date()
  days_since_release = (datetime.date.today() - released_on).days

  # Prefer the second image as before, but tolerate albums with fewer than two images.
  images = album.get('images') or []
  if len(images) > 1:
    image_url = images[1]['url']
  elif images:
    image_url = images[0]['url']
  else:
    image_url = None

  return [album['id'],album['name'],image_url,release_date,days_since_release,album['popularity']]

# Main

In [15]:
# all_market_codes = sp.available_markets()
# Markets polled for new releases. Each code appears once so that no market
# is queried twice in the same run.
all_market_codes = {"markets":['IN','US','PK','BR','MX','JP','NL','FR','DE']}
# all_market_codes = {"markets":['US']}

In [16]:
# Copy the previously saved dataset from the Drive folder into the working
# directory, then load it into `df`.
!cp "drive/My Drive/spotipy/data.csv" data.csv
df = pd.read_csv('data.csv')

In [17]:
# Preview the first rows of the dataset loaded from data.csv.
df.head()

Unnamed: 0.1,Unnamed: 0,id,name,image,release_date,days_since_release,popularity
0,0,2ZiBfBw770CJC0f4SI1Tji,Feed Tha Streets III,https://i.scdn.co/image/ab67616d00001e02d513f3...,2022-11-18,24,72
1,1,3lsdB3dY4odywNI42KV6D9,SEN2 KBRN VOL. 2,https://i.scdn.co/image/ab67616d00001e02dce7a3...,2022-11-18,24,80
2,2,3SxiMoJROq0kvfbQtd75xp,Down In Atlanta,https://i.scdn.co/image/ab67616d00001e029c3bd1...,2022-11-18,24,72
3,3,6J0q4a7WkkI6OvlZC4rLpi,CONTINGENTE,https://i.scdn.co/image/ab67616d00001e02b9ae53...,2022-11-23,19,76
4,4,0gr5OmB74UhoANEXwYT3gE,Dreamers [Music from the FIFA World Cup Qatar ...,https://i.scdn.co/image/ab67616d00001e02ef5718...,2022-11-20,22,81
5,5,0YlJpX1XiE8ghnRJCU0Dwv,Sonder,https://i.scdn.co/image/ab67616d00001e02cc887b...,2022-11-18,24,74
6,6,1QUr29IDxeCaCpZOqwyvHW,"Duki: Bzrp Music Sessions, Vol. 50",https://i.scdn.co/image/ab67616d00001e0293c4ee...,2022-11-16,26,77
7,7,0kfy23QZO9KebgwxJvXzJI,IO NON HO PAURA,https://i.scdn.co/image/ab67616d00001e02cc05a9...,2022-11-18,24,75
8,8,202T8cUT1c3oQarCOJBy6m,Milano Demons,https://i.scdn.co/image/ab67616d00001e02c7e356...,2022-11-25,17,80
9,9,2SWwDDBZG7UfECbPWQ2t4h,Back To The Game,https://i.scdn.co/image/ab67616d00001e027d43a8...,2022-11-23,19,75


In [19]:
data = []

# Re-fetch every album already present in the loaded dataset so that its
# popularity and days-since-release reflect today's values.
unique_albums = df.id.unique()
for album_id in unique_albums:
  data.append(GetAlbumResults(album_id))

# Append the newly released, popular albums of each configured market.
for market in all_market_codes['markets']:
  try:
    d = GetNewResults(market)
    data.extend(d)
  except Exception as exc:
    # Best effort: a failing market must not abort the run, but report why it
    # failed. `Exception` (not a bare except) lets KeyboardInterrupt through.
    print("No New Results from the {} ({}: {})".format(market, type(exc).__name__, exc))


df = pd.DataFrame(data,columns=['id','name','image','release_date','days_since_release','popularity'])
# An album can be returned for several markets; keep a single row per album and day count.
df = df.drop_duplicates(subset=['id','days_since_release'])

In [20]:
df.to_csv('data.csv')
!cp data.csv "drive/My Drive/spotipy/"

In [26]:
# 1. Authorizing google colab
from google.colab import auth
auth.authenticate_user()

# 2. credentials for google sheets
import gspread
from google.auth import default
creds, _ = default()

# 3. authotizing the connection
gc = gspread.authorize(creds)

In [None]:
sh = gc.open('spotipy')
today = str(datetime.date.today())
# Size the new worksheet to the data (one extra row for the header) so that
# the upload never exceeds the sheet's grid; never smaller than 100 x 10.
worksheet = sh.add_worksheet(
    title=today,
    rows=max(100, len(df) + 1),
    cols=max(10, len(df.columns)),
)
# worksheet.title = today
# First row is the header, followed by one row per album.
worksheet.update([df.columns.values.tolist()] + df.values.tolist())

# Future
## Analysis of the collected data (needs at least 3 months of data)
### The code above will be run privately as a cron job

In [21]:
## Retrieve file(s) from Google Drive

In [22]:
# !cp "drive/My Drive/spotipy/<file-name>.csv" data.csv 

In [23]:
# df = pd.read_csv('<file-name>.csv')

# ScratchPad

In [24]:
# Spot-check: new albums above the default popularity cut-off for the 'IN' market.
GetNewResults('IN')

[['07w0rG5TETcyihsEIZR3qG',
  'SOS',
  'https://i.scdn.co/image/ab67616d00001e0270dbc9f47669d120ad874ec1',
  '2022-12-09',
  14,
  97]]

In [25]:
# Spot-check: the flattened row for a single album id.
GetAlbumResults('0gr5OmB74UhoANEXwYT3gE')

['0gr5OmB74UhoANEXwYT3gE',
 'Dreamers [Music from the FIFA World Cup Qatar 2022 Official Soundtrack]',
 'https://i.scdn.co/image/ab67616d00001e02ef57183066d6cac0cabb85c6',
 '2022-11-20',
 33,
 81]