# Import libraries

In [1]:
import ast
import base64
import json
import os
from pathlib import Path

import numpy as np
import pandas as pd
import swifter
from requests import get, post

# Dummy 1

## Data loading and cleaning

In [8]:
def get_absolute_path(relative_path):
    '''
    Creates an absolute path based on a relative path provided

    The relative path is resolved against the PARENT of the current working
    directory, i.e. the notebook is expected to be run from a sub-folder of
    the project root.

    Parameters
    ----------
    relative_path : str
        Path relative to the parent of the current working directory,
        e.g. "Data/charts.csv".

    Returns
    -------
    str
        The absolute path, built with the separator of the running platform.
    '''
    ## os.path is used instead of splitting/joining on "/" so that the result
    ## is also correct on platforms whose separator is not "/"
    parent_dir = os.path.dirname(os.getcwd())
    return os.path.join(parent_dir, relative_path)

## Chart rows dated before this day are discarded
START_DATE = pd.to_datetime("2020-11-12")

## Loads the dataset (`path` is kept as a global: it is read again further down)
path = get_absolute_path(r"Data/charts.csv")
charts_raw = pd.read_csv(path)

## Sorts, drops duplicated (name, position, date) rows keeping the first one,
## turns the dates into datetime objects and keeps the rows from START_DATE on.
## The steps run in the same order as before; the original index labels are
## preserved on purpose (no reset_index).
df = (
    charts_raw
    .sort_values(by=["date", "position"])
    .drop_duplicates(subset=["name", "position", "date"], keep="first")
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
    .loc[lambda frame: frame["date"] >= START_DATE]
    ## Explicit copy: the following cells add/drop columns on `df`, which on a
    ## filtered view of another frame raises SettingWithCopyWarning
    .copy()
)

In [10]:
def _first_artist(raw):
    '''
    Returns the first artist named in a stringified list of artists

    Parameters
    ----------
    raw : str
        Text such as "['Ariana Grande', 'Doja Cat']".

    Returns
    -------
    str
        The first element of the list with surrounding whitespace removed,
        or "" when the list is empty.
    '''
    ## The value is parsed as a Python literal so that commas and apostrophes
    ## INSIDE an artist name are kept (splitting on "," and deleting every "'"
    ## truncates / alters such names)
    try:
        parsed = ast.literal_eval(raw)
    except (ValueError, SyntaxError, TypeError):
        parsed = None

    if isinstance(parsed, (list, tuple)):
        return str(parsed[0]).strip() if parsed else ""

    ## Fallback for text that is not a list literal (e.g. an already cleaned
    ## name when the cell is re-run): same character stripping as before
    stripped = raw.replace("[", "").replace("'", "").replace("]", "")
    return stripped.split(",")[0].strip()


## Replaces the list-like `artists` column by the first artist only; the column
## is dropped and re-added so it ends up as the last column, as before
first_artists = df["artists"].map(_first_artist)
df = df.drop(columns=["artists"]).assign(artists=first_artists)

## Dummy

In [11]:
## dummy_1 is 1 when the most recent chart date on which the song appeared is
## the chart date processed immediately before the current one, 0 otherwise
## (including the first time a song is seen).
last_seen = {}        ## song name -> latest chart date on which it appeared
previous_date = None  ## chart date handled in the previous iteration
variable = []

for date in df.date.unique():
    ## The column is selected with ["name"]: on a row yielded by iterrows(),
    ## `row.name` is the row's index label, NOT the "name" column, which made
    ## every row look like a song never seen before
    names_on_date = df.loc[df.date == date, "name"]
    for song in names_on_date:
        was_on_previous_chart = song in last_seen and last_seen[song] == previous_date
        variable.append(1 if was_on_previous_chart else 0)
        last_seen[song] = date

    previous_date = date

## Positional assignment: relies on the rows of `df` being grouped by date in
## the order returned by df.date.unique() (true after sorting by date)
df["dummy_1"] = variable

# Release Date

In [None]:
## Client credentials for the API are read from the environment so that no
## secret is stored in the notebook. Set SPOTIFY_CLIENT_ID and
## SPOTIFY_CLIENT_SECRET before starting the kernel.
## NOTE: credentials that were ever saved inside a notebook must be considered
## leaked and should be revoked/rotated.
client_id = os.environ.get("SPOTIFY_CLIENT_ID")
client_secret = os.environ.get("SPOTIFY_CLIENT_SECRET")

def get_token():
    '''
    Generates a token to make an API request

    Uses the module-level `client_id` / `client_secret` with the
    client-credentials grant.

    Returns
    -------
    str
        The value of "access_token" in the JSON response.

    Raises
    ------
    RuntimeError
        When one of the credentials is missing.
    requests.HTTPError
        When the token endpoint answers with an error status.
    '''
    if not client_id or not client_secret:
        raise RuntimeError(
            "Missing API credentials: set the SPOTIFY_CLIENT_ID and "
            "SPOTIFY_CLIENT_SECRET environment variables"
        )

    auth_string = client_id + ':' + client_secret
    auth_base64 = base64.b64encode(auth_string.encode('utf-8')).decode('utf-8')

    url = 'https://accounts.spotify.com/api/token'
    headers = {"Authorization": 'Basic ' + auth_base64,
               ## Proper header name (it was spelled "Content_type")
               'Content-Type': 'application/x-www-form-urlencoded'
               }
    data = {'grant_type': 'client_credentials'}
    result = post(url, headers=headers, data=data, timeout=30)
    ## Fails here with the HTTP error instead of a KeyError on 'access_token'
    result.raise_for_status()
    return result.json()['access_token']

def get_auth_header(token):
    '''
    Builds the authorization header carrying the given bearer token
    '''
    bearer_value = 'Bearer ' + token
    return dict(Authorization=bearer_value)

def get_features(token, song_id, market='US'):
    '''
    Retrieves all the information based on the track ID passed

    Parameters
    ----------
    token : str
        Access token, sent as a bearer token.
    song_id : str
        ID of the track to look up.
    market : str, optional
        Value of the `market` query parameter (default 'US').

    Returns
    -------
    dict
        The decoded JSON body of the response.

    Raises
    ------
    requests.HTTPError
        When the API answers with an error status (for instance an expired
        token or rate limiting), instead of returning the error payload as if
        it were a track.
    '''
    url = f'https://api.spotify.com/v1/tracks/{song_id}'
    headers = get_auth_header(token)
    result = get(url, headers=headers, params={'market': market}, timeout=30)
    result.raise_for_status()

    return result.json()

## Requests an access token once and keeps it in `token` for the API calls
## made in the following cell
token = get_token()

In [None]:
## Looks up the album release date of every distinct track (one API request
## per track). The loop variable is no longer called `id`, which replaced the
## builtin `id` for the rest of the notebook.
release_date_by_track = {}
for track_id in df.track_id.unique():
    track = get_features(token, track_id)
    release_date_by_track[track_id] = track["album"]["release_date"]

data = {
    "track_id": list(release_date_by_track.keys()),
    "release_date": list(release_date_by_track.values()),
}

## One row per track: track_id, release_date (kept as returned by the API)
dates = pd.DataFrame(data=data)

# Dummy 2

In [13]:
## Reads the raw file once more (no de-duplication, no date filter), converts
## the dates to datetime objects and orders the rows by date and position
df2 = (
    pd.read_csv(path)
    .assign(date=lambda frame: pd.to_datetime(frame.date))
    .sort_values(by=["date", "position"])
)

Unnamed: 0,track_id,name,country,date,position,streams,artists,artist_genres,duration,explicit
8922,35mvY5S1H3J2QZyna3TFe0,positions,us,2020-11-05,1,11638281,['Ariana Grande'],"['dance pop', 'pop']",172324,True
7048,6Im9k8u9iIzKMrmV7BWtlF,34+35,us,2020-11-05,2,10380924,['Ariana Grande'],"['dance pop', 'pop']",173710,True
14670,7hxHWCCAIIxFLCzvDgnQHX,"Lemonade (feat. Gunna, Don Toliver & NAV)",us,2020-11-05,3,7796741,"['Internet Money', 'Gunna', 'Don Toliver', 'NAV']","['pop rap', 'melodic rap', 'pop', 'canadian tr...",195428,True
14625,5GkQIP5mWPi4KZLLXeuFTT,motive (with Doja Cat),us,2020-11-05,4,7586598,"['Ariana Grande', 'Doja Cat']","['dance pop', 'pop']",167989,True
19614,3tjFYV6RSFtuktYl3ZtYcq,Mood (feat. iann dior),us,2020-11-05,5,7490019,"['24kGoldn', 'iann dior']","['pop rap', 'pop', 'melodic rap', 'cali rap', ...",140525,True
...,...,...,...,...,...,...,...,...,...,...
13018,0zCgWGmDF0aih5qexATyBn,After Dark,us,2022-11-10,196,1748379,['Mr.Kitty'],['weirdcore'],257147,False
14737,6PaWZ0PIMxx15YBgCohvXY,Gilded Lily,us,2022-11-10,197,1745294,['Cults'],"['indie pop', 'indie surf', 'experimental pop']",212736,False
16723,7sO5G9EABYOXQKNPNiE9NR,Ric Flair Drip (with Metro Boomin),us,2022-11-10,198,1744847,"['Offset', 'Metro Boomin']","['pop rap', 'southern hip hop', 'atl hip hop',...",172800,True
7249,0HYAsQwJIO6FLqpyTeD3l6,死ぬのがいいわ,us,2022-11-10,199,1743967,['Fujii Kaze'],['japanese teen pop'],185573,False


In [None]:
## dummy_2 is 1 when the track's release date falls between the previous chart
## date and the row's own chart date (both included), 0 otherwise. For the
## first chart date the window is that single date, as before.

## Release date per track, taken from `dates` (df2 itself has no release_date
## column). Values are parsed one by one because they are kept as returned by
## the API and may not all share the same format -- TODO confirm; anything
## that cannot be parsed becomes NaT and yields 0.
release_by_track = (
    dates
    .drop_duplicates(subset=["track_id"])
    .set_index("track_id")["release_date"]
    .map(lambda raw: pd.to_datetime(raw, errors="coerce"))
)
release_dates = pd.to_datetime(df2["track_id"].map(release_by_track))

## Start of the window of every row: the chart date preceding its own
chart_dates = pd.Series(df2["date"].unique()).sort_values(ignore_index=True)
previous_dates = chart_dates.shift(1).fillna(chart_dates.iloc[0])
previous_by_chart = pd.Series(previous_dates.to_numpy(), index=chart_dates.to_numpy())
window_start = df2["date"].map(previous_by_chart)

released_in_window = release_dates.between(window_start, df2["date"]).astype(int)

## The flags are computed on the raw frame but stored on the cleaned one, so
## they are matched by index label instead of by position (the two frames do
## not have the same number of rows). Assumes `df` still carries the index
## labels of the CSV it was read from, like `df2`; a KeyError here means that
## assumption no longer holds.
df["dummy_2"] = released_in_window.loc[df.index]