# Tratamento de Dados
## Dados Pessoais de Uso do Spotify
### Dashboard Analítico

In [22]:
# Imports

import datetime
import json
import numpy as np
import pandas as pd
from GenreScrapy import genreScrapy


In [2]:
# Reading Data

df1 = pd.read_json('StreamingHistory0.json')
df2 = pd.read_json('StreamingHistory1.json')


In [3]:
# Functions

def to_turns(value):
    if (value < datetime.time(6,0)):
        return 'Madrugada'
    elif (value < datetime.time(12,0)):
        return 'Manhã'
    elif (value < datetime.time(18,0)):
        return 'Tarde'
    else:
        return 'Noite'

In [4]:
# Manipulation Data

# Concatenate Dataframes
df = pd.concat([df1, df2])

# Convert Data Type
df = df.astype({'endTime': 'datetime64', 'artistName': 'string', 'trackName': 'string'})

# Rename Column
df = df.rename(columns={'endTime': 'endDatetime'})

# Create Columns
df['minutesPlayed'] = round(df['msPlayed']/60000, 2)
df['endDate'] = df['endDatetime'].dt.date
df['endTime'] = df['endDatetime'].dt.time
df['turns'] = df['endTime'].map(to_turns)

# Convert Data Type
df = df.astype({'turns': 'string'})

# Order Columns
df = df.loc[:, ['endDatetime', 'endDate', 'endTime', 'msPlayed', 'minutesPlayed', 'turns', 'trackName', 'artistName']]


In [5]:
# Exploratory Analysis

display(df)
display(df.info())
display(df.describe())


Unnamed: 0,endDatetime,endDate,endTime,msPlayed,minutesPlayed,turns,trackName,artistName
0,2021-01-18 20:41:00,2021-01-18,20:41:00,18610,0.31,Noite,Come A Little Bit Closer,Jay & The Americans
1,2021-01-21 01:58:00,2021-01-21,01:58:00,292853,4.88,Madrugada,Touch,Cigarettes After Sex
2,2021-01-21 02:03:00,2021-01-21,02:03:00,285147,4.75,Madrugada,Hentai,Cigarettes After Sex
3,2021-01-21 02:07:00,2021-01-21,02:07:00,256800,4.28,Madrugada,Cry,Cigarettes After Sex
4,2021-01-21 02:12:00,2021-01-21,02:12:00,245840,4.10,Madrugada,Falling In Love,Cigarettes After Sex
...,...,...,...,...,...,...,...,...
6197,2022-01-21 18:50:00,2022-01-21,18:50:00,166840,2.78,Noite,Oba Lá Vem Ela / Incluindo: Musa Da Ilha Grand...,Charlie Brown Jr.
6198,2022-01-21 18:53:00,2022-01-21,18:53:00,140200,2.34,Noite,Tudo Mudar - Ao Vivo,Charlie Brown Jr.
6199,2022-01-21 18:55:00,2022-01-21,18:55:00,151667,2.53,Noite,Proibida Pra Mim (Grazon) - Ao Vivo,Charlie Brown Jr.
6200,2022-01-21 18:59:00,2022-01-21,18:59:00,229827,3.83,Noite,Charlie Brown Jr - Ao Vivo,Charlie Brown Jr.


<class 'pandas.core.frame.DataFrame'>
Int64Index: 16202 entries, 0 to 6201
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   endDatetime    16202 non-null  datetime64[ns]
 1   endDate        16202 non-null  object        
 2   endTime        16202 non-null  object        
 3   msPlayed       16202 non-null  int64         
 4   minutesPlayed  16202 non-null  float64       
 5   turns          16202 non-null  string        
 6   trackName      16202 non-null  string        
 7   artistName     16202 non-null  string        
dtypes: datetime64[ns](1), float64(1), int64(1), object(2), string(3)
memory usage: 1.1+ MB


None

Unnamed: 0,msPlayed,minutesPlayed
count,16202.0,16202.0
mean,222445.3,3.707403
std,127133.0,2.119005
min,0.0,0.0
25%,175907.0,2.93
50%,213375.5,3.56
75%,257560.0,4.29
max,4390540.0,73.18


In [7]:
"""
# Artist Names (Uniques)

artistName = list(set(df.artistName))
artistName.sort()

# Get Genres

genreList = list()

for name in artistName:
    try:
        language = 'portuguese'
        genre = genreScrapy(name, language)
        if genre == 'Not found':
            try:
                language = 'english'
                genre = genreScrapy(name, language)
            except:
                genre = 'Not found'
    except:
        try:
            language = 'english'
            genre = genreScrapy(name, language)
        except:
            genre = 'Not found'
            
    genreList.append(genre)

# Data Manipulation

genreList = [i.title() for i in genreList]
dict_genres = dict(zip(artistName, genreList))

# Export JSON

# Create json object from dictionary
json = json.dumps(dict_genres)

# Open file for writing, "w" 
f = open("dict_genres.json","w")

# Write json object to file
f.write(json)

# Close file
f.close()
"""

In [41]:
# Genres Analysis

# New Dataframe
df_genres = df.copy()

# Import JSON File
f = open('dict_genres.json') 
artist_genres = json.load(f)

# Modify Dataframe

df_genres['genres'] = df_genres['artistName'].map(artist_genres)
display(df_genres)

# Export Dataframe

df.to_csv('data.csv')


Unnamed: 0,endDatetime,endDate,endTime,msPlayed,minutesPlayed,turns,trackName,artistName,genres
0,2021-01-18 20:41:00,2021-01-18,20:41:00,18610,0.31,Noite,Come A Little Bit Closer,Jay & The Americans,Pop Rock
1,2021-01-21 01:58:00,2021-01-21,01:58:00,292853,4.88,Madrugada,Touch,Cigarettes After Sex,Dream Pop
2,2021-01-21 02:03:00,2021-01-21,02:03:00,285147,4.75,Madrugada,Hentai,Cigarettes After Sex,Dream Pop
3,2021-01-21 02:07:00,2021-01-21,02:07:00,256800,4.28,Madrugada,Cry,Cigarettes After Sex,Dream Pop
4,2021-01-21 02:12:00,2021-01-21,02:12:00,245840,4.10,Madrugada,Falling In Love,Cigarettes After Sex,Dream Pop
...,...,...,...,...,...,...,...,...,...
6197,2022-01-21 18:50:00,2022-01-21,18:50:00,166840,2.78,Noite,Oba Lá Vem Ela / Incluindo: Musa Da Ilha Grand...,Charlie Brown Jr.,Rock Alternativo
6198,2022-01-21 18:53:00,2022-01-21,18:53:00,140200,2.34,Noite,Tudo Mudar - Ao Vivo,Charlie Brown Jr.,Rock Alternativo
6199,2022-01-21 18:55:00,2022-01-21,18:55:00,151667,2.53,Noite,Proibida Pra Mim (Grazon) - Ao Vivo,Charlie Brown Jr.,Rock Alternativo
6200,2022-01-21 18:59:00,2022-01-21,18:59:00,229827,3.83,Noite,Charlie Brown Jr - Ao Vivo,Charlie Brown Jr.,Rock Alternativo
