# Spotify Top 50 Analysis

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        full_path = os.path.join(root, name)
        print(full_path)

# Any results you write to the current directory are saved as output.

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Use seaborn's 'whitegrid' style for the figures drawn in this notebook.
sns.set_style(style='whitegrid')

In [None]:
# Load the Top 50 CSV; the file is decoded as ISO-8859-1 instead of the
# pandas default (UTF-8).
df = pd.read_csv(
    '/kaggle/input/top50spotify2019/top50.csv',
    encoding='ISO-8859-1',
)

## 1. About

### 1.1 Data

The 50 most-listened-to songs in the world on Spotify in 2019. For each song, the dataset records several audio features (described below) together with its popularity.

### 1.2 Column Description

**Beats Per Minute (BPM)** - The tempo of the song.<br>
**Energy** - The energy of a song - the higher the value, the more energetic the song.<br>
**Danceability** - The higher the value, the easier it is to dance to this song.<br>
**Loudness (dB)** - The higher the value, the louder the song.<br>
**Liveness** - The higher the value, the more likely the song is a live recording.<br>
**Valence** - The higher the value, the more positive mood for the song.<br>
**Length** - The duration of the song.<br>
**Acousticness** - The higher the value the more acoustic the song is.<br>
**Speechiness** - The higher the value the more spoken word the song contains.<br>
**Popularity** - The higher the value the more popular the song is.<br>

## 2. Preprocess the Data

### 2.1 Preview the First Five Rows

In [None]:
# Show the first five rows of the frame.
df.head(n=5)

### 2.2 Renaming The Columns

In [None]:
# Map the raw CSV headers (dotted names, some with trailing dots) to
# snake_case names used by the rest of the notebook.
# The renamed frame is assigned back rather than mutated with inplace=True:
# inplace offers no benefit here and prevents method chaining. `columns=` is
# used instead of `axis=1` so the target of the mapping is explicit.
df = df.rename(columns={
    'Unnamed: 0': 'rank',
    'Track.Name': 'track_name',
    'Artist.Name': 'artist_name',
    'Genre': 'genre',
    'Beats.Per.Minute': 'beats_per_minute',
    'Energy': 'energy',
    'Danceability': 'danceability',
    'Loudness..dB..': 'loudness_db',
    'Liveness': 'liveness',
    'Valence.': 'valence',
    'Length.': 'length',
    'Acousticness..': 'acousticness',
    'Speechiness.': 'speechiness',
    'Popularity': 'popularity',
})

## 3. Analysis

### 3.1 Top Five Genres

In [None]:
# Number of tracks per genre, limited to the five most frequent genres.
df['genre'].value_counts().head(5)

### 3.2 Top Ten Artists

In [None]:
# Number of tracks per artist, limited to the ten most frequent artists.
df['artist_name'].value_counts().head(n=10)

### 3.3 Genre Analysis

In [None]:
# Full genre frequency table (every genre, most frequent first).
df['genre'].value_counts()

In [None]:
# Per-genre mean of every numeric column, indexed by genre.
# numeric_only=True is needed on pandas >= 2.0, where averaging the text
# columns (track_name, artist_name) raises TypeError instead of silently
# dropping them as older versions did.
df_group_by_genre = df.groupby('genre').mean(numeric_only=True)

In [None]:
def plot_genre_analysis(feat):
    """Draw a horizontal bar chart of the per-genre mean of ``feat``.

    Reads the notebook-level ``df_group_by_genre`` frame, sorts it by
    ``feat`` in descending order and draws one bar per genre on a new
    8x6 figure.

    Parameters
    ----------
    feat : str
        Name of a column of ``df_group_by_genre``.
    """
    # Sort once and reuse the result for both axes.
    ranked = df_group_by_genre.sort_values(by=feat, ascending=False)

    fig, ax = plt.subplots(figsize=(8, 6))
    # x and y are passed as explicit vectors taken from the grouped frame, so
    # no `data=` argument is given (the per-track frame has different rows).
    sns.barplot(x=ranked[feat], y=ranked.index, ax=ax)
    ax.set_title('Mean {} by genre'.format(feat))

In [None]:
# Draw one chart per averaged feature. The feature names are taken from the
# grouped frame itself, so every name passed to plot_genre_analysis is
# guaranteed to be a column it can sort and plot.
for col in df_group_by_genre.columns:
    plot_genre_analysis(col)

### 3.4 Variable Analysis

In [None]:
# Plot the distribution of every numeric feature except 'rank'.
# Numeric columns are selected explicitly instead of "everything that is not
# object dtype".
# sns.distplot is deprecated (seaborn >= 0.11); histplot with a KDE overlay
# and density normalisation draws the equivalent figure.
for item in df.select_dtypes(include='number').drop('rank', axis=1).columns:
    fig, ax = plt.subplots(nrows=1, ncols=1)
    sns.histplot(df[item], kde=True, stat='density', ax=ax)
    ax.set_title('Distribution of {}'.format(item))


### 3.5 Song With the Highest Value of Each Variable

In [None]:
# For each non-object column other than 'rank', print the first track that
# reaches the column's maximum value, followed by a separator line.
report_columns = ['rank', 'track_name', 'artist_name', 'genre']
message = 'Song with the highest {} = "{}" rank {} with {} = {}'

for item in df.select_dtypes(exclude='O').drop('rank', axis=1):
    at_maximum = df[item] == df[item].max()
    the_most = df.loc[at_maximum, report_columns + [item]]
    # Ties are resolved by taking the first matching row.
    title = the_most['track_name'].values[0]
    position = the_most['rank'].values[0]
    peak = the_most[item].values[0]
    print(message.format(item, title, position, item, peak))
    print('----')

### 3.6 Relationship Between Each Variable and Popularity

In [None]:
# Scatter every numeric feature (other than popularity itself) against popularity.
# x and y are passed by keyword: seaborn >= 0.12 makes them keyword-only, so
# the positional form scatterplot(x_values, y_values) raises TypeError there.
column_to_pair = df.select_dtypes(include='number').drop('popularity', axis=1).columns
for item in column_to_pair:
    fig, ax = plt.subplots(nrows=1, ncols=1)
    sns.scatterplot(data=df, x='popularity', y=item, ax=ax)
    ax.set_title('{} vs popularity'.format(item))

### 3.7 Relationship Between Each Variable and Rank

In [None]:
# Scatter every numeric feature (other than rank itself) against chart rank.
# x and y are passed by keyword: seaborn >= 0.12 makes them keyword-only, so
# the positional form scatterplot(x_values, y_values) raises TypeError there.
column_to_pair = df.select_dtypes(include='number').drop('rank', axis=1).columns
for item in column_to_pair:
    fig, ax = plt.subplots(nrows=1, ncols=1)
    sns.scatterplot(data=df, x='rank', y=item, ax=ax)
    ax.set_title('{} vs rank'.format(item))