# General Information

![](https://upload.wikimedia.org/wikipedia/commons/thumb/5/56/Spotify_logo_horizontal_black.jpg/800px-Spotify_logo_horizontal_black.jpg)

From the 'Top 50 Spotify Songs - 2019' dataset page:
The dataset contains several variables describing the 50 most-listened songs on Spotify in 2019.

**Contents of the dataset**
* 50 songs
* 13 variables

**Data Extracted from**
 http://organizeyourmusic.playlistmachinery.com/



In [None]:
import pandas as pd

# Latin-1 decoding is required for pandas to read this file correctly.
csv_path = '/kaggle/input/top50spotify2019/top50.csv'
spotify = pd.read_csv(csv_path, encoding='latin_1')

# Preview the first rows as the cell output.
spotify.head()

In [None]:
#Basic information: prints each column's name, non-null count and dtype, plus memory usage
spotify.info()

In [None]:
#basic descriptive statistics (count, mean, std, min, quartiles, max) of the numeric columns
spotify.describe()

In [None]:
#features of the dataset: the column labels of the DataFrame
spotify.columns

# **Exploratory Data Analysis**

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Most Popular Songs

In [None]:
# Horizontal bar chart with one row per track name; bar length is the
# track's Popularity value. The canvas is tall (9x25) so the labels on
# the y-axis do not overlap.
popularity_fig, popularity_ax = plt.subplots(figsize=(9, 25))
sns.barplot(x='Popularity', y='Track.Name', data=spotify, ax=popularity_ax)
plt.show()

**Inference**: Bad Guy took over people's hearts

# Songs by Genre

In [None]:
# Number of songs per genre, drawn as horizontal count bars (one per genre).
genre_grid = sns.catplot(
    data=spotify,
    y='Genre',
    kind='count',
    edgecolor="0.5",
)
plt.show()

**Inference**: Dance Pop was on most people's minds in 2019

# Popularity of Artists

In [None]:
# Number of songs each artist has in the playlist, as horizontal count bars;
# the longest bar answers "which artist has the most songs in the Top 50?".
artist_grid = sns.catplot(
    data=spotify,
    y='Artist.Name',
    kind='count',
    edgecolor="0.6",
)
plt.show()

**Inference**: Ed Sheeran had a great 2019

# Optimal BPM

In [None]:
# Which tempo occurs most often? One bar per distinct BPM value, bar height
# is the number of songs with that tempo. Wide canvas so every BPM tick fits.
bpm_fig, bpm_ax = plt.subplots(figsize=(25, 9))
sns.countplot(x='Beats.Per.Minute', data=spotify, ax=bpm_ax)
bpm_ax.set_title('Beats/min')
plt.show()

**Inference**: 176 bpm seems to be the most preferred

# 2D and 3D Plots for Energy, Danceability and Loudness

In [None]:
#Energy, Danceability and Loudness
#2D Plotting: one 15-bin count histogram per feature, each in its own figure

# (column, title, x-axis label, colour). An x-axis label of None keeps the
# label seaborn derives from the column name.
histogram_specs = [
    ('Energy', 'Energy', None, 'red'),
    ('Danceability', 'Danceability', None, 'blue'),
    ('Loudness..dB..', 'Loudness', 'Loudness(dB)', 'green'),
]

for column, title, xlabel, color in histogram_specs:
    plt.figure(figsize=(8,4))
    # histplot is the supported replacement for the deprecated
    # distplot(..., kde=False): a plain histogram without a density curve.
    sns.histplot(spotify[column], bins=15, color=color, kde=False)
    plt.title(title)
    if xlabel is not None:
        plt.xlabel(xlabel)
    plt.show()


In [None]:
#3D Plotting of Energy, Danceability and Loudness (with Plotly)
import plotly.graph_objects as go

# Marker styling: each point is coloured by the song's popularity and a
# colour bar titled 'Popularity' is attached.
marker_style = dict(
    color = spotify['Popularity'],
    colorbar_title = 'Popularity',
    colorscale = 'blues',
)

# One marker per song; the track name is attached as the point's text.
songs_trace = go.Scatter3d(
    x = spotify['Energy'],
    y = spotify['Danceability'],
    z = spotify['Loudness..dB..'],
    text = spotify['Track.Name'],
    mode = 'markers',
    marker = marker_style,
)

# Axis titles for the 3D scene.
scene_axes = dict(
    xaxis = dict(title='Energy'),
    yaxis = dict(title='Danceability'),
    zaxis = dict(title='Loudness'),
)

#build the figure, then set its size, title and axis labels
fig = go.Figure(data = [songs_trace])
fig.update_layout(
    width = 800,
    height = 800,
    title = 'Energy, Danceability & Loudness of Songs',
    scene = scene_axes,
)

fig.show()


# Speechiness and Acousticness

In [None]:
#relating speechiness and acousticness with popularity

#Speechiness: one violin of popularity values per distinct speechiness value
plt.figure(figsize=(12,6))
sns.violinplot(x='Speechiness.',y='Popularity',data=spotify)
plt.xlabel('Speechiness')
plt.ylabel('Popularity')
plt.title('Speechiness vs Popularity')
plt.show()

#Acousticness: joint KDE of acousticness and popularity with marginal densities.
# jointplot is a figure-level function that always creates its own figure, so
# the size is set through `height` (a square figure, in inches) instead of a
# preceding plt.figure(), which would only leave a blank figure behind.
joint_grid = sns.jointplot(data=spotify,
                           x='Acousticness..',
                           y='Popularity',
                           kind='kde',
                           height=10,
                           space=1)

# Title the whole figure rather than whichever axes happens to be current;
# y > 1 lifts it above the top marginal plot.
joint_grid.ax_joint.figure.suptitle('Acousticness vs Popularity', y=1.02)
plt.show()

# Length of the songs

In [None]:
#looking at the length of songs
import plotly.express as px

# Line chart with the track names along the x-axis and each track's
# 'Length.' value on the y-axis.
fig = px.line(
    data_frame=spotify,
    x='Track.Name',
    y='Length.',
    title='Length distribution of songs',
)
fig.show()

# Correlation between all variables

In [None]:
#plotting the pairwise correlation between all numeric variables

# Correlation is only defined for numeric columns, and recent pandas versions
# raise on text columns (e.g. 'Track.Name') instead of silently dropping them,
# so restrict the frame to numeric dtypes before calling corr().
numeric_features = spotify.select_dtypes(include='number')

plt.figure(figsize=(15,15))
plt.title('Correlation between all variables')
# annot=True writes each coefficient into its cell; square cells with a
# 1-pt gap between them keep the matrix readable.
sns.heatmap(data=numeric_features.corr(),
           annot=True,
           square=True,
           linewidths=1)
plt.show()

In [None]:
#pairwise scatter plots of the numeric columns, with each column's histogram on the diagonal
sns.pairplot(spotify)
plt.show()