In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Collect the full path of every file found under the Kaggle input directory,
# then print them one per line.
input_paths = [
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
]
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Importing the data

In [None]:
# Show every column when a DataFrame is displayed. The previous
# pd.get_option(...) call only read the setting and discarded the result,
# so it had no effect.
pd.set_option("display.max_columns", None)

# Load the dataset and preview the first rows.
df = pd.read_csv("/kaggle/input/spotify-top-100/top2018.csv")
df.head()

## Checking the Datatypes

In [None]:
# Show the dtype pandas inferred for each column.
df.dtypes

In [None]:
# Summary statistics for every column; include="all" also covers non-numeric columns.
df.describe(include="all")

In [None]:
# List the column names available for the analysis below.
df.columns

## Finding which artists had the most Top 100 songs

In [None]:
# Number of songs per artist, ordered from most to fewest.
songs_per_artist = df.groupby("artists")["artists"].count()
art = songs_per_artist.sort_values(ascending=False)
art.head()

These are the top 5 artists with the most Top 100 songs. Post Malone and XXXTENTACION each have 6 songs.

In [None]:
# Convert the per-artist counts from a Series into a single-column DataFrame.
# Note: this reassigns `art`, so the cell is meant to be run once per kernel session.
art = art.to_frame()

In [None]:
# Preview the converted DataFrame.
art.head()

In [None]:
# Rename the value column from "artists" to "count". The original mapping
# listed the "artists" key twice; only the last entry of a dict literal takes
# effect, so a single entry states the intent clearly.
artist = art.rename(columns={"artists": "count"})
artist.head(10)  # Viewing the top 10 artists

#### Resetting the index

In [None]:
# Move the artist names out of the index into a regular "artists" column.
artist = artist.reset_index()

In [None]:
# Keep the first ten rows; the counts were sorted in descending order earlier,
# so these are the artists with the most songs.
top10artist = artist.head(10)
top10artist

In [None]:
# importing matplotlib for plotting
import matplotlib.pyplot as plt

In [None]:
# Bar chart of the number of songs for each of the ten leading artists.
fig, ax = plt.subplots(figsize=(15, 8))
ax.bar(
    top10artist["artists"],
    top10artist["count"],
    color="purple",
    width=0.3,
)
ax.set_xlabel("Artists")
ax.set_ylabel("Number of Songs")
ax.set_title("Artist vs songs")
plt.show()

Exporting top10artist dataframe to CSV format 

In [None]:
# Save the table without the row index. `index` is a boolean flag; use False
# (as every other export in this notebook does) instead of relying on None
# being falsy.
top10artist.to_csv("Top 10 artist.csv", index=False)

## Checking the Correlation


In [None]:
import seaborn as sns
# Correlate the numeric columns only. Calling df.corr() on a frame that still
# contains string columns raises on pandas >= 2.0 (older versions dropped them
# silently), so select the numeric columns explicitly.
corr = df.select_dtypes(include="number").corr()

# Heatmap of the pairwise correlations, labelled with the column names.
fig, ax = plt.subplots(figsize=(10, 10))
sns.heatmap(
    corr,
    xticklabels=corr.columns.values,
    yticklabels=corr.columns.values,
    linewidths=.5,
    ax=ax,
)

In [None]:
# List the column names again to choose the features analysed next.
df.columns

## Checking each artist's maximum danceability, energy, key, speechiness and acousticness

### Danceability

In [None]:
# Highest danceability score among each artist's songs, ranked from high to low.
max_danceability = df.groupby(["artists"])["danceability"].max()
dan = max_danceability.sort_values(ascending=False)
dan.head()

In [None]:
# Turn the Series into a DataFrame and move the artist names from the index
# into a regular column, in one chained step.
dan = dan.to_frame().reset_index()
dan

In [None]:
# Keep the ten artists with the highest maximum danceability (the frame is already sorted).
top10dan=dan.head(10)
top10dan

In [None]:
# Bar chart of the top-10 danceability values, one bar per artist.
figure = plt.figure(figsize=(15, 8))
plots = sns.barplot(data=top10dan, x="artists", y="danceability")

# Label every bar with its value (two decimals), offset 8 points above the bar top.
for patch in plots.patches:
    bar_top = patch.get_height()
    bar_centre = patch.get_x() + patch.get_width() / 2
    plots.annotate(
        f"{bar_top:.2f}",
        (bar_centre, bar_top),
        ha="center",
        va="center",
        size=15,
        xytext=(0, 8),
        textcoords="offset points",
    )

plots.set_xlabel("Artists", size=15)
plots.set_ylabel("Danceability", size=15)
plots.set_title("Danceability", size=20)
plt.show()

In [None]:
# Save the top-10 danceability table as a CSV file, without the row index.
top10dan.to_csv("Top 10 danceability.csv",index=False)

### Energy

In [None]:
# Highest energy score among each artist's songs, ranked from high to low.
max_energy = df.groupby(["artists"])["energy"].max()
ene = max_energy.sort_values(ascending=False)
ene

In [None]:
# Turn the Series into a DataFrame and move the artist names from the index
# into a regular column, in one chained step.
ene = ene.to_frame().reset_index()
ene

In [None]:
# Keep the ten artists with the highest maximum energy (the frame is already sorted).
top10ene = ene.head(10)
top10ene

In [None]:
# Line chart of the top-10 energy values, with each point labelled by its value.
figure, ax = plt.subplots(figsize=(17, 5))
ax.plot(top10ene["artists"], top10ene["energy"], color="red")

# The x coordinate of each label is the row's index label from top10ene.
for row_label, energy_level in top10ene["energy"].items():
    ax.annotate(str(energy_level), xy=(row_label, energy_level))

ax.set_xlabel("Artist")
ax.set_ylabel("Energy level")
ax.set_title("Energy level for Artist", size=20)
plt.show()

In [None]:
# Save the top-10 energy table as a CSV file, without the row index.
top10ene.to_csv("Top 10 energy.csv",index=False)

In [None]:
# List the column names again to choose the features analysed next.
df.columns

### Key, speechiness and acousticness

In [None]:
# Per-artist maximum of key, speechiness and acousticness, ordered by key and
# then speechiness (both descending).
# The columns are selected with a list: the bare-tuple form
# groupby(...)["a", "b", "c"] was deprecated in pandas 1.0 and raises in 2.0.
kla = (
    df.groupby(["artists"])[["key", "speechiness", "acousticness"]]
    .max()
    .sort_values(by=["key", "speechiness"], ascending=False)
)
top10kla = kla.head(10)

In [None]:
# Move the artist names out of the index into a regular "artists" column.
top10kla=top10kla.reset_index()

In [None]:
# Display the ten selected rows.
top10kla

In [None]:
# Draw one annotated bar chart per metric for the artists in top10kla.
# The two charts are separated by a few blank lines of output.
for position, metric in enumerate(("speechiness", "acousticness")):
    if position:
        print("\n\n")

    figure = plt.figure(figsize=(15, 8))
    plots = sns.barplot(data=top10kla, x="artists", y=metric)

    # Label every bar with its value (two decimals), offset 8 points above the bar top.
    for bar in plots.patches:
        bar_top = bar.get_height()
        plots.annotate(
            f"{bar_top:.2f}",
            (bar.get_x() + bar.get_width() / 2, bar_top),
            ha="center",
            va="center",
            size=15,
            xytext=(0, 8),
            textcoords="offset points",
        )

    plt.xlabel("Artists", size=15)
    plt.ylabel(metric, size=15)
    plt.title(f"{metric} for the artists with key value '11'", size=20)
    plt.show()

## Mode

In [None]:
# Count how many songs fall under each value of the "mode" column.
mode = df['mode'].value_counts()
mode

In [None]:
# Build a two-column frame: the mode value and the number of songs with it.
# rename_axis + reset_index(name=...) names both columns explicitly, so the
# result no longer depends on how value_counts() labels its output. With
# pandas >= 2.0 the old to_frame/reset_index/rename sequence already had
# "mode"/"count" columns before the rename, which then produced two columns
# both called "count".
mode = mode.rename_axis("mode").reset_index(name="count")

In [None]:
# Bar chart of the number of songs per mode value.
figure = plt.figure(figsize=(10, 10))
plots = sns.barplot(data=mode, x="mode", y="count")

# Label every bar with its height (two decimals), offset 8 points above the bar top.
for patch in plots.patches:
    bar_top = patch.get_height()
    bar_centre = patch.get_x() + patch.get_width() / 2
    plots.annotate(
        f"{bar_top:.2f}",
        (bar_centre, bar_top),
        ha="center",
        va="center",
        size=15,
        xytext=(0, 8),
        textcoords="offset points",
    )

plots.set_xlabel("Mode", size=15)
plots.set_ylabel("Count", size=15)
plots.set_title("Count of mode", size=20)
plt.show()

In [None]:
# Save the mode counts as a CSV file, without the row index.
mode.to_csv("Count of mode.csv",index=False)

In [None]:
# List the column names again to choose the features analysed next.
df.columns

## Liveness

In [None]:
# Average liveness per artist, ranked from high to low; keep the first ten.
mean_liveness = df.groupby(["artists"])["liveness"].mean()
live = mean_liveness.sort_values(ascending=False)
top10live = live.head(10)

In [None]:
# Turn the Series into a DataFrame and move the artist names from the index
# into a regular column, in one chained step.
top10live = top10live.to_frame().reset_index()

In [None]:
# Convert the mean liveness scores to percentages.
# A single vectorised column assignment replaces the per-row chained indexing
# (frame["col"][i] = ...), which triggers SettingWithCopyWarning and does not
# write back to the frame when pandas Copy-on-Write is enabled.
# NOTE: re-running this cell multiplies the values by 100 again.
top10live["liveness"] = top10live["liveness"] * 100

In [None]:
# Bar chart of the liveness values in top10live, one bar per artist.
figure = plt.figure(figsize=(16, 8))
plots = sns.barplot(data=top10live, x="artists", y="liveness")

# Label every bar with its value (two decimals), offset 8 points above the bar top.
for patch in plots.patches:
    bar_top = patch.get_height()
    bar_centre = patch.get_x() + patch.get_width() / 2
    plots.annotate(
        f"{bar_top:.2f}",
        (bar_centre, bar_top),
        ha="center",
        va="center",
        size=15,
        xytext=(0, 8),
        textcoords="offset points",
    )

plots.set_xlabel("artists", size=15)
plots.set_ylabel("liveness", size=15)
plots.set_title("Artists with their respective live percentage", size=20)
plt.show()

In [None]:
# Save the top-10 liveness table as a CSV file, without the row index.
top10live.to_csv("Top 10 live.csv",index=False)

## Valence

In [None]:
# Average valence per artist, ranked from high to low; keep the first ten.
mean_valence = df.groupby(["artists"])["valence"].mean()
val = mean_valence.sort_values(ascending=False)
top10val = val.head(10)

In [None]:
# Turn the Series into a DataFrame and move the artist names from the index
# into a regular column, in one chained step.
top10val = top10val.to_frame().reset_index()

In [None]:
# Display the ten artists with the highest average valence.
top10val

In [None]:
# Convert the mean valence scores to percentages.
# A single vectorised column assignment replaces the per-row chained indexing
# (frame["col"][i] = ...), which triggers SettingWithCopyWarning and does not
# write back to the frame when pandas Copy-on-Write is enabled.
# NOTE: re-running this cell multiplies the values by 100 again.
top10val["valence"] = top10val["valence"] * 100

In [None]:
# Bar chart of the valence values in top10val, one bar per artist.
figure = plt.figure(figsize=(15, 8))
plots = sns.barplot(data=top10val, x="artists", y="valence")

# Label every bar with its value (two decimals), offset 8 points above the bar top.
for patch in plots.patches:
    bar_top = patch.get_height()
    bar_centre = patch.get_x() + patch.get_width() / 2
    plots.annotate(
        f"{bar_top:.2f}",
        (bar_centre, bar_top),
        ha="center",
        va="center",
        size=15,
        xytext=(0, 8),
        textcoords="offset points",
    )

plots.set_xlabel("Artists", size=15)
plots.set_ylabel("valence", size=15)
plots.set_title("Artist with high valence(Happy songs)%", size=20)
plt.show()

In [None]:
# Save the top-10 valence table as a CSV file, without the row index.

top10val.to_csv("Top 10 Valence.csv",index=False)

In [None]:
# Preview the first rows of df once more.
df.head()