# Load Dataset

In [2]:
import pandas as pd

In [3]:
df = pd.read_csv("Spotify_Youtube_Sample.csv")
df.head()

Unnamed: 0,Artist,Track,Album,Album_type,Views,Likes,Comments,Licensed,official_video,Stream
0,Gorillaz,Feel Good Inc.,Demon Days,album,693555221.0,6220896.0,169907.0,True,True,1040235000.0
1,Gorillaz,Rhinestone Eyes,Plastic Beach,album,72011645.0,1079128.0,31003.0,True,True,310083700.0
2,Gorillaz,New Gold (feat. Tame Impala and Bootie Brown),New Gold (feat. Tame Impala and Bootie Brown),single,8435055.0,282142.0,7399.0,True,True,63063470.0
3,Gorillaz,On Melancholy Hill,Plastic Beach,album,211754952.0,1788577.0,55229.0,True,True,434663600.0
4,Gorillaz,Clint Eastwood,Gorillaz,album,618480958.0,6197318.0,155930.0,True,True,617259700.0


## Question 1: How many tracks are there in each album type?

In [10]:
df['Album_type'].value_counts()

Album_type
album          14926
single          5004
compilation      788
Name: count, dtype: int64

## Question 2: How many licensed tracks are available for each album type?

In [4]:
pd.crosstab(df['Album_type'], df['Licensed'])

Licensed,False,True
Album_type,Unnamed: 1_level_1,Unnamed: 2_level_1
album,4335,10284
compilation,313,456
single,1460,3400


## Question 3: What is the total number of Views for each Artist? 
## Sort in desending order of views

In [21]:
df.groupby('Artist')['Views'].sum().sort_values(ascending=False)

Artist
Ed Sheeran                1.546021e+10
CoComelon                 1.460167e+10
Katy Perry                1.312063e+10
Charlie Puth              1.216759e+10
Luis Fonsi                1.162811e+10
                              ...     
Jorge & Mateus            0.000000e+00
Yovie & Nuno              0.000000e+00
Angus & Julia Stone       0.000000e+00
KC & The Sunshine Band    0.000000e+00
Matheus & Kauan           0.000000e+00
Name: Views, Length: 2079, dtype: float64

## Question 4: What is the average number of Likes, Comments, and Views for each Album Type?

In [25]:
df.groupby('Album_type').agg({'Likes': ['mean'], 'Comments': 'mean', 'Views': 'mean'}).reset_index()

Unnamed: 0_level_0,Album_type,Likes,Comments,Views
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean,mean
0,album,654610.563981,28195.258255,98427390.0
1,compilation,510876.460836,17006.866841,79618320.0
2,single,713608.001649,27152.191046,82698860.0


 ## Question 5: Which Album Type has the highest average number of Views?

In [31]:
df.groupby('Album_type')['Views'].mean().idxmax()

'album'

## Question 6: Find the artists with the 
## Maximum views
## Maximum likes and
## Maximum comments

In [33]:
max_views_artist = df.loc[df['Views'].idxmax()]['Artist']
max_likes_artist = df.loc[df['Likes'].idxmax()]['Artist']
max_comments_artist = df.loc[df['Comments'].idxmax()]['Artist']

## Question 7: Which track has the most reactions?

In [37]:
df.loc[(df['Likes'] + df['Comments']).idxmax()]['Track']

'Despacito'

## Question 8: Which artist with the highest cumulative stream value across all their tracks?

In [46]:
df.groupby('Artist')['Stream'].sum().idxmax()

'Post Malone'

## Question 9: What is the total number of Likes for each Artist across their official videos?

In [49]:
pd.crosstab(df['Artist'], df['official_video'], values=df['Likes'], aggfunc='sum')


official_video,False,True
Artist,Unnamed: 1_level_1,Unnamed: 2_level_1
$NOT,1393483.0,1353680.0
$uicideboy$,188755.0,3387866.0
(G)I-DLE,250.0,24462713.0
*NSYNC,,4233423.0
070 Shake,,2013005.0
...,...,...
will.i.am,81487.0,13782984.0
Ángela Aguilar,141988.0,10152769.0
Ñejo,30526.0,4318384.0
Ñengo Flow,637053.0,6123287.0


## Question 10: Display correlation for each pair of numerical features

In [36]:
df.select_dtypes(include=['int64','float64']).corr()

Unnamed: 0,Views,Likes,Comments,Stream
Views,1.0,0.891101,0.431185,0.601905
Likes,0.891101,1.0,0.63167,0.654247
Comments,0.431185,0.63167,1.0,0.267737
Stream,0.601905,0.654247,0.267737,1.0
