# Spotify Music Skips Action Prediction
Spotify has over 190 million active users interacting with over 40 million tracks. The goal of the challenge is to predict the likelihood of a user skipping any given song during a listening session.

**Data**:

The data was originally provided in a competition hosted on [crowdAI](https://www.crowdai.org), which later moved to [AIcrowd](https://www.aicrowd.com). It includes a data description file so that you can work on the problem on your own.

**Methodology**:
* Loading Data
* Processing Data
* EDA
* Feature Engineering
* Model Selection
* Choosing Model
* Deployment

NOTE: Because of the competition's Terms and Conditions, I can't share the original link to the data, so I have moved my copy of the data to my Google Drive. However, you can register yourself for the competition on the competition site linked above.

In [1]:
# Importing required libraries.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings

# Silence every warning category so library messages do not clutter cell output.
warnings.simplefilter('ignore')

# Display configuration: never truncate the columns or rows of a rendered frame.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

# Seed NumPy's global random generator so repeated runs give the same results.
np.random.seed(0)

## Loading Data

In [2]:
# Session log: the file carries an .xls suffix but is parsed here as comma-separated text.
session_data = pd.read_csv(filepath_or_buffer='technocolabs_mini_training_30MB set.xls')

In [3]:
# Standardise the track key name, then discard every row that has a missing value.
session_data = (
    session_data
    .rename(columns={'track_id_clean': 'track_id'})
    .dropna(axis=0)
)
session_data.head()

Unnamed: 0,session_id,session_position,session_length,track_id,skip_1,skip_2,skip_3,not_skipped,context_switch,no_pause_before_play,short_pause_before_play,long_pause_before_play,hist_user_behavior_n_seekfwd,hist_user_behavior_n_seekback,hist_user_behavior_is_shuffle,hour_of_day,date,premium,context_type,hist_user_behavior_reason_start,hist_user_behavior_reason_end
0,0_00006f66-33e5-4de7-a324-2d18e439fc1e,1.0,20.0,t_0479f24c-27d2-46d6-a00c-7ec928f2b539,False,False,False,True,0.0,0.0,0.0,0.0,0.0,0.0,True,16.0,7/15/2018,True,editorial_playlist,trackdone,trackdone
1,0_00006f66-33e5-4de7-a324-2d18e439fc1e,2.0,20.0,t_9099cd7b-c238-47b7-9381-f23f2c1d1043,False,False,False,True,0.0,1.0,0.0,0.0,0.0,0.0,True,16.0,7/15/2018,True,editorial_playlist,trackdone,trackdone
2,0_00006f66-33e5-4de7-a324-2d18e439fc1e,3.0,20.0,t_fc5df5ba-5396-49a7-8b29-35d0d28249e0,False,False,False,True,0.0,1.0,0.0,0.0,0.0,0.0,True,16.0,7/15/2018,True,editorial_playlist,trackdone,trackdone
3,0_00006f66-33e5-4de7-a324-2d18e439fc1e,4.0,20.0,t_23cff8d6-d874-4b20-83dc-94e450e8aa20,False,False,False,True,0.0,1.0,0.0,0.0,0.0,0.0,True,16.0,7/15/2018,True,editorial_playlist,trackdone,trackdone
4,0_00006f66-33e5-4de7-a324-2d18e439fc1e,5.0,20.0,t_64f3743c-f624-46bb-a579-0f3f9a07a123,False,False,False,True,0.0,1.0,0.0,0.0,0.0,0.0,True,16.0,7/15/2018,True,editorial_playlist,trackdone,trackdone


In [4]:
# Track-level features are split across two files; load each part.
tf0 = pd.read_csv('tf_00_1GB.csv')
tf1 = pd.read_csv('tf_01_1GB.csv')

# Stack both parts into a single frame with a fresh 0..n-1 index.
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4
# and removed in pandas 2.0, where calling it raises AttributeError.
track_data = pd.concat([tf0, tf1], ignore_index=True)

In [5]:
# Preview the first five rows of the combined track features.
track_data.head(n=5)

Unnamed: 0,track_id,duration,release_year,us_popularity_estimate,acousticness,beat_strength,bounciness,danceability,dyn_range_mean,energy,flatness,instrumentalness,key,liveness,loudness,mechanism,mode,organism,speechiness,tempo,time_signature,valence,acoustic_vector_0,acoustic_vector_1,acoustic_vector_2,acoustic_vector_3,acoustic_vector_4,acoustic_vector_5,acoustic_vector_6,acoustic_vector_7
0,t_2e8f4b71-8a0b-4b9c-b7d8-fb5208e87f9f,326.013336,1971,99.582885,0.716209,0.366495,0.332605,0.439835,5.805774,0.238847,1.0107,0.6533861,0,0.769258,-17.094,0.19917,major,0.759699,0.03394,100.370003,4,0.223395,0.146012,-0.706908,0.259496,0.481157,0.238427,-0.098389,-0.25496,-0.227383
1,t_dae2ec0e-ec7b-4b3e-b60c-4a884d0eccb0,147.813324,1963,97.272035,0.83946,0.362212,0.389829,0.50758,6.845427,0.420476,1.000398,3.94155e-09,0,0.085844,-11.295,0.357639,major,0.747436,0.049856,141.334,4,0.484702,0.039554,-0.539554,0.105141,0.692589,0.226047,-0.468162,0.164389,-0.769024
2,t_cf0164dd-1531-4399-bfa6-dec19cd1fedc,110.400002,1974,99.620384,0.054673,0.495002,0.589378,0.552311,9.361949,0.842938,0.957766,0.1041595,0,0.407325,-9.31,0.304721,major,0.493154,0.071753,138.889999,4,0.818441,0.083863,-0.242108,-0.014258,0.096396,0.417641,-0.050576,-0.204757,-0.172563
3,t_0f90acc7-d5c5-4e53-901d-55610fbd090c,237.653336,1988,96.79683,0.042606,0.389634,0.359044,0.585673,6.068578,0.665398,0.947322,1.444963e-05,0,0.251502,-12.159,0.702948,major,0.212197,0.029425,133.139008,4,0.594829,0.192498,0.340039,0.034846,-0.389794,0.518381,0.185008,-0.079907,-0.016978
4,t_36b9ad02-095a-443d-a697-6c7285d9410a,174.600006,1987,97.905891,0.249982,0.51364,0.485435,0.635095,7.198735,0.408715,1.014063,0.526688,0,0.21837,-13.813,0.888889,major,0.193438,0.032178,152.212006,4,0.591289,0.270586,-0.411061,0.165898,0.225652,0.335518,-0.036643,-0.0163,-0.44687


In [6]:
# Summarise both frames, separated by a ruler line.
# Each call is its own statement: info() and print() write their report and
# return None, so bundling them into a tuple made the cell echo (None, None, None).
session_data.info()
print('=' * 100)
track_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 167880 entries, 0 to 167879
Data columns (total 21 columns):
 #   Column                           Non-Null Count   Dtype  
---  ------                           --------------   -----  
 0   session_id                       167880 non-null  object 
 1   session_position                 167880 non-null  float64
 2   session_length                   167880 non-null  float64
 3   track_id                         167880 non-null  object 
 4   skip_1                           167880 non-null  object 
 5   skip_2                           167880 non-null  object 
 6   skip_3                           167880 non-null  object 
 7   not_skipped                      167880 non-null  object 
 8   context_switch                   167880 non-null  float64
 9   no_pause_before_play             167880 non-null  float64
 10  short_pause_before_play          167880 non-null  float64
 11  long_pause_before_play           167880 non-null  float64
 12  hi

(None, None, None)

## Processing Data

In [7]:
# Left join on track_id: keep every session row and attach the matching track columns.
session_track_data = session_data.merge(track_data, how='left', on='track_id')

In [8]:
# Preview the first five rows of the merged frame.
session_track_data.head(n=5)

Unnamed: 0,session_id,session_position,session_length,track_id,skip_1,skip_2,skip_3,not_skipped,context_switch,no_pause_before_play,short_pause_before_play,long_pause_before_play,hist_user_behavior_n_seekfwd,hist_user_behavior_n_seekback,hist_user_behavior_is_shuffle,hour_of_day,date,premium,context_type,hist_user_behavior_reason_start,hist_user_behavior_reason_end,duration,release_year,us_popularity_estimate,acousticness,beat_strength,bounciness,danceability,dyn_range_mean,energy,flatness,instrumentalness,key,liveness,loudness,mechanism,mode,organism,speechiness,tempo,time_signature,valence,acoustic_vector_0,acoustic_vector_1,acoustic_vector_2,acoustic_vector_3,acoustic_vector_4,acoustic_vector_5,acoustic_vector_6,acoustic_vector_7
0,0_00006f66-33e5-4de7-a324-2d18e439fc1e,1.0,20.0,t_0479f24c-27d2-46d6-a00c-7ec928f2b539,False,False,False,True,0.0,0.0,0.0,0.0,0.0,0.0,True,16.0,7/15/2018,True,editorial_playlist,trackdone,trackdone,180.066666,2018,99.968133,0.015848,0.438551,0.473455,0.653119,7.660024,0.553465,1.035007,0.003484468,1,0.678553,-6.577,0.546784,major,0.320668,0.069717,134.024994,4,0.152255,-0.815775,0.386409,0.23016,0.028028,-0.333373,0.015452,-0.35359,0.205826
1,0_00006f66-33e5-4de7-a324-2d18e439fc1e,2.0,20.0,t_9099cd7b-c238-47b7-9381-f23f2c1d1043,False,False,False,True,0.0,1.0,0.0,0.0,0.0,0.0,True,16.0,7/15/2018,True,editorial_playlist,trackdone,trackdone,236.796371,2018,99.896728,0.061811,0.654804,0.735661,0.877393,11.30875,0.726828,1.025636,1.031315e-07,7,0.104322,-5.319,0.824766,minor,0.131391,0.061158,130.037994,4,0.337152,-0.713646,0.363718,0.310315,-0.042222,-0.383164,0.066357,-0.365308,0.15792
2,0_00006f66-33e5-4de7-a324-2d18e439fc1e,3.0,20.0,t_fc5df5ba-5396-49a7-8b29-35d0d28249e0,False,False,False,True,0.0,1.0,0.0,0.0,0.0,0.0,True,16.0,7/15/2018,True,editorial_playlist,trackdone,trackdone,231.266663,2018,99.999976,0.354116,0.532155,0.540411,0.679719,8.065802,0.563009,1.029465,2.659035e-08,10,0.135776,-5.843,0.774327,major,0.296923,0.045354,145.028,4,0.373862,-0.742541,0.375599,0.25266,-0.049007,-0.299745,0.063341,-0.486689,0.181604
3,0_00006f66-33e5-4de7-a324-2d18e439fc1e,4.0,20.0,t_23cff8d6-d874-4b20-83dc-94e450e8aa20,False,False,False,True,0.0,1.0,0.0,0.0,0.0,0.0,True,16.0,7/15/2018,True,editorial_playlist,trackdone,trackdone,169.826675,2018,99.995038,0.769225,0.641756,0.729224,0.864881,11.287586,0.529484,0.99352,6.598388e-06,1,0.103722,-7.756,0.630996,major,0.603271,0.229936,111.982002,4,0.64942,-0.705116,0.317562,0.289141,-0.03892,-0.393358,0.092719,-0.364418,0.285603
4,0_00006f66-33e5-4de7-a324-2d18e439fc1e,5.0,20.0,t_64f3743c-f624-46bb-a579-0f3f9a07a123,False,False,False,True,0.0,1.0,0.0,0.0,0.0,0.0,True,16.0,7/15/2018,True,editorial_playlist,trackdone,trackdone,210.545258,2018,99.998498,0.006602,0.732428,0.794881,0.857778,12.181586,0.650057,1.000571,2.066649e-06,8,0.120842,-4.919,0.759465,major,0.170148,0.24098,147.031006,4,0.652921,-0.868489,0.33128,0.210478,0.08474,-0.333287,-0.025706,-0.51035,0.182315


In [9]:
# Preview the last five rows of the merged frame.
session_track_data.tail(n=5)

Unnamed: 0,session_id,session_position,session_length,track_id,skip_1,skip_2,skip_3,not_skipped,context_switch,no_pause_before_play,short_pause_before_play,long_pause_before_play,hist_user_behavior_n_seekfwd,hist_user_behavior_n_seekback,hist_user_behavior_is_shuffle,hour_of_day,date,premium,context_type,hist_user_behavior_reason_start,hist_user_behavior_reason_end,duration,release_year,us_popularity_estimate,acousticness,beat_strength,bounciness,danceability,dyn_range_mean,energy,flatness,instrumentalness,key,liveness,loudness,mechanism,mode,organism,speechiness,tempo,time_signature,valence,acoustic_vector_0,acoustic_vector_1,acoustic_vector_2,acoustic_vector_3,acoustic_vector_4,acoustic_vector_5,acoustic_vector_6,acoustic_vector_7
167875,0_0eaeef5d-25e9-4429-bd55-af15d3604c9f,16.0,20.0,t_360910e8-2a84-42b0-baf1-59abcf96a1f2,False,False,False,True,0.0,1.0,0.0,0.0,0.0,0.0,False,13.0,7/15/2018,True,user_collection,trackdone,trackdone,241.306671,2018,99.926393,0.100227,0.55979,0.571664,0.565336,8.411594,0.875369,0.981698,0.000324698,2,0.244097,-5.905,0.430147,minor,0.409132,0.045821,155.546005,4,0.891322,-0.530003,-0.020507,0.221878,0.232069,0.218059,0.086237,-0.181878,0.14504
167876,0_0eaeef5d-25e9-4429-bd55-af15d3604c9f,17.0,20.0,t_aa2fff77-9b0a-4fa3-a685-ecef50310e8a,False,False,False,True,0.0,1.0,0.0,0.0,0.0,0.0,False,13.0,7/15/2018,True,user_collection,trackdone,trackdone,212.533325,2018,99.905106,0.247189,0.601121,0.632665,0.703095,9.269985,0.529001,1.025182,6.178943e-08,1,0.322316,-7.339,0.438776,minor,0.433633,0.03652,134.427002,4,0.701824,-0.472023,-0.011628,0.234998,0.169529,0.321254,0.105851,-0.215453,-0.013233
167877,0_0eaeef5d-25e9-4429-bd55-af15d3604c9f,18.0,20.0,t_f673e1b7-4ebe-4fc1-ac24-a9f25de70381,False,False,False,True,0.0,1.0,0.0,0.0,0.0,0.0,False,13.0,7/15/2018,True,user_collection,trackdone,trackdone,204.293335,2018,99.915604,0.2984,0.706898,0.730013,0.791413,10.419632,0.951893,0.940008,0.0004260943,1,0.274913,-4.703,0.896789,major,0.223266,0.094902,153.979996,4,0.961417,-0.628608,0.052259,0.219314,0.238612,0.032383,0.072873,-0.129108,0.214087
167878,0_0eaeef5d-25e9-4429-bd55-af15d3604c9f,19.0,20.0,t_e172e8e7-7161-42a9-acb0-d606346c8f87,False,False,False,True,0.0,1.0,0.0,0.0,0.0,0.0,False,13.0,7/15/2018,True,user_collection,trackdone,trackdone,193.773331,2018,99.882031,0.03811,0.492183,0.499016,0.645011,7.658142,0.871987,0.983273,0.004341016,4,0.047525,-4.2,0.62753,minor,0.264751,0.07543,87.466003,4,0.743112,-0.675257,0.102726,0.123573,0.13624,-0.222776,-0.034961,-0.478699,0.19782
167879,0_0eaeef5d-25e9-4429-bd55-af15d3604c9f,20.0,20.0,t_77977dd6-597e-4425-8f8f-4efb32ecfba6,False,False,False,True,0.0,1.0,0.0,0.0,0.0,0.0,False,13.0,7/15/2018,True,user_collection,trackdone,trackdone,216.853333,2018,99.877843,0.06599,0.572023,0.620852,0.776665,9.309435,0.726259,1.007707,0.003554401,1,0.139462,-7.509,0.729904,major,0.196605,0.081394,101.018997,4,0.704467,-0.737149,0.165157,0.321281,0.197732,-0.179035,0.010123,-0.281833,0.126489


In [10]:
# Print the column names, non-null counts and dtypes of the merged frame.
session_track_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 167880 entries, 0 to 167879
Data columns (total 50 columns):
 #   Column                           Non-Null Count   Dtype  
---  ------                           --------------   -----  
 0   session_id                       167880 non-null  object 
 1   session_position                 167880 non-null  float64
 2   session_length                   167880 non-null  float64
 3   track_id                         167880 non-null  object 
 4   skip_1                           167880 non-null  object 
 5   skip_2                           167880 non-null  object 
 6   skip_3                           167880 non-null  object 
 7   not_skipped                      167880 non-null  object 
 8   context_switch                   167880 non-null  float64
 9   no_pause_before_play             167880 non-null  float64
 10  short_pause_before_play          167880 non-null  float64
 11  long_pause_before_play           167880 non-null  float64
 12  hi

In [11]:
# Re-key the merged frame by (session_id, track_index) and order it by that key.
# session_position is cast to int and stays as a regular column; track_index is a
# copy of it that gets moved into the index.
session_track_data = (
    session_track_data
    .assign(
        session_position=lambda frame: frame['session_position'].astype(int),
        track_index=lambda frame: frame['session_position'],
    )
    .set_index(['session_id', 'track_index'])
    .sort_index()
)

In [12]:
# Preview the first five rows after re-indexing by session and track position.
session_track_data.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,session_position,session_length,track_id,skip_1,skip_2,skip_3,not_skipped,context_switch,no_pause_before_play,short_pause_before_play,long_pause_before_play,hist_user_behavior_n_seekfwd,hist_user_behavior_n_seekback,hist_user_behavior_is_shuffle,hour_of_day,date,premium,context_type,hist_user_behavior_reason_start,hist_user_behavior_reason_end,duration,release_year,us_popularity_estimate,acousticness,beat_strength,bounciness,danceability,dyn_range_mean,energy,flatness,instrumentalness,key,liveness,loudness,mechanism,mode,organism,speechiness,tempo,time_signature,valence,acoustic_vector_0,acoustic_vector_1,acoustic_vector_2,acoustic_vector_3,acoustic_vector_4,acoustic_vector_5,acoustic_vector_6,acoustic_vector_7
session_id,track_index,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1
0_00006f66-33e5-4de7-a324-2d18e439fc1e,1,1,20.0,t_0479f24c-27d2-46d6-a00c-7ec928f2b539,False,False,False,True,0.0,0.0,0.0,0.0,0.0,0.0,True,16.0,7/15/2018,True,editorial_playlist,trackdone,trackdone,180.066666,2018,99.968133,0.015848,0.438551,0.473455,0.653119,7.660024,0.553465,1.035007,0.003484468,1,0.678553,-6.577,0.546784,major,0.320668,0.069717,134.024994,4,0.152255,-0.815775,0.386409,0.23016,0.028028,-0.333373,0.015452,-0.35359,0.205826
0_00006f66-33e5-4de7-a324-2d18e439fc1e,2,2,20.0,t_9099cd7b-c238-47b7-9381-f23f2c1d1043,False,False,False,True,0.0,1.0,0.0,0.0,0.0,0.0,True,16.0,7/15/2018,True,editorial_playlist,trackdone,trackdone,236.796371,2018,99.896728,0.061811,0.654804,0.735661,0.877393,11.30875,0.726828,1.025636,1.031315e-07,7,0.104322,-5.319,0.824766,minor,0.131391,0.061158,130.037994,4,0.337152,-0.713646,0.363718,0.310315,-0.042222,-0.383164,0.066357,-0.365308,0.15792
0_00006f66-33e5-4de7-a324-2d18e439fc1e,3,3,20.0,t_fc5df5ba-5396-49a7-8b29-35d0d28249e0,False,False,False,True,0.0,1.0,0.0,0.0,0.0,0.0,True,16.0,7/15/2018,True,editorial_playlist,trackdone,trackdone,231.266663,2018,99.999976,0.354116,0.532155,0.540411,0.679719,8.065802,0.563009,1.029465,2.659035e-08,10,0.135776,-5.843,0.774327,major,0.296923,0.045354,145.028,4,0.373862,-0.742541,0.375599,0.25266,-0.049007,-0.299745,0.063341,-0.486689,0.181604
0_00006f66-33e5-4de7-a324-2d18e439fc1e,4,4,20.0,t_23cff8d6-d874-4b20-83dc-94e450e8aa20,False,False,False,True,0.0,1.0,0.0,0.0,0.0,0.0,True,16.0,7/15/2018,True,editorial_playlist,trackdone,trackdone,169.826675,2018,99.995038,0.769225,0.641756,0.729224,0.864881,11.287586,0.529484,0.99352,6.598388e-06,1,0.103722,-7.756,0.630996,major,0.603271,0.229936,111.982002,4,0.64942,-0.705116,0.317562,0.289141,-0.03892,-0.393358,0.092719,-0.364418,0.285603
0_00006f66-33e5-4de7-a324-2d18e439fc1e,5,5,20.0,t_64f3743c-f624-46bb-a579-0f3f9a07a123,False,False,False,True,0.0,1.0,0.0,0.0,0.0,0.0,True,16.0,7/15/2018,True,editorial_playlist,trackdone,trackdone,210.545258,2018,99.998498,0.006602,0.732428,0.794881,0.857778,12.181586,0.650057,1.000571,2.066649e-06,8,0.120842,-4.919,0.759465,major,0.170148,0.24098,147.031006,4,0.652921,-0.868489,0.33128,0.210478,0.08474,-0.333287,-0.025706,-0.51035,0.182315
