In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from statsmodels.stats.outliers_influence import variance_inflation_factor
import statsmodels.api as sm

In [2]:
df = pd.read_csv("./data/spotify_top_songs_audio_features.csv")
df.head()

Unnamed: 0,id,artist_names,track_name,source,key,mode,time_signature,danceability,energy,speechiness,acousticness,instrumentalness,liveness,valence,loudness,tempo,duration_ms,weeks_on_chart,streams
0,000xQL6tZNLJzIrtIgxqSl,"ZAYN, PARTYNEXTDOOR",Still Got Time (feat. PARTYNEXTDOOR),RCA Records Label,G,Major,4 beats,0.748,0.627,0.0639,0.131,0.0,0.0852,0.524,-6.029,120.963,188491,17,107527761
1,003eoIwxETJujVWmNFMoZy,Alessia Cara,Growing Pains,Def Jam Recordings,C#/Db,Minor,4 beats,0.353,0.755,0.733,0.0822,0.0,0.39,0.437,-6.276,191.153,193680,2,9944865
2,003vvx7Niy0yvhvHt4a68B,The Killers,Mr. Brightside,Island Records,C#/Db,Major,4 beats,0.352,0.911,0.0747,0.00121,0.0,0.0995,0.236,-5.23,148.033,222973,125,512388123
3,00B7TZ0Xawar6NZ00JFomN,"Cardi B, Chance the Rapper",Best Life (feat. Chance The Rapper),Atlantic/KSR,A,Major,4 beats,0.62,0.625,0.553,0.287,0.0,0.314,0.665,-7.438,167.911,284856,2,11985346
4,00Blm7zeNqgYLPtW6zg8cj,"Post Malone, The Weeknd",One Right Now (with The Weeknd),Republic Records,C#/Db,Major,4 beats,0.687,0.781,0.053,0.0361,0.0,0.0755,0.688,-4.806,97.014,193507,30,301860377


In [3]:
ndf = df.drop(["artist_names", "id", "track_name", "source", "key", "mode"], axis = 1)
ndf.head()

Unnamed: 0,time_signature,danceability,energy,speechiness,acousticness,instrumentalness,liveness,valence,loudness,tempo,duration_ms,weeks_on_chart,streams
0,4 beats,0.748,0.627,0.0639,0.131,0.0,0.0852,0.524,-6.029,120.963,188491,17,107527761
1,4 beats,0.353,0.755,0.733,0.0822,0.0,0.39,0.437,-6.276,191.153,193680,2,9944865
2,4 beats,0.352,0.911,0.0747,0.00121,0.0,0.0995,0.236,-5.23,148.033,222973,125,512388123
3,4 beats,0.62,0.625,0.553,0.287,0.0,0.314,0.665,-7.438,167.911,284856,2,11985346
4,4 beats,0.687,0.781,0.053,0.0361,0.0,0.0755,0.688,-4.806,97.014,193507,30,301860377


In [4]:
onehot_mode = pd.get_dummies(df["mode"], prefix = "mode", dtype = "int64")

In [5]:
ndf["time_signature"] = ndf["time_signature"].str.slice(stop = 1).astype(int)

In [6]:
ss = StandardScaler()

In [7]:
ndf_ss = ndf_ss = ss.fit_transform(ndf.drop("streams", axis = 1))

In [8]:
ndf_ss = pd.DataFrame(ndf_ss, columns = ndf.drop("streams", axis = 1).columns)

In [9]:
ndf_ss = pd.concat([ndf_ss, onehot_mode, ndf["streams"]], axis = 1)

In [10]:
ndf_ss.head()

Unnamed: 0,time_signature,danceability,energy,speechiness,acousticness,instrumentalness,liveness,valence,loudness,tempo,duration_ms,weeks_on_chart,mode_Major,mode_Minor,streams
0,0.142698,0.467421,-0.057781,-0.511608,-0.432091,-0.165928,-0.687957,0.139166,0.126844,-0.039242,-0.286115,0.140563,1,0,107527761
1,0.142698,-2.318656,0.718919,5.387041,-0.631465,-0.165928,1.520039,-0.244123,0.029444,2.34705,-0.180639,-0.482303,0,1,9944865
2,0.142698,-2.325709,1.665521,-0.416398,-0.962353,-0.165928,-0.584366,-1.129651,0.441917,0.881074,0.414798,4.625203,1,0,512388123
3,0.142698,-0.435409,-0.069917,3.800198,0.205254,-0.165928,0.969489,0.760357,-0.428773,1.556878,1.672689,-0.482303,1,0,11985346
4,0.142698,0.037166,0.876686,-0.6077,-0.819809,-0.165928,-0.758224,0.861686,0.609115,-0.85345,-0.184156,0.680381,1,0,301860377


In [11]:
x = sm.add_constant(ndf_ss.drop("streams", axis = 1))
y = ndf_ss["streams"]

In [12]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.25, random_state = 7)

In [13]:
lr = LinearRegression()
lr.fit(x_train, y_train)

In [14]:
lr.score(x_test, y_test)

0.7236574032561478

In [15]:
model = sm.OLS(y_train, x_train)
model = model.fit()
model.summary()

0,1,2,3
Dep. Variable:,streams,R-squared:,0.736
Model:,OLS,Adj. R-squared:,0.735
Method:,Least Squares,F-statistic:,1043.0
Date:,"Tue, 26 Mar 2024",Prob (F-statistic):,0.0
Time:,10:50:11,Log-Likelihood:,-97688.0
No. Observations:,4884,AIC:,195400.0
Df Residuals:,4870,BIC:,195500.0
Df Model:,13,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,7.354e+07,1.14e+06,64.692,0.000,7.13e+07,7.58e+07
time_signature,-2.431e+06,1.7e+06,-1.428,0.153,-5.77e+06,9.06e+05
danceability,6.94e+04,1.94e+06,0.036,0.972,-3.74e+06,3.88e+06
energy,3.182e+05,2.76e+06,0.115,0.908,-5.1e+06,5.74e+06
speechiness,-3.786e+06,1.74e+06,-2.177,0.030,-7.2e+06,-3.76e+05
acousticness,1.834e+06,2.07e+06,0.886,0.376,-2.22e+06,5.89e+06
instrumentalness,-7.448e+05,1.64e+06,-0.453,0.650,-3.97e+06,2.48e+06
liveness,8.228e+05,1.7e+06,0.484,0.628,-2.51e+06,4.16e+06
valence,5.732e+05,2e+06,0.286,0.775,-3.35e+06,4.5e+06

0,1,2,3
Omnibus:,3463.413,Durbin-Watson:,2.021
Prob(Omnibus):,0.0,Jarque-Bera (JB):,351132.783
Skew:,2.593,Prob(JB):,0.0
Kurtosis:,44.214,Cond. No.,9240000000000000.0


In [16]:
vif = pd.DataFrame()
vif["VIF_Factor"] = [variance_inflation_factor(x_train.values, i) for i in range(x_train.shape[1])]
vif["feature"] = x_train.columns
vif.round(1)

  return 1 - self.ssr/self.centered_tss
  vif = 1. / (1. - r_squared_i)


Unnamed: 0,VIF_Factor,feature
0,0.0,const
1,1.0,time_signature
2,1.3,danceability
3,2.7,energy
4,1.1,speechiness
5,1.5,acousticness
6,1.1,instrumentalness
7,1.0,liveness
8,1.4,valence
9,2.2,loudness


In [17]:
new_x = x_train.drop("danceability", axis = 1)
model = sm.OLS(y_train, new_x)
model = model.fit()
model.summary()

0,1,2,3
Dep. Variable:,streams,R-squared:,0.736
Model:,OLS,Adj. R-squared:,0.735
Method:,Least Squares,F-statistic:,1130.0
Date:,"Tue, 26 Mar 2024",Prob (F-statistic):,0.0
Time:,10:50:12,Log-Likelihood:,-97688.0
No. Observations:,4884,AIC:,195400.0
Df Residuals:,4871,BIC:,195500.0
Df Model:,12,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,7.354e+07,1.14e+06,64.714,0.000,7.13e+07,7.58e+07
time_signature,-2.426e+06,1.7e+06,-1.430,0.153,-5.75e+06,9e+05
energy,3.023e+05,2.73e+06,0.111,0.912,-5.04e+06,5.65e+06
speechiness,-3.774e+06,1.71e+06,-2.207,0.027,-7.13e+06,-4.22e+05
acousticness,1.816e+06,2.01e+06,0.905,0.365,-2.12e+06,5.75e+06
instrumentalness,-7.457e+05,1.64e+06,-0.454,0.650,-3.97e+06,2.48e+06
liveness,8.158e+05,1.69e+06,0.483,0.629,-2.49e+06,4.13e+06
valence,5.968e+05,1.89e+06,0.316,0.752,-3.11e+06,4.3e+06
loudness,-2.323e+06,2.51e+06,-0.924,0.355,-7.25e+06,2.6e+06

0,1,2,3
Omnibus:,3463.285,Durbin-Watson:,2.021
Prob(Omnibus):,0.0,Jarque-Bera (JB):,351088.458
Skew:,2.593,Prob(JB):,0.0
Kurtosis:,44.211,Cond. No.,1.32e+16


In [18]:
new_x = new_x.drop("energy", axis = 1)
model = sm.OLS(y_train, new_x)
model = model.fit()
model.summary()

0,1,2,3
Dep. Variable:,streams,R-squared:,0.736
Model:,OLS,Adj. R-squared:,0.735
Method:,Least Squares,F-statistic:,1233.0
Date:,"Tue, 26 Mar 2024",Prob (F-statistic):,0.0
Time:,10:50:12,Log-Likelihood:,-97688.0
No. Observations:,4884,AIC:,195400.0
Df Residuals:,4872,BIC:,195500.0
Df Model:,11,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,7.354e+07,1.14e+06,64.720,0.000,7.13e+07,7.58e+07
time_signature,-2.418e+06,1.69e+06,-1.427,0.154,-5.74e+06,9.04e+05
speechiness,-3.778e+06,1.71e+06,-2.210,0.027,-7.13e+06,-4.26e+05
acousticness,1.74e+06,1.89e+06,0.922,0.356,-1.96e+06,5.44e+06
instrumentalness,-7.268e+05,1.63e+06,-0.445,0.657,-3.93e+06,2.48e+06
liveness,8.423e+05,1.67e+06,0.504,0.614,-2.43e+06,4.12e+06
valence,6.544e+05,1.82e+06,0.360,0.719,-2.91e+06,4.22e+06
loudness,-2.153e+06,1.99e+06,-1.082,0.279,-6.05e+06,1.75e+06
tempo,5.501e+05,1.71e+06,0.321,0.748,-2.81e+06,3.91e+06

0,1,2,3
Omnibus:,3463.777,Durbin-Watson:,2.021
Prob(Omnibus):,0.0,Jarque-Bera (JB):,351228.466
Skew:,2.593,Prob(JB):,0.0
Kurtosis:,44.219,Cond. No.,1.14e+16


In [19]:
new_x = new_x.drop("tempo", axis = 1)
model = sm.OLS(y_train, new_x)
model = model.fit()
model.summary()

0,1,2,3
Dep. Variable:,streams,R-squared:,0.736
Model:,OLS,Adj. R-squared:,0.735
Method:,Least Squares,F-statistic:,1357.0
Date:,"Tue, 26 Mar 2024",Prob (F-statistic):,0.0
Time:,10:50:12,Log-Likelihood:,-97688.0
No. Observations:,4884,AIC:,195400.0
Df Residuals:,4873,BIC:,195500.0
Df Model:,10,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,7.354e+07,1.14e+06,64.726,0.000,7.13e+07,7.58e+07
time_signature,-2.434e+06,1.69e+06,-1.437,0.151,-5.75e+06,8.87e+05
speechiness,-3.706e+06,1.7e+06,-2.187,0.029,-7.03e+06,-3.83e+05
acousticness,1.713e+06,1.88e+06,0.909,0.364,-1.98e+06,5.41e+06
instrumentalness,-7.231e+05,1.63e+06,-0.443,0.658,-3.93e+06,2.48e+06
liveness,8.322e+05,1.67e+06,0.498,0.618,-2.44e+06,4.11e+06
valence,6.8e+05,1.82e+06,0.375,0.708,-2.88e+06,4.24e+06
loudness,-2.123e+06,1.99e+06,-1.068,0.286,-6.02e+06,1.77e+06
duration_ms,-2.107e+06,1.7e+06,-1.241,0.215,-5.44e+06,1.22e+06

0,1,2,3
Omnibus:,3465.13,Durbin-Watson:,2.021
Prob(Omnibus):,0.0,Jarque-Bera (JB):,351660.046
Skew:,2.595,Prob(JB):,0.0
Kurtosis:,44.245,Cond. No.,6270000000000000.0


In [20]:
new_x = new_x.drop("valence", axis = 1)
model = sm.OLS(y_train, new_x)
model = model.fit()
model.summary()

0,1,2,3
Dep. Variable:,streams,R-squared:,0.736
Model:,OLS,Adj. R-squared:,0.735
Method:,Least Squares,F-statistic:,1508.0
Date:,"Tue, 26 Mar 2024",Prob (F-statistic):,0.0
Time:,10:50:12,Log-Likelihood:,-97688.0
No. Observations:,4884,AIC:,195400.0
Df Residuals:,4874,BIC:,195500.0
Df Model:,9,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,7.354e+07,1.14e+06,64.731,0.000,7.13e+07,7.58e+07
time_signature,-2.42e+06,1.69e+06,-1.429,0.153,-5.74e+06,9e+05
speechiness,-3.677e+06,1.69e+06,-2.172,0.030,-7e+06,-3.58e+05
acousticness,1.754e+06,1.88e+06,0.932,0.351,-1.93e+06,5.44e+06
instrumentalness,-7.706e+05,1.63e+06,-0.473,0.636,-3.96e+06,2.42e+06
liveness,8.413e+05,1.67e+06,0.504,0.614,-2.43e+06,4.12e+06
loudness,-1.91e+06,1.9e+06,-1.003,0.316,-5.64e+06,1.82e+06
duration_ms,-2.21e+06,1.68e+06,-1.319,0.187,-5.5e+06,1.08e+06
weeks_on_chart,1.981e+08,1.71e+06,115.570,0.000,1.95e+08,2.01e+08

0,1,2,3
Omnibus:,3465.196,Durbin-Watson:,2.021
Prob(Omnibus):,0.0,Jarque-Bera (JB):,351531.175
Skew:,2.595,Prob(JB):,0.0
Kurtosis:,44.237,Cond. No.,6760000000000000.0


In [21]:
new_x = new_x.drop("instrumentalness", axis = 1)
model = sm.OLS(y_train, new_x)
model = model.fit()
model.summary()

0,1,2,3
Dep. Variable:,streams,R-squared:,0.736
Model:,OLS,Adj. R-squared:,0.735
Method:,Least Squares,F-statistic:,1697.0
Date:,"Tue, 26 Mar 2024",Prob (F-statistic):,0.0
Time:,10:50:12,Log-Likelihood:,-97688.0
No. Observations:,4884,AIC:,195400.0
Df Residuals:,4875,BIC:,195500.0
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,7.353e+07,1.14e+06,64.736,0.000,7.13e+07,7.58e+07
time_signature,-2.421e+06,1.69e+06,-1.429,0.153,-5.74e+06,8.99e+05
speechiness,-3.62e+06,1.69e+06,-2.144,0.032,-6.93e+06,-3.09e+05
acousticness,1.772e+06,1.88e+06,0.942,0.346,-1.92e+06,5.46e+06
liveness,8.336e+05,1.67e+06,0.499,0.618,-2.44e+06,4.11e+06
loudness,-1.762e+06,1.88e+06,-0.938,0.348,-5.45e+06,1.92e+06
duration_ms,-2.184e+06,1.67e+06,-1.304,0.192,-5.47e+06,1.1e+06
weeks_on_chart,1.981e+08,1.71e+06,115.607,0.000,1.95e+08,2.01e+08
mode_Major,3.34e+07,1.72e+06,19.439,0.000,3e+07,3.68e+07

0,1,2,3
Omnibus:,3464.495,Durbin-Watson:,2.021
Prob(Omnibus):,0.0,Jarque-Bera (JB):,351472.265
Skew:,2.594,Prob(JB):,0.0
Kurtosis:,44.234,Cond. No.,1.07e+16


In [22]:
new_x = new_x.drop("liveness", axis = 1)
model = sm.OLS(y_train, new_x)
model = model.fit()
model.summary()

0,1,2,3
Dep. Variable:,streams,R-squared:,0.736
Model:,OLS,Adj. R-squared:,0.735
Method:,Least Squares,F-statistic:,1940.0
Date:,"Tue, 26 Mar 2024",Prob (F-statistic):,0.0
Time:,10:50:12,Log-Likelihood:,-97688.0
No. Observations:,4884,AIC:,195400.0
Df Residuals:,4876,BIC:,195400.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,7.353e+07,1.14e+06,64.745,0.000,7.13e+07,7.58e+07
time_signature,-2.434e+06,1.69e+06,-1.438,0.151,-5.75e+06,8.85e+05
speechiness,-3.572e+06,1.69e+06,-2.119,0.034,-6.88e+06,-2.67e+05
acousticness,1.763e+06,1.88e+06,0.937,0.349,-1.92e+06,5.45e+06
loudness,-1.707e+06,1.88e+06,-0.910,0.363,-5.38e+06,1.97e+06
duration_ms,-2.185e+06,1.67e+06,-1.305,0.192,-5.47e+06,1.1e+06
weeks_on_chart,1.98e+08,1.71e+06,115.725,0.000,1.95e+08,2.01e+08
mode_Major,3.34e+07,1.72e+06,19.442,0.000,3e+07,3.68e+07
mode_Minor,4.013e+07,1.88e+06,21.348,0.000,3.64e+07,4.38e+07

0,1,2,3
Omnibus:,3465.075,Durbin-Watson:,2.022
Prob(Omnibus):,0.0,Jarque-Bera (JB):,351646.655
Skew:,2.594,Prob(JB):,0.0
Kurtosis:,44.244,Cond. No.,1.07e+16


In [23]:
new_x = new_x.drop("loudness", axis = 1)
model = sm.OLS(y_train, new_x)
model = model.fit()
model.summary()

0,1,2,3
Dep. Variable:,streams,R-squared:,0.736
Model:,OLS,Adj. R-squared:,0.735
Method:,Least Squares,F-statistic:,2263.0
Date:,"Tue, 26 Mar 2024",Prob (F-statistic):,0.0
Time:,10:50:12,Log-Likelihood:,-97689.0
No. Observations:,4884,AIC:,195400.0
Df Residuals:,4877,BIC:,195400.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,7.353e+07,1.14e+06,64.741,0.000,7.13e+07,7.58e+07
time_signature,-2.469e+06,1.69e+06,-1.459,0.145,-5.79e+06,8.49e+05
speechiness,-3.393e+06,1.67e+06,-2.027,0.043,-6.68e+06,-1.11e+05
acousticness,2.487e+06,1.7e+06,1.460,0.144,-8.53e+05,5.83e+06
duration_ms,-2.19e+06,1.67e+06,-1.308,0.191,-5.47e+06,1.09e+06
weeks_on_chart,1.98e+08,1.71e+06,115.812,0.000,1.95e+08,2.01e+08
mode_Major,3.343e+07,1.72e+06,19.460,0.000,3.01e+07,3.68e+07
mode_Minor,4.009e+07,1.88e+06,21.335,0.000,3.64e+07,4.38e+07

0,1,2,3
Omnibus:,3463.811,Durbin-Watson:,2.021
Prob(Omnibus):,0.0,Jarque-Bera (JB):,351462.527
Skew:,2.593,Prob(JB):,0.0
Kurtosis:,44.233,Cond. No.,1.09e+16


In [24]:
new_x = new_x.drop("duration_ms", axis = 1)
model = sm.OLS(y_train, new_x)
model = model.fit()
model.summary()

0,1,2,3
Dep. Variable:,streams,R-squared:,0.736
Model:,OLS,Adj. R-squared:,0.735
Method:,Least Squares,F-statistic:,2715.0
Date:,"Tue, 26 Mar 2024",Prob (F-statistic):,0.0
Time:,10:50:12,Log-Likelihood:,-97689.0
No. Observations:,4884,AIC:,195400.0
Df Residuals:,4878,BIC:,195400.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,7.353e+07,1.14e+06,64.742,0.000,7.13e+07,7.58e+07
time_signature,-2.446e+06,1.69e+06,-1.445,0.149,-5.76e+06,8.73e+05
speechiness,-3.319e+06,1.67e+06,-1.983,0.047,-6.6e+06,-3.84e+04
acousticness,2.59e+06,1.7e+06,1.521,0.128,-7.47e+05,5.93e+06
weeks_on_chart,1.979e+08,1.71e+06,115.806,0.000,1.95e+08,2.01e+08
mode_Major,3.335e+07,1.72e+06,19.423,0.000,3e+07,3.67e+07
mode_Minor,4.018e+07,1.88e+06,21.395,0.000,3.65e+07,4.39e+07

0,1,2,3
Omnibus:,3468.612,Durbin-Watson:,2.021
Prob(Omnibus):,0.0,Jarque-Bera (JB):,352149.179
Skew:,2.599,Prob(JB):,0.0
Kurtosis:,44.273,Cond. No.,1.07e+16


In [25]:
new_x = new_x.drop("time_signature", axis = 1)
model = sm.OLS(y_train, new_x)
model = model.fit()
model.summary()

0,1,2,3
Dep. Variable:,streams,R-squared:,0.736
Model:,OLS,Adj. R-squared:,0.735
Method:,Least Squares,F-statistic:,3392.0
Date:,"Tue, 26 Mar 2024",Prob (F-statistic):,0.0
Time:,10:50:12,Log-Likelihood:,-97690.0
No. Observations:,4884,AIC:,195400.0
Df Residuals:,4879,BIC:,195400.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,7.353e+07,1.14e+06,64.735,0.000,7.13e+07,7.58e+07
speechiness,-3.509e+06,1.67e+06,-2.103,0.035,-6.78e+06,-2.38e+05
acousticness,2.895e+06,1.69e+06,1.714,0.087,-4.16e+05,6.21e+06
weeks_on_chart,1.979e+08,1.71e+06,115.806,0.000,1.95e+08,2.01e+08
mode_Major,3.332e+07,1.72e+06,19.408,0.000,3e+07,3.67e+07
mode_Minor,4.021e+07,1.88e+06,21.405,0.000,3.65e+07,4.39e+07

0,1,2,3
Omnibus:,3466.803,Durbin-Watson:,2.022
Prob(Omnibus):,0.0,Jarque-Bera (JB):,351108.861
Skew:,2.597,Prob(JB):,0.0
Kurtosis:,44.211,Cond. No.,8050000000000000.0


In [26]:
new_x = new_x.drop("acousticness", axis = 1)
model = sm.OLS(y_train, new_x)
model = model.fit()
model.summary()

0,1,2,3
Dep. Variable:,streams,R-squared:,0.735
Model:,OLS,Adj. R-squared:,0.735
Method:,Least Squares,F-statistic:,4520.0
Date:,"Tue, 26 Mar 2024",Prob (F-statistic):,0.0
Time:,10:50:12,Log-Likelihood:,-97692.0
No. Observations:,4884,AIC:,195400.0
Df Residuals:,4880,BIC:,195400.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,7.353e+07,1.14e+06,64.722,0.000,7.13e+07,7.58e+07
speechiness,-3.724e+06,1.66e+06,-2.238,0.025,-6.99e+06,-4.62e+05
weeks_on_chart,1.979e+08,1.71e+06,115.771,0.000,1.95e+08,2.01e+08
mode_Major,3.333e+07,1.72e+06,19.409,0.000,3e+07,3.67e+07
mode_Minor,4.02e+07,1.88e+06,21.397,0.000,3.65e+07,4.39e+07

0,1,2,3
Omnibus:,3465.17,Durbin-Watson:,2.021
Prob(Omnibus):,0.0,Jarque-Bera (JB):,350929.426
Skew:,2.595,Prob(JB):,0.0
Kurtosis:,44.201,Cond. No.,1.1e+16


In [27]:
fin_train = x_train[["const", "speechiness", "weeks_on_chart", "mode_Major", "mode_Minor"]]
fin_test = x_test[["const", "speechiness", "weeks_on_chart", "mode_Major", "mode_Minor"]]

In [28]:
lr.fit(fin_train, y_train)

In [29]:
lr.score(fin_test, y_test)

0.7238685761142771