In [None]:
import os
# Print the full path of every file found beneath the Kaggle input directory.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(folder, name)
        print(full_path)


In [None]:
#Modules for EDA
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

#Modules for ML
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow import keras
from sklearn.metrics import classification_report,confusion_matrix
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8*' and
# later releases removed the old 'seaborn' alias, so use whichever name this
# installation actually provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
%matplotlib inline

In [None]:
# Load the Spotify recommendation CSV (path is relative to the notebook's working directory).
df = pd.read_csv('../input/spotify-recommendation/data.csv')
# Show (n_rows, n_columns) as the cell output.
df.shape

In [None]:
# Summarize column names, dtypes and non-null counts.
df.info()

In [None]:
# Count missing values per column.
df.isna().sum()

In [None]:
# Preview the first rows of the raw data.
df.head()

In [None]:
# Class balance of the target: share of each 'liked' value as a pie chart.
liked_counts = df['liked'].value_counts()
liked_counts.plot.pie(autopct='%.2f')
plt.show()

In [None]:
# Pairwise feature relationships, coloured by the 'liked' target.
# sns.pairplot is a figure-level function: it creates its own figure and does
# not draw into the current one, so a preceding plt.figure(...) call only
# produced an extra empty figure. Use pairplot's `height`/`aspect` arguments
# if a different size is wanted.
sns.pairplot(data=df, hue='liked')
plt.show()

# **Feature Scaling**

In [None]:
# Standardize every column except the 'liked' target.
# NOTE(review): the scaler is fitted on the full dataset before the
# train/test split, so test-set statistics influence the scaling.
feature_values = df.drop(columns='liked')
std = StandardScaler()
scalled = std.fit(feature_values).transform(feature_values)

In [None]:
# Write the standardized values back into df, one column at a time.
# The feature columns are selected exactly the way the scaler input was built
# (everything except 'liked'), so each scaled column lands on the right
# feature even if 'liked' is not the last column. enumerate replaces the
# hand-maintained counter.
feature_columns = df.drop('liked', axis=1).columns
for i, column in enumerate(feature_columns):
    df[column] = scalled[:, i]

In [None]:
# Preview the frame again; feature columns now hold the standardized values.
df.head()

# **Splitting and training data**

In [None]:
# Separate the feature matrix from the 'liked' target.
x = df.drop(columns='liked')
y = df['liked']
x.shape, y.shape

In [None]:
# Hold out 30% of the rows for testing; random_state fixes the shuffle.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=1,
)
tuple(part.shape for part in (x_train, x_test, y_train, y_test))

In [None]:
# Check the class balance of the training target.
y_train.value_counts()

# **Model Building and predictions**

In [None]:
# Small fully-connected binary classifier: 13 inputs -> 13 -> 7 -> 1.
model = keras.Sequential()
model.add(keras.layers.Dense(13, input_shape=(13,), activation='relu'))
model.add(keras.layers.Dense(7, activation='relu'))
# Single sigmoid unit, so the output lies in [0, 1].
model.add(keras.layers.Dense(1, activation='sigmoid'))

# Binary cross-entropy pairs with the sigmoid output; accuracy is tracked for reporting.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Train on the training split for 200 epochs.
# NOTE(review): the original comment says this cell was run twice, so the
# reported results presumably come from about 400 epochs in total, while a
# fresh "Restart & Run All" trains for 200. Confirm before comparing metrics.
model.fit(x_train, y_train, epochs=200) #Run twice

In [None]:
# Loss and accuracy (the metric set in compile) on the held-out test split.
model.evaluate(x_test, y_test)

In [None]:
def predict(data, threshold=0.5):
    """Return hard 0/1 class labels for ``data`` using the global ``model``.

    Parameters
    ----------
    data : array-like
        Feature rows, passed unchanged to ``model.predict``.
    threshold : float, default 0.5
        Model outputs greater than or equal to this become 1, all others 0.

    Returns
    -------
    numpy.ndarray
        Flat array of 0/1 labels with the same dtype as the model output.
    """
    proba = model.predict(data).flatten()
    # One vectorized comparison instead of two in-place masked assignments:
    # the two-step form re-tests values already set to 1, which is only
    # correct while the threshold is at most 1.
    return (proba >= threshold).astype(proba.dtype)

def plot_cm(y_true,y_pred,title=None):
    """Plot the confusion matrix of ``y_pred`` against ``y_true`` as a heatmap.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels.
    title : str, optional
        Figure title; no title is set when omitted.
    """
    cm = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(10,10))
    ax = sns.heatmap(cm, annot=True, fmt='g',cmap='YlGnBu')
    # confusion_matrix puts true classes on rows and predicted classes on
    # columns; label the axes so the figure can be read on its own.
    ax.set_xlabel('Predicted label')
    ax.set_ylabel('True label')
    if title is not None:
        ax.set_title(title)
    plt.show()

# **Test Data Report**

In [None]:
# Hard predictions on the held-out test split. The title distinguishes this
# confusion matrix from the train-data one further down.
y_pred_test = predict(x_test)
plot_cm(y_test, y_pred_test, title='Confusion matrix - test data')

In [None]:
# Per-class precision, recall and F1 on the test split.
print(classification_report(y_test, y_pred_test))

# **Train Data Report**

In [None]:
# Hard predictions on the training split, for comparison with the test
# results. The title distinguishes this confusion matrix from the test-data one.
y_pred_train = predict(x_train)
plot_cm(y_train, y_pred_train, title='Confusion matrix - train data')

In [None]:
# Per-class precision, recall and F1 on the training split.
print(classification_report(y_train, y_pred_train))

# **Saving Model as file**

In [None]:
!mkdir model
!ls

In [None]:
# Save the trained model under model/Spotify_model (the path has no file extension).
# NOTE(review): newer Keras releases may require a .keras or .h5 suffix here — confirm against the installed version.
model.save('model/Spotify_model')

In [None]:
# List the saved artifacts to confirm the model was written.
!ls model/
!ls model/Spotify_model

# **Creating Predict Function to predict new data**

In [None]:
# Re-read the raw (unscaled) data and fit a fresh scaler on its features, then
# print the fitted per-feature mean and scale.
df2 = pd.read_csv('../input/spotify-recommendation/data.csv')
raw_features = df2.drop(columns='liked')

std2 = StandardScaler().fit(raw_features)
for fitted_stat in (std2.mean_, std2.scale_):
    print(fitted_stat)

In [None]:
def new_predict(data, model_path='model/Spotify_model'):
    """Print whether the song described by ``data`` is predicted to be liked.

    Parameters
    ----------
    data : array-like, shape (n_samples, 13)
        Raw (unscaled) feature rows. Must be 2-D; only the prediction for the
        first row is reported.
    model_path : str, default 'model/Spotify_model'
        Location of the saved Keras model, relative to the working directory.

    Returns
    -------
    None
        The verdict and the predicted probability are printed.
    """
    # The model is loaded on every call so the function works on its own,
    # given only the saved model directory.
    model2 = keras.models.load_model(model_path)

    # Hard-coded per-feature mean and standard deviation used to standardize
    # the input; presumably copied from the std2.mean_ / std2.scale_ printout.
    # If the scaler is ever refitted, these must be updated to match.
    mean = np.array([ 6.36656410e-01,  6.38431487e-01,  5.49743590e+00, -9.48163077e+00,
                      5.38461538e-01, 1.48957436e-01,  3.19093091e-01,  1.92337256e-01,
                      1.48455385e-01,  4.93631795e-01,  1.21086174e+02,  2.13408933e+05,
                      3.91282051e+00])
    # Fixed: the second entry was written as 2.59428046e-0 (i.e. about 2.59),
    # a truncated exponent that scaled that feature with a standard deviation
    # 10x too large.
    stddev =   np.array([2.16057827e-01, 2.59428046e-01, 3.40644090e+00, 6.0833346e+00,
                         4.98518515e-01, 1.20104503e-01, 3.19958090e-01, 3.45337110e-01,
                         1.05702467e-01, 2.67007774e-01, 2.80127239e+01, 7.19671489e+04,
                         4.50173464e-01])

    # Convert explicitly so list inputs such as [row] broadcast against the
    # 13-element statistics.
    scalled = (np.asarray(data, dtype=float) - mean)/stddev
    predict_proba = model2.predict(scalled)[0][0]
    if predict_proba >= 0.5:
        print("You will Like this song")
    else:
        print("You will Dislike this song")
    # The probability line is the same for both verdicts.
    print("Probability to like this song:",predict_proba)

In [None]:
# Demo: take the first row of the raw data without its last column (the
# target) and run it through the standalone predictor.
first_row = df2.loc[0]
case1 = first_row.iloc[:-1]
print("data:\n", case1)
print('\n')
new_predict([case1])

In [None]:
# Demo: same check for the row labelled 4, again dropping the last column
# (the target) before predicting.
fifth_row = df2.loc[4]
case2 = fifth_row.iloc[:-1]
print("data:\n", case2)
print('\n')
new_predict([case2])