# In [None]:

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, silhouette_samples
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
import pickle

# Load Data
path = 'data.csv'
data = pd.read_csv(path)
# Untouched copy of the raw table: it keeps the descriptive columns that are
# removed from the numeric feature matrix below.
data1 = data.copy()

# Data Preprocessing
# Remove the descriptive/identifier columns, then fill any missing value with
# the mean of its column.
data = data.drop(columns=['artists', 'name', 'release_date', 'id'])
data = data.fillna(data.mean())

# Data Normalization
# StandardScaler returns a bare ndarray, so remember the labels of the columns
# that survived the drop and reattach exactly those. (Taking the labels from
# data1 does not work: it still contains the dropped columns, so the label
# count never matches the scaled matrix.)
feature_columns = data.columns
scaler = StandardScaler()
data = pd.DataFrame(
    scaler.fit_transform(data),
    columns=feature_columns,
    index=data1.index,  # keep rows aligned with the raw copy
)

# Feature Selection using PCA
# PCA cannot extract more components than min(n_samples, n_features), so the
# requested 32 is capped at what the scaled feature matrix supports.
# NOTE(review): `data` still contains the scaled popularity column (it is not
# dropped during preprocessing), so the components carry target information
# into anything trained on them -- confirm that this is intended.
n_components = min(32, *data.shape)
pca = PCA(n_components=n_components)
components = pca.fit_transform(data)
data_pca = pd.DataFrame(components)
# Positional assignment: row i of data_pca corresponds to row i of data1.
data_pca['Y'] = data1['popularity'].to_numpy()

# KMeans Clustering
# Cluster on the PCA components only. Fitting on data_pca itself would also
# feed the raw, unscaled 'Y' target into the distance computation and hand
# scikit-learn a frame whose column labels mix integers with a string.
kmeans = KMeans(n_clusters=24, random_state=0)
data_pca['Cluster'] = kmeans.fit_predict(components)

# Split Data
# Features are every column of data_pca except the target ('Y') and the
# cluster label; 20% of the rows are held out for evaluation.
X = data_pca.drop(columns=['Y', 'Cluster'])
Y = data_pca['Y']
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)

# Each section below fits one estimator on the training split, predicts the
# held-out split and prints a classification report. The report header uses
# an explicit "\n" escape: a string literal cannot span two physical lines.

# Train Linear Regression Model
# The regressor outputs continuous values, so they are rounded to the nearest
# integer before being scored as class labels.
# NOTE(review): a regression metric may describe this model better than a
# classification report -- confirm the intended evaluation.
lr = LinearRegression()
lr.fit(X_train, Y_train)
y_pred_lr = lr.predict(X_test)
print("Linear Regression:\n", classification_report(Y_test, y_pred_lr.round()))

# Train Logistic Regression Model
logr = LogisticRegression()
logr.fit(X_train, Y_train)
y_pred_logr = logr.predict(X_test)
print("Logistic Regression:\n", classification_report(Y_test, y_pred_logr))

# Train Decision Tree Model
dt = DecisionTreeClassifier()
dt.fit(X_train, Y_train)
y_pred_dt = dt.predict(X_test)
print("Decision Tree:\n", classification_report(Y_test, y_pred_dt))

# Train Random Forest Model
rf = RandomForestClassifier()
rf.fit(X_train, Y_train)
y_pred_rf = rf.predict(X_test)
print("Random Forest:\n", classification_report(Y_test, y_pred_rf))

# Train SVM Model
svm = SVC()
svm.fit(X_train, Y_train)
y_pred_svm = svm.predict(X_test)
print("SVM:\n", classification_report(Y_test, y_pred_svm))

# Train KNN Model
knn = KNeighborsClassifier()
knn.fit(X_train, Y_train)
y_pred_knn = knn.predict(X_test)
print("KNN:\n", classification_report(Y_test, y_pred_knn))

# Save Models
# Pickle every fitted estimator to "<key>_model.pkl" in the working directory,
# one file per entry of models_dict.
models_dict = dict(lr=lr, logr=logr, dt=dt, rf=rf, svm=svm, knn=knn)
for model_name in models_dict:
    model = models_dict[model_name]
    with open(f'{model_name}_model.pkl', 'wb') as f:
        pickle.dump(model, f)

# Function for Recommendations
def _extract_track_id(spotify_link):
    """Return the track ID contained in a Spotify URI, web URL or bare ID."""
    link = spotify_link.strip()
    if '://' in link:
        # Web URL such as https://open.spotify.com/track/<id>?si=...: the ID
        # is the last path segment, before any query string or fragment.
        path = link.split('?', 1)[0].split('#', 1)[0]
        return path.rstrip('/').rsplit('/', 1)[-1]
    parts = link.split(':')
    # URI form "spotify:track:<id>" keeps the ID in the third field; anything
    # shorter is treated as a bare ID.
    return parts[2] if len(parts) >= 3 else parts[-1]


def recommendation(spotify_link, suggestion_length, data1, data_pca):
    """Suggest tracks that share a cluster with the given track.

    Args:
        spotify_link: A ``spotify:track:<id>`` URI, a web URL whose last path
            segment is the track ID, or the bare track ID.
        suggestion_length: Maximum number of tracks to return.
        data1: Frame with at least the columns ``id``, ``name``, ``artists``,
            ``popularity`` and ``release_date``.
        data_pca: Frame with a ``Cluster`` column whose rows are in the same
            order as the rows of ``data1``.

    Returns:
        A random sample of up to ``suggestion_length`` rows of ``data1`` from
        the same cluster, restricted to the columns ``name``, ``artists``,
        ``popularity`` and ``release_date``. The queried track itself is never
        part of the sample; fewer rows are returned when the cluster is small.

    Raises:
        IndexError: If no row of ``data1`` has the requested track ID.
    """
    spotify_id = _extract_track_id(spotify_link)

    # Work with row positions rather than index labels: the two frames are
    # only guaranteed to agree on row order, not on their index values.
    matches = np.flatnonzero((data1['id'] == spotify_id).to_numpy())
    if matches.size == 0:
        raise IndexError(f"track id {spotify_id!r} not found in data1['id']")
    seed_pos = matches[0]

    clusters = data_pca['Cluster'].to_numpy()
    same_cluster = clusters == clusters[seed_pos]
    same_cluster[seed_pos] = False  # do not recommend the track itself

    candidates = data1[same_cluster]
    # sample() raises when asked for more rows than exist, so cap the request.
    n_suggestions = min(suggestion_length, len(candidates))
    recommendations = candidates.sample(n_suggestions)
    return recommendations[['name', 'artists', 'popularity', 'release_date']]

# Ask which track is playing, then print five suggestions drawn from the
# cluster that track belongs to.
spotify_link = input("Enter Which song were you listening: ")
recommendations = recommendation(
    spotify_link=spotify_link,
    suggestion_length=5,
    data1=data1,
    data_pca=data_pca,
)
print(recommendations)
