In [None]:
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.metrics.pairwise import cosine_similarity

# Read the first 1000 rows of the ratings file, then normalise the header
# names: trim surrounding whitespace and remove single-quote characters.
data = pd.read_csv('songsDataset.csv', nrows=1000)
data.columns = (
    data.columns
    .str.strip()
    .str.replace("'", "")
)

# Sparse ratings matrix built from (value, (row, col)) triplets:
# rows are indexed by raw userID, columns by raw songID, values are ratings.
sparse_item_matrix = csr_matrix(
    (
        data["rating"].values,
        (data["userID"].values, data["songID"].values),
    )
)

# Item-item cosine similarity: transpose so that each row describes one song.
item_similarity = cosine_similarity(sparse_item_matrix.T)

# Dense user x song table of ratings; (user, song) pairs without a rating become 0.
user_item_matrix = pd.pivot_table(
    data,
    index='userID',
    columns='songID',
    values='rating',
).fillna(0)

# User-user cosine similarity, labelled on both axes with the userIDs of the
# pivot table so that it can be looked up by userID.
user_similarity_matrix = cosine_similarity(user_item_matrix)
user_similarity_df = pd.DataFrame(
    user_similarity_matrix,
    index=user_item_matrix.index,
    columns=user_item_matrix.index,
)


In [None]:
# Create a function to get similar items
def get_similar_items(song_id, item_similarity, top_n=5):
    """Return the indices of the ``top_n`` items most similar to ``song_id``.

    Parameters
    ----------
    song_id : int
        Row index of the query item in ``item_similarity``.
    item_similarity : numpy.ndarray
        Item-item similarity matrix; row ``song_id`` holds the scores of every
        item against the query item.
    top_n : int, default 5
        Number of neighbours to return.

    Returns
    -------
    numpy.ndarray
        Up to ``top_n`` item indices ordered by descending similarity. The
        query item itself is never included.
    """
    similar_scores = item_similarity[song_id].flatten()
    # Stable descending order: ties are resolved by ascending item index, which
    # keeps the result deterministic.
    ranked = (-similar_scores).argsort(kind="stable")
    # Remove the query item by identity rather than assuming it sorts first:
    # with tied scores (or an all-zero row) it is not guaranteed to be on top.
    ranked = ranked[ranked != song_id]
    return ranked[:top_n]

# Create a function to recommend songs (item-based)
def recommend_songs_item_based(user_id, sparse_item_matrix, item_similarity, top_n=5):
    """Recommend songs for ``user_id`` from the neighbours of songs they rated.

    Every song the user rated (> 0) votes for its most similar songs with a
    weight equal to the user's rating; votes are summed per candidate song.

    Parameters
    ----------
    user_id : int
        Row index of the user in ``sparse_item_matrix``.
    sparse_item_matrix : scipy.sparse matrix
        User x song ratings; row ``user_id`` holds that user's ratings.
    item_similarity : numpy.ndarray
        Item-item similarity matrix forwarded to ``get_similar_items``.
    top_n : int, default 5
        Neighbours fetched per rated song and maximum number of results.

    Returns
    -------
    list
        Up to ``top_n`` song indices ordered by descending accumulated vote.
        Songs the user has already rated are never returned.
    """
    user_ratings = sparse_item_matrix[user_id].toarray().flatten()
    rated = {i: rating for i, rating in enumerate(user_ratings) if rating > 0}

    recommendations = {}
    for song_id, rating in rated.items():
        for similar_item in get_similar_items(song_id, item_similarity, top_n):
            # Recommending something the user has already rated is useless,
            # so such neighbours are skipped.
            if similar_item in rated:
                continue
            recommendations[similar_item] = recommendations.get(similar_item, 0) + rating

    # sorted() is stable, so equally scored songs keep first-seen order.
    ranked = sorted(recommendations.items(), key=lambda x: x[1], reverse=True)
    return [song_id for song_id, _ in ranked[:top_n]]

# Create a function to get similar users
def get_user_similarity(target_user, user_similarity_df, top_n=5):
    """Return the ``top_n`` users most similar to ``target_user``.

    Parameters
    ----------
    target_user : hashable
        Column label of the user in ``user_similarity_df``.
    user_similarity_df : pandas.DataFrame
        User-user similarity table; column ``target_user`` holds the score of
        every user (row label) against the target.
    top_n : int, default 5
        Number of neighbours to return.

    Returns
    -------
    list
        Up to ``top_n`` user labels ordered by descending similarity. The
        target user is never included.

    Raises
    ------
    KeyError
        If ``target_user`` is not a column of ``user_similarity_df``.
    """
    # Drop the target by label instead of assuming it sorts first: another
    # user with an identical profile ties at the top and could otherwise push
    # the target itself into the result.
    scores = user_similarity_df[target_user].drop(labels=target_user, errors="ignore")
    # mergesort is stable, so tied users come back in a deterministic order.
    ranked = scores.sort_values(ascending=False, kind="mergesort")
    return ranked.index[:top_n].tolist()

# Create a function to recommend songs (user-based)
def recommend_songs_user_based(userID, user_item_matrix, user_similarity_df, num_recs=5):
    """Recommend songs for ``userID`` from what their most similar users rated.

    Parameters
    ----------
    userID : hashable
        Label of the target user.
    user_item_matrix : pandas.DataFrame
        User x song ratings table (rows labelled by user, columns by song);
        a value > 0 is treated as "rated".
    user_similarity_df : pandas.DataFrame
        User-user similarity table forwarded to ``get_user_similarity``.
    num_recs : int, default 5
        Maximum number of songs to return.

    Returns
    -------
    list
        Up to ``num_recs`` distinct song labels ordered by the highest rating
        any similar user gave them. Songs the target user already rated are
        excluded.
    """
    similar_users = get_user_similarity(userID, user_similarity_df)

    # Songs the target already rated must not be recommended back to them.
    already_rated = set()
    if userID in user_item_matrix.index:
        own_ratings = user_item_matrix.loc[userID]
        already_rated = set(own_ratings[own_ratings > 0].index)

    # Keep one entry per song (its best neighbour rating) so the result can
    # never contain the same song twice.
    best_rating = {}
    for similar_userID in similar_users:
        neighbour_ratings = user_item_matrix.loc[similar_userID]
        for songID, rating in neighbour_ratings[neighbour_ratings > 0].items():
            if songID in already_rated:
                continue
            if rating > best_rating.get(songID, 0):
                best_rating[songID] = rating

    # sorted() is stable, so equally rated songs keep first-seen order.
    ranked = sorted(best_rating.items(), key=lambda x: x[1], reverse=True)
    return [songID for songID, _ in ranked[:num_recs]]


In [None]:
# Generate predictions for a user using both methods
def generate_predictions(user_id, sparse_item_matrix, item_similarity, user_item_matrix, user_similarity_df, top_n=5):
    """Run both recommenders for ``user_id`` and return their results together.

    Returns a 2-tuple ``(item_based, user_based)``: the first element is the
    output of ``recommend_songs_item_based`` and the second the output of
    ``recommend_songs_user_based``; ``top_n`` is forwarded to both as the
    size limit.
    """
    return (
        recommend_songs_item_based(user_id, sparse_item_matrix, item_similarity, top_n),
        recommend_songs_user_based(user_id, user_item_matrix, user_similarity_df, top_n),
    )

# Interactive example: ask for a user ID, run both recommenders with the
# default list size and print each result.
user_id = int(input("Enter UserID: "))
item_based_recs, user_based_recs = generate_predictions(
    user_id=user_id,
    sparse_item_matrix=sparse_item_matrix,
    item_similarity=item_similarity,
    user_item_matrix=user_item_matrix,
    user_similarity_df=user_similarity_df,
)
print("Item-based recommendations:", item_based_recs)
print("User-based recommendations:", user_based_recs)


Enter UserID: 5
Item-based recommendations: [128621, 98571, 8063, 95898, 24033]
User-based recommendations: [105433, 106513, 5542, 12447, 19690]


In [None]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor

# Number of entries every recommendation list is normalised to, so that the
# concatenated feature vectors all have the same width.
FIXED_LENGTH = 5

def pad_list(lst, length):
    """Return a new list of exactly ``length`` entries built from ``lst``.

    Shorter inputs are right-padded with ``0``; longer inputs are truncated to
    their first ``length`` entries, so the result always has a fixed width.
    The input is not modified and may be any iterable (list, tuple, array).

    Parameters
    ----------
    lst : iterable
        Values to normalise.
    length : int
        Desired number of entries (expected to be non-negative).

    Returns
    -------
    list
        A list with ``length`` entries.
    """
    items = list(lst)[:length]
    # NOTE(review): 0 is used as the filler value; if 0 can also be a real
    # song ID the two are indistinguishable downstream - confirm with the data.
    return items + [0] * (length - len(items))

# Prepare features and target for base models
X = []
y = []

# One training row per observed rating of the first 100 distinct users:
# features = padded item-based + user-based recommendation lists,
# target   = the rating stored on that row.
# NOTE(review): the feature vector depends only on the user, so every row of
# the same user carries identical features (raw song IDs) - confirm that this
# is the intended model input.
for user_id in data['userID'].unique()[:100]:
    user_data = data[data['userID'] == user_id]
    # The recommendations do not depend on the individual song, so compute
    # them once per user instead of once per rated song.
    item_based_recs, user_based_recs = generate_predictions(user_id, sparse_item_matrix, item_similarity, user_item_matrix, user_similarity_df)
    item_based_recs = pad_list(item_based_recs, FIXED_LENGTH)
    user_based_recs = pad_list(user_based_recs, FIXED_LENGTH)
    features = item_based_recs + user_based_recs  # Combine both sets of recommendations
    # Read each row's own rating directly rather than re-filtering the frame
    # by songID and taking the first match.
    for rating in user_data['rating']:
        X.append(features)
        y.append(rating)

X = np.array(X)
y = np.array(y)

# Train-test split (fixed seed so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train base models
# The tree is seeded so repeated runs give the same model.
dt = DecisionTreeRegressor(random_state=42)
knn = KNeighborsRegressor()
# NOTE(review): LogisticRegression is a classifier, so the ratings are treated
# as discrete class labels here - confirm this is intended next to the two
# regressors.
lr = LogisticRegression(max_iter=1000)

# Final fits on the full training split; these fitted models are the ones used
# later for test-time and inference-time predictions.
dt.fit(X_train, y_train)
knn.fit(X_train, y_train)
lr.fit(X_train, y_train)

# Generate base model predictions for the meta-model.
# Predicting X_train with models that were fitted on X_train leaks the labels
# into the meta-features (the tree in particular can reproduce its training
# targets). Use out-of-fold predictions instead: every training row is
# predicted by a clone that never saw it. cross_val_predict clones the
# estimators, so the fitted dt / knn / lr above are left untouched.
pred_dt = cross_val_predict(dt, X_train, y_train, cv=5)
pred_knn = cross_val_predict(knn, X_train, y_train, cv=5)
pred_lr = cross_val_predict(lr, X_train, y_train, cv=5)


In [None]:
from sklearn.linear_model import LinearRegression

# Meta-model input: one column per base model, holding that model's
# predictions for the training rows.
stacked_features = np.column_stack((pred_dt, pred_knn, pred_lr))

# Fit the linear meta-model on the stacked base-model predictions.
meta_model = LinearRegression()
meta_model.fit(stacked_features, y_train)

# Evaluate on the test split: run the base models (in the same column order
# as above), stack their outputs and let the meta-model produce the final
# prediction.
pred_dt_test, pred_knn_test, pred_lr_test = (
    base_model.predict(X_test) for base_model in (dt, knn, lr)
)
stacked_features_test = np.column_stack((pred_dt_test, pred_knn_test, pred_lr_test))
y_pred = meta_model.predict(stacked_features_test)
print("Mean Squared Error:", mean_squared_error(y_test, y_pred))


Mean Squared Error: 1.6418628354119422


In [None]:
# Evaluate the stacked model's prediction error on the test split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

# Error metrics of the meta-model predictions against the test targets.
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is the square root of the MSE computed above

# Report the three metrics in the order MSE, MAE, RMSE.
print("Mean Squared Error (MSE):", mse)
print("Mean Absolute Error (MAE):", mae)
print("Root Mean Squared Error (RMSE):", rmse)


Mean Squared Error (MSE): 1.6418628354119422
Mean Absolute Error (MAE): 1.0262380952380952
Root Mean Squared Error (RMSE): 1.2813519561041542


In [None]:
def make_final_recommendation(user_id, sparse_item_matrix, item_similarity, user_item_matrix, user_similarity_df, dt, knn, lr, meta_model, top_n=5):
    """Return up to ``top_n`` unique song recommendations for ``user_id``.

    Candidates are the item-based list followed by the user-based list from
    ``generate_predictions``. Both lists, padded to ``FIXED_LENGTH``, form a
    single feature row that is passed through the three base models and then
    the meta-model.

    Parameters
    ----------
    user_id : int
        User to recommend for.
    sparse_item_matrix, item_similarity, user_item_matrix, user_similarity_df
        Forwarded unchanged to ``generate_predictions``.
    dt, knn, lr : fitted estimators
        Base models; each must expose ``predict``.
    meta_model : fitted estimator
        Model that turns the stacked base predictions into the final score.
    top_n : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    list
        At most ``top_n`` distinct candidates, highest score first; candidates
        with equal scores keep their original order (item-based first).
    """
    item_based_recs, user_based_recs = generate_predictions(user_id, sparse_item_matrix, item_similarity, user_item_matrix, user_similarity_df)

    # Keep the real candidates apart from the padded feature row so that the
    # zero padding can never be returned as a recommendation.
    candidates = list(item_based_recs) + list(user_based_recs)
    combined_features = np.array(
        pad_list(item_based_recs, FIXED_LENGTH) + pad_list(user_based_recs, FIXED_LENGTH)
    ).reshape(1, -1)

    stacked_features = np.column_stack((
        dt.predict(combined_features),
        knn.predict(combined_features),
        lr.predict(combined_features),
    ))
    final_scores = np.ravel(meta_model.predict(stacked_features))

    # The models are given ONE feature row, so one score is expected. Zipping
    # that single score against the candidate list silently truncated the
    # result to the first candidate only; attach the row score to every
    # candidate instead (setdefault also removes duplicates, keeping the first
    # occurrence).
    row_score = final_scores[0]
    combined_recs = {}
    for rec in candidates:
        combined_recs.setdefault(rec, row_score)

    # sorted() is stable: with equal scores the candidate order is preserved.
    sorted_recs = sorted(combined_recs.items(), key=lambda x: x[1], reverse=True)
    return [rec for rec, _ in sorted_recs[:top_n]]

In [None]:
# Interactive example: ask for a user ID and print the stacked model's
# recommendations (default list size).
user_id = int(input("Enter UserID: "))
final_recs = make_final_recommendation(
    user_id=user_id,
    sparse_item_matrix=sparse_item_matrix,
    item_similarity=item_similarity,
    user_item_matrix=user_item_matrix,
    user_similarity_df=user_similarity_df,
    dt=dt,
    knn=knn,
    lr=lr,
    meta_model=meta_model,
)
print("Final recommendations for user", user_id, ":", final_recs)

Enter UserID: 9
Final recommendations for user 9 : [130766]
