<a href="https://colab.research.google.com/github/omeshsingh/Charater_Recognition/blob/main/sportify_recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

# Toy interaction log: each tuple is one (user_id, song_id, rating) observation.
interactions = [
    (1, 10, 5),
    (1, 20, 3),
    (1, 30, 4),
    (2, 20, 2),
    (2, 40, 5),
    (3, 10, 4),
    (3, 30, 1),
    (3, 50, 3),
]

# Pivot the row-wise records into the column-wise dict that DataFrame consumes.
columns = ('user_id', 'song_id', 'rating')
data = {name: list(values) for name, values in zip(columns, zip(*interactions))}

df = pd.DataFrame(data)
df.head()

Unnamed: 0,user_id,song_id,rating
0,1,10,5
1,1,20,3
2,1,30,4
3,2,20,2
4,2,40,5


In [None]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Embedding, Dot, Input

class RecommenderNet(Model):
    """Score a (user, song) pair as the dot product of two learned embeddings.

    Args:
        num_users: Number of rows in the user embedding table.
        num_songs: Number of rows in the song embedding table.
        embedding_size: Width of every embedding vector.
    """

    def __init__(self, num_users, num_songs, embedding_size):
        super().__init__()

        def build_table(vocabulary_size):
            # Both tables share the same initializer and L2 penalty settings;
            # each call creates its own regularizer instance.
            return Embedding(
                vocabulary_size,
                embedding_size,
                embeddings_initializer='he_normal',
                embeddings_regularizer=tf.keras.regularizers.l2(1e-6),
            )

        self.user_embedding = build_table(num_users)
        self.song_embedding = build_table(num_songs)
        self.dot = Dot(axes=1)

    def call(self, inputs):
        """Return the user/song dot product.

        Args:
            inputs: Indexable pair; element 0 holds user indices and element 1
                holds song indices.
        """
        users, songs = inputs[0], inputs[1]
        return self.dot([self.user_embedding(users), self.song_embedding(songs)])


In [None]:
# Prepare training data
# Raw ids are arbitrary values, while the embedding tables are indexed from 0,
# so build a raw-id -> contiguous-index lookup for users and for songs.
user_ids = df['user_id'].unique()
song_ids = df['song_id'].unique()

user_id_map = {raw_id: index for index, raw_id in enumerate(user_ids)}
song_id_map = {raw_id: index for index, raw_id in enumerate(song_ids)}

num_users = len(user_ids)
num_songs = len(song_ids)

# Encode into a new frame instead of overwriting df's id columns. If df were
# mutated, re-running this cell would rebuild the maps from already-encoded
# indices, and recommend_songs would then return indices rather than song ids.
encoded = df.assign(
    user_id=df['user_id'].map(user_id_map),
    song_id=df['song_id'].map(song_id_map),
)

X_train = encoded[['user_id', 'song_id']].values
y_train = encoded['rating'].values

# 50 is the embedding width passed to RecommenderNet.
model = RecommenderNet(num_users, num_songs, 50)
model.compile(optimizer='adam', loss='mean_squared_error')

# The model takes user indices and song indices as two separate inputs.
history = model.fit([X_train[:, 0], X_train[:, 1]], y_train, epochs=10, verbose=1)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
import numpy as np

def recommend_songs(user_id, num_recommendations=10):
    """Return the songs with the highest predicted score for a user, best first.

    Relies on the module-level ``model``, ``user_id_map``, ``num_songs`` and
    ``song_ids``.

    Args:
        user_id: Raw user id; must be a key of ``user_id_map``.
        num_recommendations: Maximum number of songs to return. Zero or a
            negative value yields an empty list.

    Returns:
        A list of raw song ids ordered from highest to lowest predicted score.

    Raises:
        KeyError: If ``user_id`` is not present in ``user_id_map``.
    """
    user_index = user_id_map[user_id]
    song_indices = np.arange(num_songs)

    # Pair the same user with every known song so one predict call scores them all.
    user_indices = np.full(num_songs, user_index)
    scores = np.asarray(model.predict([user_indices, song_indices])).reshape(-1)

    # argsort is ascending, so reverse it before truncating. Slicing from the
    # front (instead of [-n:]) also makes n == 0 return nothing rather than all.
    limit = max(num_recommendations, 0)
    top_song_indices = np.argsort(scores)[::-1][:limit]
    return [song_ids[index] for index in top_song_indices]

# Demo: fetch and display suggestions for one of the sample users.
user_id = 1  # raw id as it appears in the interaction data
recommendations = recommend_songs(user_id=user_id)
print("Recommended songs for user", user_id, ":", recommendations)


Recommended songs for user 1 : [30, 50, 20, 40, 10]


In [None]:
import shutil
import os

# Create the .kaggle directory
os.makedirs('/root/.kaggle', exist_ok=True)

# Move kaggle.json from the upload location into the .kaggle directory.
# Only move when it is still at the upload location, so re-running this cell
# after a successful move does not fail on a missing source file.
if os.path.exists('/content/kaggle.json'):
    shutil.move('/content/kaggle.json', '/root/.kaggle/kaggle.json')

# Restrict the credentials file to owner read/write.
# The mode must be an octal literal: decimal 600 is 0o1130, which is not rw-------.
os.chmod('/root/.kaggle/kaggle.json', 0o600)

# Authenticate with Kaggle
# NOTE(review): kept after the credentials are in place, as in the original;
# importing the kaggle package may itself try to read kaggle.json — confirm.
from kaggle.api.kaggle_api_extended import KaggleApi

api = KaggleApi()
api.authenticate()


In [None]:
# Fetch the dataset archive from Kaggle and unpack it into the current
# working directory.
dataset = 'neferfufi/lastfm'
api.dataset_download_files(dataset, unzip=True, path='.')


Dataset URL: https://www.kaggle.com/datasets/neferfufi/lastfm


In [None]:
import pandas as pd

# Load the dataset (adjust the file name if necessary)
filename = '/content/userid-timestamp-artid-artname-traid-traname.tsv'

# The file has no header row: with the default header=0 the first record was
# consumed as column names. Read with header=None and name the columns here.
# NOTE(review): the fifth column is called 'unknown' to match the later cell;
# the file name suggests it is the track id — confirm.
column_names = ['user_id', 'timestamp', 'artist_id', 'artist_name', 'unknown', 'track_name']
df = pd.read_csv(filename, sep='\t', header=None, names=column_names, on_bad_lines='skip')

# Display column names
print(df.columns)

# Display the first few rows
df.head()



Index(['user_000001', '2009-05-04T23:08:57Z',
       'f1b1cf71-bd35-4e99-8624-24a6e15f133a', 'Deep Dish', 'Unnamed: 4',
       'Fuck Me Im Famous (Pacha Ibiza)-09-28-2007'],
      dtype='object')


Unnamed: 0,user_000001,2009-05-04T23:08:57Z,f1b1cf71-bd35-4e99-8624-24a6e15f133a,Deep Dish,Unnamed: 4,Fuck Me Im Famous (Pacha Ibiza)-09-28-2007
0,user_000001,2009-05-04T13:54:10Z,a7f7df4a-77d8-4f12-8acd-5c60c93f4de8,坂本龍一,,Composition 0919 (Live_2009_4_15)
1,user_000001,2009-05-04T13:52:04Z,a7f7df4a-77d8-4f12-8acd-5c60c93f4de8,坂本龍一,,Mc2 (Live_2009_4_15)
2,user_000001,2009-05-04T13:42:52Z,a7f7df4a-77d8-4f12-8acd-5c60c93f4de8,坂本龍一,,Hibari (Live_2009_4_15)
3,user_000001,2009-05-04T13:42:11Z,a7f7df4a-77d8-4f12-8acd-5c60c93f4de8,坂本龍一,,Mc1 (Live_2009_4_15)
4,user_000001,2009-05-04T13:38:31Z,a7f7df4a-77d8-4f12-8acd-5c60c93f4de8,坂本龍一,,To Stanford (Live_2009_4_15)


In [None]:
import pandas as pd

# Load the dataset, naming the columns at read time.
# error_bad_lines / warn_bad_lines are no longer accepted by read_csv;
# on_bad_lines='warn' is the replacement for "skip the line but report it".
# The call is deliberately not wrapped in a broad try/except: a failed load
# must stop the cell instead of letting later code run against a stale df.
filename = '/content/userid-timestamp-artid-artname-traid-traname.tsv'
df = pd.read_csv(
    filename,
    sep='\t',
    header=None,
    names=['user_id', 'timestamp', 'artist_id', 'artist_name', 'unknown', 'track_name'],
    on_bad_lines='warn',
)

# Display column names
print(df.columns)

# Display the first few rows to inspect
df.head()




An error occurred: read_csv() got an unexpected keyword argument 'error_bad_lines'


ValueError: Length mismatch: Expected axis has 4 elements, new values have 6 elements

In [None]:
# Keep only the columns used downstream, then discard every row that has a
# missing value in any of them.
keep_cols = ['user_id', 'artist_name', 'track_name']
df = df.loc[:, keep_cols].dropna(axis=0, how='any')

# Preview the preprocessed frame
df.head()


Unnamed: 0,user_id,artist_name,track_name
0,user_000001,坂本龍一,Composition 0919 (Live_2009_4_15)
1,user_000001,坂本龍一,Mc2 (Live_2009_4_15)
2,user_000001,坂本龍一,Hibari (Live_2009_4_15)
3,user_000001,坂本龍一,Mc1 (Live_2009_4_15)
4,user_000001,坂本龍一,To Stanford (Live_2009_4_15)


In [None]:
!pip install scikit-surprise


Collecting scikit-surprise
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/154.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp310-cp310-linux_x86_64.whl size=2357246 sha256=36a61bc6751f54dc5638c3084436fe41b63142ce2a6d68121a8df3a7e7df4c8b
  Stored in directory: /root/.cache/pip/wheels/4b/3f/df/6acbf0a40397d9bf3ff97f582cc22fb9ce66adde75bc71fd54
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Succ

In [None]:
# Treat every logged user-track interaction as implicit feedback with rating 1.
df['rating'] = 1

# Restrict to the first 100 distinct user ids returned by unique().
subset_users = df['user_id'].unique()[:100]
in_subset = df['user_id'].isin(subset_users)
df_subset = df.loc[in_subset]

# Show a preview and the dimensions of the reduced frame
print(df_subset.head(), df_subset.shape, sep='\n')
# Preview of the full frame, which now carries the rating column
df.head()


       user_id artist_name                         track_name  rating
0  user_000001        坂本龍一  Composition 0919 (Live_2009_4_15)       1
1  user_000001        坂本龍一               Mc2 (Live_2009_4_15)       1
2  user_000001        坂本龍一            Hibari (Live_2009_4_15)       1
3  user_000001        坂本龍一               Mc1 (Live_2009_4_15)       1
4  user_000001        坂本龍一       To Stanford (Live_2009_4_15)       1
(19098642, 4)


Unnamed: 0,user_id,artist_name,track_name,rating
0,user_000001,坂本龍一,Composition 0919 (Live_2009_4_15),1
1,user_000001,坂本龍一,Mc2 (Live_2009_4_15),1
2,user_000001,坂本龍一,Hibari (Live_2009_4_15),1
3,user_000001,坂本龍一,Mc1 (Live_2009_4_15),1
4,user_000001,坂本龍一,To Stanford (Live_2009_4_15),1


In [None]:
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

# Load the data into Surprise.
# Train on df_subset (the reduced frame built for this purpose) rather than
# the full df, so that fitting finishes in a reasonable time.
# NOTE(review): every rating is the constant 1 and the scale is (1, 1), so the
# RMSE below carries little information — consider deriving ratings from play
# counts.
reader = Reader(rating_scale=(1, 1))
data = Dataset.load_from_df(df_subset[['user_id', 'track_name', 'rating']], reader)

# Split the data into training and testing sets (seeded for reproducibility)
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Use the SVD algorithm (seeded so repeated runs give the same factors)
algo = SVD(random_state=42)

# Train the algorithm on the training set
algo.fit(trainset)

# Test the algorithm on the test set
predictions = algo.test(testset)

# Compute and print the RMSE
accuracy.rmse(predictions)


KeyboardInterrupt: 

In [None]:
# Function to get song recommendations for a user
def get_recommendations(user_id, n=10):
    """Return the top-n tracks the user has not interacted with yet.

    Relies on the module-level ``df`` (columns ``user_id`` and ``track_name``)
    and on the fitted ``algo``.

    Args:
        user_id: User id as stored in ``df['user_id']``.
        n: Maximum number of recommendations to return.

    Returns:
        A list of ``(track_name, estimated_rating)`` tuples sorted by estimated
        rating, highest first.
    """
    # Collect the user's tracks once; a set gives constant-time membership
    # checks instead of re-filtering the whole frame for every candidate.
    already_rated = set(df.loc[df['user_id'] == user_id, 'track_name'])

    # Predict ratings for all songs not yet rated by the user
    songs_not_rated = [song for song in df['track_name'].unique() if song not in already_rated]
    predictions = [algo.predict(user_id, song) for song in songs_not_rated]

    # Sort predictions by estimated rating
    predictions.sort(key=lambda pred: pred.est, reverse=True)

    # Return the top-n recommendations
    return [(pred.iid, pred.est) for pred in predictions[:n]]

# Demo run: print the top suggestions for a single listener.
user_id = 'some_user_id'  # placeholder; swap in a user id that exists in the dataset
recommendations = get_recommendations(user_id)
for entry in recommendations:
    song, rating = entry
    print(f'Song: {song}, Predicted Rating: {rating}')
