In [None]:
import pandas as pd
import nltk
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pickle

# Download the NLTK tokenizer models needed by nltk.word_tokenize.
# Older NLTK releases load the 'punkt' resource; NLTK 3.9 and later look up
# 'punkt_tab' instead, so request both to keep the script working on either
# side of that change.
nltk.download('punkt')
nltk.download('punkt_tab')

# Read the CSV file into a pandas DataFrame
music = pd.read_csv("spotify_millsongdata.csv")

# Sample up to 20,000 rows (capped at the number of rows available, because
# DataFrame.sample raises ValueError when asked for more rows than exist),
# drop the 'link' column, and reset the index so labels match positions.
# NOTE: no random_state is passed, so each run draws a different subset.
music = (
    music.sample(n=min(20000, len(music)))
    .drop(columns='link')
    .reset_index(drop=True)
)

# Text cleaning and stemming
stemmer = PorterStemmer()

def clean_and_stem(text):
    """Return *text* lower-cased, tokenized, and stemmed as one string.

    Newlines are replaced by spaces before tokenizing with
    ``nltk.word_tokenize``; every token is passed through the module-level
    ``stemmer`` and the results are joined with single spaces.
    """
    normalized = text.lower().replace('\n', ' ')
    return " ".join(map(stemmer.stem, nltk.word_tokenize(normalized)))

# Replace the raw text column with its cleaned, stemmed form.
music['text'] = music['text'].apply(clean_and_stem)

# Vectorize the text with TF-IDF over word tokens (English stop words removed),
# then compute the cosine similarity between every pair of rows.
TfidfVector = TfidfVectorizer(stop_words='english', analyzer='word')
matrix = TfidfVector.fit_transform(music['text'])
similarity = cosine_similarity(matrix)

# Persist both artefacts with pickle: the similarity matrix first, then the
# 'music' DataFrame.
for _path, _payload in (('similarity.pkl', similarity), ('music.pkl', music)):
    with open(_path, 'wb') as _out:
        pickle.dump(_payload, _out)

# Read both pickles back (for verification purposes).
with open('similarity.pkl', 'rb') as similarity_file:
    loaded_similarity = pickle.load(similarity_file)

with open('music.pkl', 'rb') as music_file:
    loaded_music = pickle.load(music_file)

# Verifying the recommendation function with loaded data
def recommendation(song_name, top_n=4, *, music_df=None, similarity_matrix=None):
    """Return the titles of the songs most similar to *song_name*.

    Args:
        song_name: Exact value to look up in the ``'song'`` column. When
            several rows share the title, the first one is used.
        top_n: Maximum number of titles to return (default 4, the count the
            original implementation returned). Values below 0 are treated
            as 0.
        music_df: DataFrame with a ``'song'`` column. Defaults to the
            module-level ``loaded_music``.
        similarity_matrix: Row-indexable matrix where entry ``[i][j]`` is the
            similarity between rows ``i`` and ``j`` of *music_df*. Defaults
            to the module-level ``loaded_similarity``.

    Returns:
        A list of song titles ordered from most to least similar. The
        queried row itself is never included.

    Raises:
        IndexError: If *song_name* does not occur in the ``'song'`` column.
    """
    if music_df is None:
        music_df = loaded_music
    if similarity_matrix is None:
        similarity_matrix = loaded_similarity

    # Work with positions rather than index labels: the similarity matrix is
    # positional, so this stays correct even if the frame's index is not 0..n-1.
    positions = (music_df['song'] == song_name).to_numpy().nonzero()[0]
    if positions.size == 0:
        raise IndexError(f"song {song_name!r} not found in the music catalogue")
    idx = int(positions[0])

    scores = similarity_matrix[idx]
    # Exclude the queried row explicitly instead of assuming it sorts first:
    # a row with identical text ties at the top score, and the stable sort
    # could otherwise place the query itself among the recommendations.
    ranked = sorted(
        (pos for pos in range(len(scores)) if pos != idx),
        key=lambda pos: scores[pos],
        reverse=True,
    )
    titles = music_df['song']
    return [titles.iloc[pos] for pos in ranked[:max(top_n, 0)]]

# Test the recommendation function with a song name.
# The catalogue is an unseeded random sample of the CSV, so this title may be
# absent from a given run; recommendation() raises IndexError in that case.
try:
    recommended_songs = recommendation('Crying Over You')
except IndexError:
    recommended_songs = []
    print("'Crying Over You' is not in the sampled catalogue; nothing to recommend.")
else:
    print("Recommended songs for 'Crying Over You':", recommended_songs)
