# MySpotify

## Install requirements

In [None]:
# Install the packages listed in requirements.txt via IPython's %pip magic.
%pip install -r requirements.txt

## Initializing the MySpotify Module

In [None]:
%reload_ext autoreload
%autoreload 2

DEBUG_MODE = True

from MySpotify import MySpotify, os
import requests

# Need always to reload the link to download the data
if not os.path.exists("data.zip"):
    download_link = ""

    response = requests.get(download_link)
    with open("data.zip", "wb") as file:
        file.write(response.content)


SpotifyModule = MySpotify("data.zip", DEBUG_MODE)

## Preprocessing Data

### Convert files from csv to parquet

In [None]:
# Per the section heading, this converts the CSV inputs to Parquet
# (implementation lives in the MySpotify module, not shown here).
SpotifyModule.convert_files()

### Merge the Data into one parquet file

In [None]:
# Per the section heading, this merges the data into a single Parquet file
# (implementation lives in the MySpotify module, not shown here).
SpotifyModule.PreProcess_Data()

## Display DataFrame

In [None]:
from IPython.display import HTML, display, clear_output

def display_scrollable_df(df, height=400):
    """Show *df* as an HTML table inside a fixed-height, scrollable box.

    Args:
        df: Object exposing ``to_html()`` (e.g. a pandas DataFrame).
        height: Height of the scrollable container in pixels.
    """
    table_markup = df.to_html()
    container = '<div style="height: {h}px; overflow: auto;">{body}</div>'.format(
        h=height,
        body=table_markup,
    )
    display(HTML(container))

## Top Tracks

### Top 100 tracks by play count

In [None]:
# Number of tracks requested from get_Top_Tracks.
TopTracksNum = 100
best_tracks = SpotifyModule.get_Top_Tracks(TopTracksNum)
# Render the result with the scrollable HTML helper defined earlier in the notebook.
display_scrollable_df(best_tracks)

### Top 100 tracks by genre

In [None]:

# Walk through the genres one at a time, showing the top tracks for each and
# waiting for the user before moving on.
TopTracksByGenreNum = 100
genre_list = [
    "Rock",
    "Rap",
    "Jazz",
    "Electronic",
    "Pop",
    "Blues",
    "Country",
    "Reggae",
    "New Age",
]
for genre in genre_list:
    # Clear the previous genre's output once the new output is ready.
    clear_output(wait=True)
    best_tracks_by_genre = SpotifyModule.Get_TopTracks_By_Genre(
        TopTracksByGenreNum,
        genre,
    )
    display_scrollable_df(best_tracks_by_genre)
    key = input("Press Enter to continue to the next genre, or type something and press Enter to exit: ")
    # Any non-empty answer stops the walkthrough.
    if key:
        break


## Collections

### Using Baseline

In [None]:
# Themes for which a collection of tracks is built, one theme at a time.
Collections = ["love", "war", "happiness", "loneliness", "money"]
# Number of tracks requested per theme.
Num_of_tracks = 50
for theme in Collections:
    # Clear the previous theme's output once the new output is ready.
    clear_output(wait=True)
    Baseline = SpotifyModule.Baseline(theme, Num_of_tracks)
    display_scrollable_df(Baseline)
    # This loop iterates over themes, so the prompt says "theme" (the original
    # text said "genre", copied from the genre cell).
    key = input("Press Enter to continue to the next theme, or type something and press Enter to exit: ")
    # Any non-empty answer stops the walkthrough.
    if key:
        break

### Using Word2Vec

In [None]:
# Themes for which a collection of tracks is built, one theme at a time.
Collections = ["love", "war", "happiness", "loneliness", "money"]
# Number of tracks requested per theme.
Num_of_tracks = 50
for theme in Collections:
    # Clear the previous theme's output once the new output is ready.
    clear_output(wait=True)
    Word2Vec = SpotifyModule.Word2Vec(theme, Num_of_tracks)
    display_scrollable_df(Word2Vec)
    # This loop iterates over themes, so the prompt says "theme" (the original
    # text said "genre", copied from the genre cell).
    key = input("Press Enter to continue to the next theme, or type something and press Enter to exit: ")
    # Any non-empty answer stops the walkthrough.
    if key:
        break

### Using Classifier

In [None]:
# Themes handed to the classifier; unlike the other collection cells, the
# whole list is passed in a single call rather than one theme per iteration.
Collections = ["love", "war","happiness", "loneliness", "money"]
# Disabled per-theme loop, mirroring the Baseline / Word2Vec cells:
# Num_of_tracks = 50
# for theme in Collections:
#     clear_output(wait=True)
#     Classification = SpotifyModule.Classification(theme, Num_of_tracks)
#     display_scrollable_df(Classification)
#     key = input("Press Enter to continue to the next genre, or type something and press Enter to exit: ")
#     if key != "":
#         break

# Classification = SpotifyModule.Classification(theme, Num_of_tracks)
# NOTE(review): the second argument is an empty string here, whereas the
# disabled calls pass a track count — confirm against MySpotify.Classification.
Classification = SpotifyModule.Classification(Collections, "")

In [None]:
# NOTE(review): this reads the underscore-prefixed attribute `_Labeled_tracks`
# directly; presumably it is populated by the Classification call — confirm.
display_scrollable_df(SpotifyModule._Labeled_tracks)

In [None]:
from pyarrow.parquet import ParquetFile
import pandas as pd

# Open the Parquet file at data/parquet/play_count.parquet.
pf = ParquetFile("data/parquet/play_count.parquet")

# Read the file's contents and convert them to a pandas DataFrame.
df : pd.DataFrame = pf.read().to_pandas()

In [None]:
# Row count of the DataFrame currently bound to `df`.
print(len(df))

In [None]:
from nltk.corpus import wordnet
from nltk.stem import PorterStemmer
from spellchecker import SpellChecker

def get_word_themes(word):
    """Return the WordNet lemma names associated with *word*.

    The word is stemmed with NLTK's ``PorterStemmer``, the stem is passed
    through ``SpellChecker.correction``, and the lemma names of every WordNet
    synset found for the result are collected.

    Args:
        word: The word to look up.

    Returns:
        A sorted list of unique lemma names; empty when no synsets are found.
    """
    stem = PorterStemmer().stem(word)
    # SpellChecker.correction can return None when it has no candidate;
    # fall back to the raw stem instead of passing None to WordNet.
    normalized = SpellChecker().correction(stem) or stem

    synonyms = set()
    for synset in wordnet.synsets(normalized):
        synonyms.update(synset.lemma_names())

    # Sort so the output is stable across runs (set iteration order is not).
    return sorted(synonyms)


print("Synsets for 'happiness':", get_word_themes("happiness"))

In [None]:
from pyarrow.parquet import ParquetFile
# Open the Parquet file at data/parquet/mxm_dataset_train.parquet.
pf = ParquetFile("data/parquet/mxm_dataset_train.parquet")
# List the column names from the file's schema.
print(pf.schema.names)
# NOTE(review): `df` is not reassigned in this cell, so this shows the empty
# head (columns only) of the DataFrame left in `df` by an earlier cell, not
# the contents of the file opened above — confirm this is intended.
df.head(0)