In [18]:
from transformers import pipeline

# Smoke test: load the emotion model and score one sentence across all labels.
classifier = pipeline(
    "text-classification",
    model='bhadresh-savani/distilbert-base-uncased-emotion',
    return_all_scores=True,
)
prediction = classifier("I love using transformers. The best part is wide range of support and its easy to use")
print(prediction)

[[{'label': 'sadness', 'score': 0.00067926972405985}, {'label': 'joy', 'score': 0.9959298968315125}, {'label': 'love', 'score': 0.0009452439262531698}, {'label': 'anger', 'score': 0.0018055178225040436}, {'label': 'fear', 'score': 0.000411103421356529}, {'label': 'surprise', 'score': 0.00022885671933181584}]]


In [19]:
from tqdm import tqdm
# Register tqdm's progress-bar integration on pandas objects.
# NOTE(review): no visible cell calls a progress_* method, so this may be unused — confirm.
tqdm.pandas()

In [2]:
import pandas as pd

In [29]:
# Load the raw song and movie tables from CSV files in the working directory.
# Later cells read the 'text' and 'link' columns of song_df and the 'Title', 'Plot',
# 'Release Year', 'Origin/Ethnicity' and 'Wiki Page' columns of movie_df.
song_df = pd.read_csv('spotify_millsongdata.csv')
movie_df = pd.read_csv('wiki_movie_plots_deduped.csv')

In [22]:
# Checkpoint used for every emotion prediction made by generate_emotion_metrics.
_EMOTION_MODEL_NAME = 'bhadresh-savani/distilbert-base-uncased-emotion'

# Emotion labels accumulated per text; also fixes the key order of the returned dict.
_EMOTION_LABELS = ('joy', 'anger', 'love', 'sadness', 'fear', 'surprise')

# Lazily created pipeline shared by all calls, so the model is loaded only once.
_emotion_classifier = None


def _get_emotion_classifier():
    """Return the shared emotion pipeline, building it on first use."""
    global _emotion_classifier
    if _emotion_classifier is None:
        _emotion_classifier = pipeline(
            "text-classification",
            model=_EMOTION_MODEL_NAME,
            return_all_scores=True,
        )
    return _emotion_classifier


def generate_emotion_metrics(text, max_seq_length=512):
    """Return normalized emotion scores for ``text``.

    The text is cut into consecutive pieces of at most ``max_seq_length``
    characters, each piece is scored by the emotion classifier, the per-label
    scores are summed over all pieces and finally divided by their grand total
    so that the returned values sum to 1.

    Args:
        text: The text to score. Empty strings and non-string values (for
            example ``None`` or NaN) are treated as "no text".
        max_seq_length: Maximum number of characters per piece. Defaults to
            512, the value previously hard-coded here.

    Returns:
        dict mapping each of 'joy', 'anger', 'love', 'sadness', 'fear' and
        'surprise' to a float. When there is no text (or the scores sum to
        zero) every value is 0.0 instead of raising ZeroDivisionError.

    Raises:
        KeyError: If the classifier reports a label outside the six above.
        ValueError: If ``max_seq_length`` is 0 (invalid ``range`` step).
    """
    emotion_scores_combined = dict.fromkeys(_EMOTION_LABELS, 0.0)

    # Nothing to classify: return zeros without loading the model at all.
    if not isinstance(text, str) or not text:
        return emotion_scores_combined

    # Split the text into fixed-size character chunks
    chunks = [text[i:i + max_seq_length] for i in range(0, len(text), max_seq_length)]

    # Fetch the shared pipeline once; rebuilding it per chunk reloads the model
    # every time and dominates the runtime on large tables.
    classifier = _get_emotion_classifier()

    for chunk in chunks:
        # With return_all_scores=True a single string yields [[{label, score}, ...]]
        for emotion_score_dict in classifier(chunk)[0]:
            emotion_scores_combined[emotion_score_dict['label']] += emotion_score_dict['score']

    # Normalize the aggregated emotion scores
    total_score = sum(emotion_scores_combined.values())
    if total_score == 0:
        return emotion_scores_combined
    return {emotion: score / total_score for emotion, score in emotion_scores_combined.items()}


In [23]:
def process_chunk(chunk, text_column='text'):
    """Add an 'emotion_metrics' column to ``chunk`` and return it.

    Args:
        chunk: DataFrame slice to score. It is modified in place.
        text_column: Name of the column holding the text to score. Defaults
            to 'text' (the song lyrics column), so existing calls keep
            working; pass another name to score a different table.

    Returns:
        The same DataFrame object, now carrying one result of
        ``generate_emotion_metrics`` per row in 'emotion_metrics'.
    """
    chunk['emotion_metrics'] = chunk[text_column].apply(generate_emotion_metrics)
    return chunk

In [24]:
# Keep only songs that have lyrics and discard the 'link' column.
# errors='ignore' makes the cell safe to re-run: on a second execution 'link'
# is already gone and a plain drop would raise KeyError.
song_df = song_df.dropna(subset=['text']).drop(columns=['link'], errors='ignore')


In [25]:
# Inspect the cleaned song table (the printed repr below is abbreviated by pandas).
print(song_df)

             artist                   song  \
0              ABBA  Ahe's My Kind Of Girl   
1              ABBA       Andante, Andante   
2              ABBA         As Good As New   
3              ABBA                   Bang   
4              ABBA       Bang-A-Boomerang   
...             ...                    ...   
57645  Ziggy Marley          Good Old Days   
57646  Ziggy Marley          Hand To Mouth   
57647          Zwan           Come With Me   
57648          Zwan                 Desire   
57649          Zwan              Heartsong   

                                                    text  
0      Look at her face, it's a wonderful face  \r\nA...  
1      Take it easy with me, please  \r\nTouch me gen...  
2      I'll never know why I had to go  \r\nWhy I had...  
3      Making somebody happy is a question of give an...  
4      Making somebody happy is a question of give an...  
...                                                  ...  
57645  Irie days come on play  \r\

In [26]:
chunk_size = 1000

# Integer division plus one, so a trailing partial batch of rows gets its own pass
# (when the row count is an exact multiple, that last batch is simply empty)
num_chunks = len(song_df) // chunk_size + 1

# Score the songs batch by batch; every batch is an independent copy of its rows
result_chunks = []
for chunk_idx in tqdm(range(num_chunks)):
    start_idx = chunk_size * chunk_idx
    end_idx = min(start_idx + chunk_size, len(song_df))
    chunk = song_df.iloc[slice(start_idx, end_idx)].copy()
    scored_chunk = process_chunk(chunk)
    result_chunks.append(scored_chunk)

# Stitch the scored batches back together into a single DataFrame
result_df = pd.concat(result_chunks)

100%|██████████| 58/58 [30:58:15<00:00, 1922.34s/it]


In [27]:
# Inspect the scored song table produced by the batching loop.
print(result_df)

             artist                   song  \
0              ABBA  Ahe's My Kind Of Girl   
1              ABBA       Andante, Andante   
2              ABBA         As Good As New   
3              ABBA                   Bang   
4              ABBA       Bang-A-Boomerang   
...             ...                    ...   
57645  Ziggy Marley          Good Old Days   
57646  Ziggy Marley          Hand To Mouth   
57647          Zwan           Come With Me   
57648          Zwan                 Desire   
57649          Zwan              Heartsong   

                                                    text  \
0      Look at her face, it's a wonderful face  \r\nA...   
1      Take it easy with me, please  \r\nTouch me gen...   
2      I'll never know why I had to go  \r\nWhy I had...   
3      Making somebody happy is a question of give an...   
4      Making somebody happy is a question of give an...   
...                                                  ...   
57645  Irie days come on pl

In [28]:
# Persist the scored songs; index=False leaves the row index out of the file.
result_df.to_csv('songEmotion.csv', index=False)

In [31]:
# Deduplicate on (Title, Plot), drop two columns that are not used afterwards and
# discard rows lacking a title or plot.
# errors='ignore' keeps the cell re-runnable: once the two columns are gone a
# plain drop would raise KeyError on the second execution.
movie_df = (
    movie_df
    .drop_duplicates(subset=['Title', 'Plot'])
    .drop(columns=['Origin/Ethnicity', 'Wiki Page'], errors='ignore')
    .dropna(subset=['Title', 'Plot'])
)

# Keep only movies released after 1950 (1951 onwards)
movie_df = movie_df[movie_df['Release Year'] > 1950]

In [32]:
# Inspect the filtered movie table (rows keep their original index labels).
print(movie_df)

       Release Year                                       Title  \
5285           1951                             The 13th Letter   
5286           1951  Abbott and Costello Meet the Invisible Man   
5287           1951                             Ace in the Hole   
5288           1951                    Across the Wide Missouri   
5289           1951                Adventures of Captain Fabian   
...             ...                                         ...   
34881          2014                           The Water Diviner   
34882          2017                          Çalgı Çengi İkimiz   
34883          2017                                Olanlar Oldu   
34884          2017                            Non-Transferable   
34885          2017                          İstanbul Kırmızısı   

                      Director  \
5285            Otto Preminger   
5286            Charles Lamont   
5287              Billy Wilder   
5288        William A. Wellman   
5289          William Mar

In [38]:
def mov_process_chunk(chunk):
    """Score every movie plot in ``chunk`` and return the same frame.

    The 'Plot' column is passed row by row to ``generate_emotion_metrics``
    and the results are stored in a new 'emotion_metrics' column. The frame
    is modified in place and also returned.
    """
    plot_scores = chunk['Plot'].apply(generate_emotion_metrics)
    chunk['emotion_metrics'] = plot_scores
    return chunk

In [39]:
mov_chunk_size = 1000

# Integer division plus one, so a trailing partial batch of rows gets its own pass
# (when the row count is an exact multiple, that last batch is simply empty)
mov_num_chunks = len(movie_df) // mov_chunk_size + 1

# Score the movies batch by batch; every batch is an independent copy of its rows
mov_result_chunks = []
for mov_chunk_idx in tqdm(range(mov_num_chunks)):
    mov_start_idx = mov_chunk_size * mov_chunk_idx
    mov_end_idx = min(mov_start_idx + mov_chunk_size, len(movie_df))
    mov_chunk = movie_df.iloc[slice(mov_start_idx, mov_end_idx)].copy()
    mov_scored_chunk = mov_process_chunk(mov_chunk)
    mov_result_chunks.append(mov_scored_chunk)

# Stitch the scored batches back together into a single DataFrame
mov_result_df = pd.concat(mov_result_chunks)

100%|██████████| 28/28 [34:02:44<00:00, 4377.31s/it]


In [13]:
# Persist the scored movies; index=False leaves the row index out of the file.
# NOTE(review): the recorded output of this cell is a NameError (mov_result_df not
# defined) and its execution count (13) is lower than the scoring cell's (39), so it
# appears to have been run in a kernel where the scoring cell had not executed.
# Run it right after the scoring cell.
mov_result_df.to_csv('movieEmotion.csv', index=False)

NameError: name 'mov_result_df' is not defined

In [14]:
# Reload the scored tables from disk
mov_final_df = pd.read_csv('movieEmotion.csv')
song_final_df = pd.read_csv('songEmotion.csv')

# The raw text column is dropped from each table, in place
for frame, text_col in ((mov_final_df, 'Plot'), (song_final_df, 'text')):
    frame.drop(columns=[text_col], inplace=True)

In [4]:
# Inspect both reloaded tables (movies first, then songs).
print(mov_final_df)
print(song_final_df)

       Release Year                                       Title  \
0              1951                             The 13th Letter   
1              1951  Abbott and Costello Meet the Invisible Man   
2              1951                             Ace in the Hole   
3              1951                    Across the Wide Missouri   
4              1951                Adventures of Captain Fabian   
...             ...                                         ...   
27865          2014                           The Water Diviner   
27866          2017                          Çalgı Çengi İkimiz   
27867          2017                                Olanlar Oldu   
27868          2017                            Non-Transferable   
27869          2017                          İstanbul Kırmızısı   

                      Director  \
0               Otto Preminger   
1               Charles Lamont   
2                 Billy Wilder   
3           William A. Wellman   
4             William Mar

In [15]:
# Remove rows without emotion scores from both tables, in place
for frame in (mov_final_df, song_final_df):
    frame.dropna(subset=['emotion_metrics'], inplace=True)

# Show the movies first, then the songs
for frame in (mov_final_df, song_final_df):
    print(frame)

       Release Year                                       Title  \
0              1951                             The 13th Letter   
1              1951  Abbott and Costello Meet the Invisible Man   
2              1951                             Ace in the Hole   
3              1951                    Across the Wide Missouri   
4              1951                Adventures of Captain Fabian   
...             ...                                         ...   
27865          2014                           The Water Diviner   
27866          2017                          Çalgı Çengi İkimiz   
27867          2017                                Olanlar Oldu   
27868          2017                            Non-Transferable   
27869          2017                          İstanbul Kırmızısı   

                      Director  \
0               Otto Preminger   
1               Charles Lamont   
2                 Billy Wilder   
3           William A. Wellman   
4             William Mar

In [16]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from ast import literal_eval
import random
from scipy.sparse import csr_matrix


In [17]:
def convert_to_dict(emotion_str):
    """Parse the text form of a Python literal back into an object.

    Used on the 'emotion_metrics' column, whose values are strings after the
    tables are reloaded from CSV. Parsing goes through ``ast.literal_eval``,
    so only literal syntax is accepted.
    """
    parsed = literal_eval(emotion_str)
    return parsed

# calculate emotion vector
def calculate_emotion_vector(emotion_dict):
    """Return the dict's values as a NumPy array.

    The components follow the dict's own iteration order, so two vectors are
    only comparable when their source dicts list the emotions in the same order.
    """
    scores = [*emotion_dict.values()]
    return np.array(scores)

# Parse each stored metrics string and turn it into a NumPy vector, movies first, then songs
for frame in (mov_final_df, song_final_df):
    parsed_metrics = frame['emotion_metrics'].apply(convert_to_dict)
    frame['Emotion Vector'] = parsed_metrics.apply(calculate_emotion_vector)


In [18]:
selected_movie = 'In the Mood for Love'

# Look the movie up explicitly so an unknown title fails with a clear message
# instead of an IndexError from `.values[0]` on an empty selection.
movie_matches = mov_final_df.loc[mov_final_df['Title'] == selected_movie, 'Emotion Vector']
if movie_matches.empty:
    raise ValueError(f"Movie not found in mov_final_df: {selected_movie!r}")
# If several rows share the title, the first one is used.
selected_movie_vector = movie_matches.values[0]

# Convert the selected movie's emotion vector to a sparse matrix (one row)
selected_movie_vector_sparse = csr_matrix(selected_movie_vector)

# Convert emotion vectors of songs to a sparse matrix (one row per song)
songs_sparse_matrix = csr_matrix(song_final_df['Emotion Vector'].to_list())

# Similarity of the movie to every song; the result has a single row
cosine_similarities = cosine_similarity(selected_movie_vector_sparse, songs_sparse_matrix)

# Extract the 200 most similar songs (argsort is ascending, hence the reversal)
top_200_indices = np.argsort(cosine_similarities[0])[::-1][:200]
top_200_songs = song_final_df.iloc[top_200_indices]

# Randomly select 15 songs from the top 200.
# DataFrame.sample takes its seed through random_state; seeding Python's
# `random` module has no effect on it, so the previous random.seed(42) did not
# make this draw reproducible. Remove random_state for a different draw each run.
# The sample size is clamped so a short candidate list does not raise.
sample_size = min(15, len(top_200_songs))
selected_songs = top_200_songs.sample(n=sample_size, random_state=42)[['song', 'artist']]

print("Recommended Songs:")
for song, artist in selected_songs.values:
    print(f"{song} by {artist}")

Recommended Songs:
Rock Show by Lady Gaga
Ballad For A Friend by Bob Dylan
Give The Radio Back by Alice Cooper
Don't Leave Me by Green Day
The Fugitive by Iron Maiden
My Impression Now by Guided By Voices
Fear The Voices by Alice In Chains
Take Your Hand by Usher
Fallin' From The Sky by R. Kelly
Dangerous Times by Cher
Istanbul by Morrissey
World I Used To Know by Glen Campbell
Hurts Like Heaven by Coldplay
Somebody Might Wave Back by Waterboys
Scream Until You Like It by W.A.S.P.


In [25]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from config import CLIENT_ID, CLIENT_SECRET
import requests

# Initialize Spotify client using app credentials imported from the local config module
client_credentials_manager = SpotifyClientCredentials(client_id=CLIENT_ID, client_secret=CLIENT_SECRET)
# requests_timeout is given in seconds. The previous value of 30000 would let a
# single stalled request block the notebook for more than eight hours.
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager, requests_timeout=30)

In [38]:
def get_genre(artist_name):
    """Return the first genre listed for the top search hit for ``artist_name``.

    Queries the module-level Spotify client ``sp`` for a single artist result.

    Returns:
        The first entry of the hit's genre list, "Genre not available" when
        that list is empty, or "Artist not found" when the search has no hits.
    """
    response = sp.search(q=f'artist:{artist_name}', type='artist', limit=1)
    matches = response['artists']['items']

    # No hit at all for this name
    if not matches:
        return "Artist not found"

    # A hit exists but carries no genre information
    genres = matches[0]['genres']
    if not genres:
        return "Genre not available"

    return genres[0]

In [41]:
from tqdm import tqdm
import time

# Rate limiting parameters read by add_genre_to_df
requests_per_minute_limit = 1000  # max uncached lookups before pausing; NOTE(review): confirm against Spotify's actual limits
minute_window = 60  # length of the rate-limit window, in seconds

# Mutable request-tracking state, updated by add_genre_to_df through `global`
last_request_time = None  # time.time() of the most recent lookup attempt; None until the first one
requests_sent_this_minute = 0  # lookups counted since the last reset

# Cache of artist name -> genre string, so each artist is looked up at most once on success
genre_cache = {}

# Start time (time.time()) of the current rate-limit window; None before the first request.
_rate_window_started_at = None


def add_genre_to_df(df):
    """Add a 'Genre' column to ``df`` with one genre per row of df['artist'].

    Genres come from ``get_genre`` and are memoized in the module-level
    ``genre_cache``, so each distinct artist triggers at most one successful
    lookup. Lookups are throttled to ``requests_per_minute_limit`` per
    ``minute_window`` seconds. Only uncached artists count against the limit
    or cause a pause; the pause lasts until the current window ends, and the
    counter resets by itself once a window has elapsed.

    Args:
        df: DataFrame with an 'artist' column. Modified in place.

    Returns:
        None. The result is the new 'Genre' column on ``df``. A failed lookup
        stores None for that row and is not cached, so a later row for the
        same artist retries.
    """
    global last_request_time, requests_sent_this_minute, _rate_window_started_at

    genres = []
    for artist_name in tqdm(df['artist'], desc='Processing artists'):
        # Cached artists need no request, so they bypass rate limiting entirely
        if artist_name in genre_cache:
            genres.append(genre_cache[artist_name])
            continue

        now = time.time()
        if _rate_window_started_at is None or now - _rate_window_started_at >= minute_window:
            # The previous window (if any) has run out: open a new one
            _rate_window_started_at = now
            requests_sent_this_minute = 0
        elif requests_sent_this_minute >= requests_per_minute_limit:
            # Budget used up: sleep for whatever is left of the current window
            time_to_wait = minute_window - (now - _rate_window_started_at)
            print(f"Rate limit reached. Waiting for {time_to_wait} seconds before making next request...")
            time.sleep(time_to_wait)
            _rate_window_started_at = time.time()
            requests_sent_this_minute = 0  # Reset requests count for the new minute

        # Make the request
        try:
            genre = get_genre(artist_name)
            genre_cache[artist_name] = genre  # Cache the genre information
        except Exception as e:
            # Best effort: report the failure and keep processing the other rows
            print(f"Error fetching genre for {artist_name}: {e}")
            genre = None  # Set genre to None if retrieval fails

        # Update request tracking variables (failed attempts count as requests too)
        requests_sent_this_minute += 1
        last_request_time = time.time()

        genres.append(genre)

    df['Genre'] = genres

# Annotate the song table with a 'Genre' column (song_final_df is modified in place)
add_genre_to_df(song_final_df)


Processing artists: 100%|██████████| 57650/57650 [01:31<00:00, 626.78it/s]  


In [42]:
# Inspect the song table after the genre lookup.
print(song_final_df)

             artist                   song  \
0              ABBA  Ahe's My Kind Of Girl   
1              ABBA       Andante, Andante   
2              ABBA         As Good As New   
3              ABBA                   Bang   
4              ABBA       Bang-A-Boomerang   
...             ...                    ...   
57645  Ziggy Marley          Good Old Days   
57646  Ziggy Marley          Hand To Mouth   
57647          Zwan           Come With Me   
57648          Zwan                 Desire   
57649          Zwan              Heartsong   

                                         emotion_metrics  \
0      {'joy': 0.9988289208827446, 'anger': 0.0001545...   
1      {'joy': 0.6534017489030036, 'anger': 0.0038670...   
2      {'joy': 0.6008874079069335, 'anger': 0.0056411...   
3      {'joy': 0.008357059498882647, 'anger': 0.33226...   
4      {'joy': 0.01621546563623756, 'anger': 0.331428...   
...                                                  ...   
57645  {'joy': 0.9954145554

In [43]:
# List the distinct genre labels; the placeholder strings returned by get_genre
# (e.g. 'Genre not available') appear here alongside real genres.
print(song_final_df['Genre'].unique())

['europop' 'eurodance' 'comic' 'british soul' 'album rock' 'soft rock'
 'opm' 'contemporary country' 'operatic pop' 'alternative metal'
 'bluegrass' 'new romantic' 'christian music' 'classical tenor'
 'adult standards' 'pop' 'classic opm' 'country rock' 'canadian pop'
 'boy band' 'Genre not available' 'baroque pop' 'british invasion'
 'britpop' 'movie tunes' 'disco' 'classic soul' 'alternative rock'
 'classic rock' 'reggae' 'glam metal' 'folk rock' 'dance pop'
 'heartland rock' 'classic oklahoma country' 'british folk' 'r&b' 'celtic'
 'blues' 'permanent wave' 'tin pan alley' 'arkansas country' 'neo mellow'
 'australian rock' 'jam band' 'country' 'art rock' 'big room' 'dance rock'
 'classic indonesian rock' 'desi hip hop' 'classic country pop'
 'classic praise' 'canadian hip hop' 'metal' 'rock-and-roll'
 'indietronica' 'glam rock' 'new wave' 'detroit hip hop' 'banda'
 'classic finnish rock' 'jazz blues' 'melodic drill' 'post-grunge' 'k-pop'
 'funk metal' 'gangster rap' 'punk' 'blues roc

In [46]:
# Drop every song whose genre mentions "metal".
# case must be the boolean False: the string 'False' is truthy, so the previous
# call was silently case-sensitive. na=False treats missing genres (None from a
# failed lookup) as non-matches, keeping those rows and keeping the mask boolean.
edit_song_df = song_final_df[~song_final_df['Genre'].str.contains('metal', case=False, na=False)]

In [47]:
# Inspect the song table after the genre filter.
print(edit_song_df)

             artist                   song  \
0              ABBA  Ahe's My Kind Of Girl   
1              ABBA       Andante, Andante   
2              ABBA         As Good As New   
3              ABBA                   Bang   
4              ABBA       Bang-A-Boomerang   
...             ...                    ...   
57645  Ziggy Marley          Good Old Days   
57646  Ziggy Marley          Hand To Mouth   
57647          Zwan           Come With Me   
57648          Zwan                 Desire   
57649          Zwan              Heartsong   

                                         emotion_metrics  \
0      {'joy': 0.9988289208827446, 'anger': 0.0001545...   
1      {'joy': 0.6534017489030036, 'anger': 0.0038670...   
2      {'joy': 0.6008874079069335, 'anger': 0.0056411...   
3      {'joy': 0.008357059498882647, 'anger': 0.33226...   
4      {'joy': 0.01621546563623756, 'anger': 0.331428...   
...                                                  ...   
57645  {'joy': 0.9954145554

In [48]:
# Persist the filtered songs; index=False leaves the row index out of the file.
edit_song_df.to_csv('songEdited.csv', index=False)