## 1. Prepare data to run model

In [36]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xgb  
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.metrics import roc_curve, auc, confusion_matrix, classification_report, precision_score, recall_score, f1_score, accuracy_score
import itertools
from sklearn.impute import SimpleImputer

## 2. Get Model to predict Hit/Flop

Loading the Model: You'll need to load the pre-trained prediction model. If it's pickled, you can load it using pickle.

In [37]:
# import model
# pickle is the standard-library serializer used to read the saved model below.
import pickle

# Load the model from the .pkl file
# NOTE(review): unpickling can execute arbitrary code stored in the file, so
# only load 'xgb_model_genre.pkl' when it comes from a trusted source.
# The path is relative to the notebook's working directory.
# Presumably this is the XGBoost classifier trained with the genre feature
# (going by the file name) — TODO confirm which notebook produced it.
with open('xgb_model_genre.pkl', 'rb') as file:
    model = pickle.load(file)

## 3. Build code to give recommendation for a feature

In [38]:
# Build the lookup table of tracks in one pass over the cleaned CSV:
#   1. derive the track length in seconds from duration_ms,
#   2. drop the columns that are not needed any further,
#   3. replace each genre label with its integer category code.
data_tofindtrack = (
    pd.read_csv('Spotify Data/data-clean.csv')
    .assign(track_seconds=lambda frame: frame['duration_ms'] / 1000)
    .drop(
        columns=["era", "key", "popularity", "mode", 'duration_ms',
                 "tiktok", "spotify", "track", "artist"]
    )
    .assign(
        main_parent_genre=lambda frame: frame['main_parent_genre']
        .astype('category')
        .cat.codes
    )
)

# Show the prepared table as the cell output
data_tofindtrack

Unnamed: 0,track_id,time_signature,chorus_hit,sections,target,sm_target,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,main_parent_genre,track_seconds
0,1dtKN6wwlolkM8XZy2y9C1,3.0,32.94975,9.0,1.0,0.0,0.417,0.620,-7.727,0.0403,0.4900,0.000000,0.0779,0.8450,185.655,0,173.533
1,5hjsmSnUefdUqzsDogisiX,4.0,48.82510,10.0,0.0,0.0,0.498,0.505,-12.475,0.0337,0.0180,0.107000,0.1760,0.7970,101.801,8,213.613
2,6uk8tI6pwxxdVTNlNOJeJh,4.0,37.22663,12.0,0.0,0.0,0.657,0.649,-13.392,0.0380,0.8460,0.000004,0.1190,0.9080,115.940,4,223.960
3,7aNjMJ05FvUXACPWZ7yJmv,4.0,24.75484,8.0,0.0,0.0,0.590,0.545,-12.058,0.1040,0.7060,0.024600,0.0610,0.9670,105.592,9,157.907
4,1rQ0clvgkzWr001POOPJWx,4.0,21.79874,14.0,0.0,0.0,0.515,0.765,-3.515,0.1240,0.8570,0.000872,0.2130,0.9060,114.617,4,245.600
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40555,4t1TljQWJ6ZuoSY67zVvBI,4.0,24.30824,7.0,0.0,0.0,0.172,0.358,-14.430,0.0342,0.8860,0.966000,0.3140,0.0361,72.272,4,150.857
40556,2MShy1GSSgbmGUxADNIao5,4.0,32.53856,8.0,1.0,0.0,0.910,0.366,-9.954,0.0941,0.0996,0.000000,0.2610,0.7400,119.985,5,152.000
40557,55qBw1900pZKfXJ6Q9A2Lc,4.0,20.73371,7.0,1.0,0.0,0.719,0.804,-4.581,0.0355,0.0132,0.000003,0.1390,0.6050,119.999,5,227.760
40558,4o9npmYHrOF1rUxxTVH8h4,4.0,21.65301,14.0,0.0,0.0,0.600,0.177,-16.070,0.0561,0.9890,0.868000,0.1490,0.5600,120.030,0,213.387


In [58]:
# Multipliers applied to a feature, tried in this order: the unchanged value
# first (1), then alternating decreases and increases that move further away
# from the original (0.8 means "80 % of the original value", and so on).
values = [1, 0.8, 1.2, 0.6, 1.4, 0.4, 1.6, 0.2]


# method to tune feature and say if it gets to a HIT
def checkfeature(insert_feature, song, multipliers=None, predictor=None):
    """Scale one feature of ``song`` until the model predicts a HIT.

    Each multiplier is applied to a fresh copy of ``song`` (the input itself
    is never modified), the copy is passed to ``predictor.predict`` and the
    first element of the returned predictions is inspected. A value greater
    than 0 counts as a HIT.

    Parameters
    ----------
    insert_feature : str
        Name of the column/key in ``song`` to rescale.
    song : pandas.DataFrame (or any mapping-like object with ``copy()``)
        The song's feature values in the layout the predictor expects.
    multipliers : iterable of numbers, optional
        Factors to try, in order. Defaults to the module-level ``values``.
    predictor : object with a ``predict`` method, optional
        Defaults to the module-level ``model`` loaded earlier in the notebook.

    Returns
    -------
    number or None
        The first multiplier that yields a HIT (``1`` means the song already
        is a HIT without any change), or ``None`` when no multiplier does.

    Note that the printed "by <value>" figure is the multiplier itself, not a
    percentage difference.
    """
    if multipliers is None:
        multipliers = values
    if predictor is None:
        # Resolved at call time so the cell can be defined before the model
        # is loaded.
        predictor = model

    for value in multipliers:
        song_copy = song.copy()  # keep the caller's data untouched
        song_copy[insert_feature] = song_copy[insert_feature] * value
        pred = predictor.predict(song_copy)

        if pred[0] > 0:  # if a HIT is reached give print statement
            print("HIT reached")
            if value == 1:
                print("Your song already is a HIT")
            else:
                print("You have to change " + str(insert_feature) + " by " + str(value))
            return value

    # No multiplier produced a HIT.
    return None

In [40]:
# if it's a success return 1
def success(insert_feature, song_df):
    """Tell whether rescaling ``insert_feature`` alone can make the song a HIT.

    Delegates the search to ``checkfeature`` and turns its result into a
    flag: 1 when a working multiplier was found, 0 when ``checkfeature``
    returned ``None``. A short status line is printed in both cases.
    """
    multiplier = checkfeature(insert_feature, song_df)
    if multiplier is not None:
        print("success")
        return 1
    print("no success with " + str(insert_feature))
    return 0


In [41]:
# Check for a song location
# songs_df = songs_df = data.drop("target", axis=1)
# selected_song_df = songs_df.iloc[40555: 40556]
#pred = model.predict(selected_song_df)
#print (pred[0])

In [42]:
# Handling missing values
# NOTE(review): no visible cell calls fit/transform on this imputer, so no
# imputation is currently applied to the data — confirm whether it is still
# needed.
imputer = SimpleImputer(strategy='mean')  # Replace missing values with the mean

# Check for a songs track_id
def id_to_df(track_id, tracks=None):
    """Return the feature row(s) of the track with the given ``track_id``.

    Parameters
    ----------
    track_id : str
        Identifier to look up in the ``track_id`` column.
    tracks : pandas.DataFrame, optional
        Table to search. Defaults to the module-level ``data_tofindtrack``.

    Returns
    -------
    pandas.DataFrame
        Every row whose ``track_id`` matches, without the ``target`` and
        ``track_id`` columns. The source table is not modified and no
        imputation is applied.

    Raises
    ------
    ValueError
        If no row matches ``track_id``. Previously an empty frame was
        returned, which only failed later inside the prediction code.
    """
    if tracks is None:
        tracks = data_tofindtrack

    track_df = tracks[tracks['track_id'] == track_id]
    if track_df.empty:
        raise ValueError("track_id " + repr(track_id) + " not found in the track table")

    return track_df.drop(['target', "track_id"], axis=1)

#id_to_df("4t1TljQWJ6ZuoSY67zVvBI")

In [43]:
#song_df = id_to_df("5SlHTXLWa0nvh8VtGgKlJB")
#pred = model.predict(song_df)

In [63]:
# Features that may be rescaled, in the order in which they are tried.
# "track_seconds" is currently not part of the list.
tuningfeatures = [
    "loudness", "danceability", "acousticness", "chorus_hit", "sections",
    "energy", "speechiness", "instrumentalness", "liveness",
    "valence", "tempo",
]


# THIS IS THE METHOD TO INSERT A TRACK ID AND GET THE FEEDBACK WHAT TO TUNE
def test(track_id):
    """Look up a track and report the first single-feature change giving a HIT.

    The track's features are fetched with ``id_to_df`` and every entry of
    ``tuningfeatures`` is tried in turn through ``success``. The search stops
    at the first feature that works.

    Returns 1 when such a feature exists, otherwise ``None``.
    """
    song_df = id_to_df(track_id)
    for candidate in tuningfeatures:
        if success(candidate, song_df) != 1:
            continue
        print("you have reached a HIT")
        return 1
    return None


test("6uk8tI6pwxxdVTNlNOJeJh")

no success with loudness
no success with danceability
HIT reached
You have to change acousticness by 0.8
success
you have reached a HIT




1

## Rest

Creating a Function to Adjust Features: This function will receive a feature value and a percentage change and return the adjusted value.

In [48]:
def adjust_feature(value, percentage):
    """Return ``value`` plus ``percentage`` percent of ``value``.

    The offset is ``value * percentage / 100``, so a value of 0 is never
    changed and the sign of the offset follows the sign of ``value``.
    """
    delta = value * percentage / 100
    return value + delta

Testing Feature Adjustments: For each feature of a song, you'll want to test how increasing and decreasing it affects the model's prediction.

In [49]:
#def test_adjustments(song_features, percentage_range):
optimal_changes = {}
    for feature, value in song_features.items():
        input_data = np.array(list(song_features.values()))
        max_prob = predict_proba_gb(xgb2, [input_data])[0][1]  # probability of being a hit
        optimal_change = 0  # no change

        for percentage in percentage_range:
            # Increase feature
            # Increase feature
            adjusted_features = song_features.copy()
            adjusted_features[feature] = adjust_feature(value, percentage)
            input_data = np.array(list(adjusted_features.values()))
            prob = predict_proba_gb(xgb2, [input_data])[0][1]
            
            if prob > max_prob:
                max_prob = prob
            optimal_change = percentage
            
            #adjusted_features = song_features.copy()
            #adjusted_features[feature] = adjust_feature(value, percentage)
            #prob = model.predict_proba([adjusted_features])[0][1]
            #if prob > max_prob:
                #max_prob = prob
                #optimal_change = percentage

            # Decrease feature
            adjusted_features = song_features.copy()
            adjusted_features[feature] = adjust_feature(value, -percentage)
            input_data = np.array(list(adjusted_features.values()))
            prob = predict_proba_gb(xgb2, [input_data])[0][1]
            if prob > max_prob:
                max_prob = prob
            optimal_change = -percentage

            #adjusted_features = song_features.copy()
            #adjusted_features[feature] = adjust_feature(value, -percentage)
            #prob = model.predict_proba([adjusted_features])[0][1]
            #if prob > max_prob:
                #max_prob = prob
                #optimal_change = -percentage

        optimal_changes[feature] = optimal_change

    return optimal_changes


IndentationError: unexpected indent (1133524161.py, line 3)

Note: `test_adjustments` tries every percentage in `percentage_range` both as an increase and as a decrease, so the range only needs non-negative values. For example, `range(5, 51, 5)` tests each feature at ±5%, ±10%, …, up to ±50%.

Providing Recommendations: Use the test_adjustments function to provide recommendations on how to improve a song.

In [None]:
# Earlier example with non-zero feature values, kept for reference:
# song_features = {'time_signature': 4.0, 'chorus_hit': 24.30824, 'sections': 7.0,
#                 "danceability": 0.0, "energy": 0.172, "loudness":-14.430,
#                   "speechiness": 0.0403, "acousticness":0.4900, "instrumentalness": 0.0000,
#                     "liveness": 0.0779, "valence": 0.8450, "tempo": 185.655, "track_second": 173.533}  # example song features

# NOTE(review): every value below is 0. adjust_feature computes
# value + value * percentage / 100, which is still 0 for any percentage, so no
# tested adjustment can alter the model input for this example.
# NOTE(review): the key "track_second" differs from the "track_seconds" column
# created in the data-preparation cell — confirm which name is intended.
song_features = {'time_signature': 0.0, 'chorus_hit': 0, 'sections': 0,
                "danceability": 0, "energy": 0, "loudness":0.0,
                  "speechiness": 0.0, "acousticness":0.0, "instrumentalness": 0.0,                     
                  "liveness": 0.0, "valence": 0.0, "tempo": 0, "track_second": 0}  # example song features


# range(0, 21, 5) yields the percentages 0, 5, 10, 15 and 20.
recommendations = test_adjustments(song_features, range(0, 21, 5))

# Turn each signed percentage into a sentence: positive -> increase,
# negative -> decrease, zero -> leave the feature as it is.
for feature, change in recommendations.items():
    if change > 0:
        print(f"Increase {feature} by {change}% for a higher probability of being a hit.")
    elif change < 0:
        print(f"Decrease {feature} by {-change}% for a higher probability of being a hit.")
    else:
        print(f"{feature} is optimal as it is.")


Decrease time_signature by 20% for a higher probability of being a hit.
Decrease chorus_hit by 20% for a higher probability of being a hit.
Decrease sections by 20% for a higher probability of being a hit.
Decrease danceability by 20% for a higher probability of being a hit.
Decrease energy by 20% for a higher probability of being a hit.
Decrease loudness by 20% for a higher probability of being a hit.
Decrease speechiness by 20% for a higher probability of being a hit.
Decrease acousticness by 20% for a higher probability of being a hit.
Decrease instrumentalness by 20% for a higher probability of being a hit.
Decrease liveness by 20% for a higher probability of being a hit.
Decrease valence by 20% for a higher probability of being a hit.
Decrease tempo by 20% for a higher probability of being a hit.
Decrease track_second by 20% for a higher probability of being a hit.




This will provide specific, actionable recommendations on how to adjust each song feature for a higher probability of being a hit, according to your prediction model. However, keep in mind that the model's performance and recommendations will only be as good as the data it was trained on. Always use a variety of data sources and constantly update your model for the best results.