In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from boruta import BorutaPy
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import RFE, SelectKBest, f_regression
from sklearn.linear_model import LinearRegression, Lasso
import warnings

# NOTE: this installs a global "ignore" filter, so every warning raised for the
# rest of the kernel session is hidden, not only those from this cell.
warnings.filterwarnings("ignore")

# Load the dataset
dataframe = pd.read_csv('Source/out.csv')

# Base columns taken from the CSV. "popularity" is part of this list, so it is
# kept in the output and also feeds the row-wise statistics computed below.
# NOTE(review): if popularity is the target modelled later, 'mean', 'sum', etc.
# carry it into the predictors -- confirm this is intended.
features = ["acousticness", "danceability", "duration_ms", "energy", "instrumentalness", "key", "liveness",
            "mode", "speechiness", "tempo", "time_signature", "valence", 'loudness', "popularity"]

# Correlation-based feature engineering: ratios and absolute differences
# between pairs of base columns. For numeric columns a zero denominator gives
# +/-inf (or NaN for 0/0); those rows are removed by the clean-up step below.
dataframe['energy_loudness_ratio'] = dataframe['energy'] / dataframe['loudness']
dataframe['acoustic_energy_ratio'] = dataframe['acousticness'] / dataframe['energy']
dataframe['loudness_instrumentalness_diff'] = (dataframe['loudness'] - dataframe['instrumentalness']).abs()
dataframe['energy_instrumentalness_diff'] = (dataframe['energy'] - dataframe['instrumentalness']).abs()
dataframe['danceability_energy_diff'] = (dataframe['danceability'] - dataframe['energy']).abs()
dataframe['duration_energy_density'] = dataframe['duration_ms'] / dataframe['energy']

# Statistical feature engineering: each row of the base columns is treated as
# one sequence (in the order given by `features`) and summarised.
# The slice and its array are taken once instead of being rebuilt per statistic;
# the base columns are never modified, so the values are the same.
base = dataframe[features]
base_values = base.values

dataframe['mean'] = base.mean(axis=1)
dataframe['std'] = base.std(axis=1)
dataframe['var'] = base.var(axis=1)
dataframe['median'] = base.median(axis=1)
dataframe['min'] = base.min(axis=1)
dataframe['max'] = base.max(axis=1)
dataframe['sum'] = base.sum(axis=1)
dataframe['kurtosis'] = base.kurtosis(axis=1)
dataframe['skewness'] = base.skew(axis=1)
# Number of adjacent column pairs whose sign differs.
dataframe['zero_crossings'] = (np.diff(np.sign(base_values), axis=1) != 0).sum(axis=1)
# Number of sign changes in the gradient taken along the columns.
dataframe['slope_sign_changes'] = (np.diff(np.sign(np.gradient(base_values, axis=1)), axis=1) != 0).sum(axis=1)

# Engineered columns, in the order they appear in the cleaned frame.
engineered_columns = ['energy_loudness_ratio', 'acoustic_energy_ratio', 'loudness_instrumentalness_diff',
                      'energy_instrumentalness_diff', 'danceability_energy_diff', 'duration_energy_density',
                      'mean', 'std', 'var', 'median', 'min', 'max', 'sum', 'kurtosis', 'skewness',
                      'zero_crossings', 'slope_sign_changes']

# Use the updated dataframe for further analysis: keep the base columns followed
# by the engineered ones, then drop every row that holds NaN or +/-inf in any of
# those columns (including columns that are not exported below).
dataframe = dataframe[features + engineered_columns].replace([np.inf, -np.inf], np.nan).dropna()

# Subset of columns written to disk; 'popularity' is kept as the last column.
selected_features = ['acousticness', 'danceability', 'duration_ms', 'instrumentalness', 'speechiness', 'tempo',
                     'valence', 'energy_loudness_ratio', 'acoustic_energy_ratio', 'energy_instrumentalness_diff',
                     'danceability_energy_diff', 'duration_energy_density', 'mean', 'std', 'var', 'median', 'max',
                     'sum', 'kurtosis', 'skewness', 'popularity']
data = dataframe[selected_features]

data.to_csv('Source/new_out2.csv', index=False)



In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from boruta import BorutaPy
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import RFE, SelectKBest, f_regression
from sklearn.linear_model import LinearRegression, Lasso
import warnings

# NOTE: this installs a global "ignore" filter, so every warning raised for the
# rest of the kernel session is hidden, not only those from this cell.
warnings.filterwarnings("ignore")

# Load the dataset
dataframe = pd.read_csv('Source/dataset.csv')

# Base columns taken from the CSV. "popularity" is part of this list, so it is
# kept in the output and also feeds the row-wise statistics computed below.
# NOTE(review): if popularity is the target modelled later, 'mean', 'sum', etc.
# carry it into the predictors -- confirm this is intended.
features = ["acousticness", "danceability", "duration_ms", "energy", "instrumentalness", "key", "liveness",
            "mode", "speechiness", "tempo", "time_signature", "valence", 'loudness', "popularity"]

# Correlation-based feature engineering: ratios and absolute differences
# between pairs of base columns. For numeric columns a zero denominator gives
# +/-inf (or NaN for 0/0); those rows are removed by the clean-up step below.
dataframe['energy_loudness_ratio'] = dataframe['energy'] / dataframe['loudness']
dataframe['acoustic_energy_ratio'] = dataframe['acousticness'] / dataframe['energy']
dataframe['loudness_instrumentalness_diff'] = (dataframe['loudness'] - dataframe['instrumentalness']).abs()
dataframe['energy_instrumentalness_diff'] = (dataframe['energy'] - dataframe['instrumentalness']).abs()
dataframe['danceability_energy_diff'] = (dataframe['danceability'] - dataframe['energy']).abs()
dataframe['duration_energy_density'] = dataframe['duration_ms'] / dataframe['energy']

# Statistical feature engineering: each row of the base columns is treated as
# one sequence (in the order given by `features`) and summarised.
# The slice and its array are taken once instead of being rebuilt per statistic;
# the base columns are never modified, so the values are the same.
base = dataframe[features]
base_values = base.values

dataframe['mean'] = base.mean(axis=1)
dataframe['std'] = base.std(axis=1)
dataframe['var'] = base.var(axis=1)
dataframe['median'] = base.median(axis=1)
dataframe['min'] = base.min(axis=1)
dataframe['max'] = base.max(axis=1)
dataframe['sum'] = base.sum(axis=1)
dataframe['kurtosis'] = base.kurtosis(axis=1)
dataframe['skewness'] = base.skew(axis=1)
# Number of adjacent column pairs whose sign differs.
dataframe['zero_crossings'] = (np.diff(np.sign(base_values), axis=1) != 0).sum(axis=1)
# Number of sign changes in the gradient taken along the columns.
dataframe['slope_sign_changes'] = (np.diff(np.sign(np.gradient(base_values, axis=1)), axis=1) != 0).sum(axis=1)

# Engineered columns, in the order they appear in the cleaned frame.
engineered_columns = ['energy_loudness_ratio', 'acoustic_energy_ratio', 'loudness_instrumentalness_diff',
                      'energy_instrumentalness_diff', 'danceability_energy_diff', 'duration_energy_density',
                      'mean', 'std', 'var', 'median', 'min', 'max', 'sum', 'kurtosis', 'skewness',
                      'zero_crossings', 'slope_sign_changes']

# Use the updated dataframe for further analysis: keep the base columns followed
# by the engineered ones, then drop every row that holds NaN or +/-inf in any of
# those columns (including columns that are not exported below).
dataframe = dataframe[features + engineered_columns].replace([np.inf, -np.inf], np.nan).dropna()

# Subset of columns written to disk; 'popularity' is kept as the last column.
selected_features = ['acousticness', 'danceability', 'duration_ms', 'instrumentalness', 'speechiness', 'tempo',
                     'valence', 'energy_loudness_ratio', 'acoustic_energy_ratio', 'energy_instrumentalness_diff',
                     'danceability_energy_diff', 'duration_energy_density', 'mean', 'std', 'var', 'median', 'max',
                     'sum', 'kurtosis', 'skewness', 'popularity']
data = dataframe[selected_features]

data.to_csv('Source/new_dataset2.csv', index=False)



In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from boruta import BorutaPy
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import RFE, SelectKBest, f_regression
from sklearn.linear_model import LinearRegression, Lasso
import warnings

# NOTE: this installs a global "ignore" filter, so every warning raised for the
# rest of the kernel session is hidden, not only those from this cell.
warnings.filterwarnings("ignore")

# Load the dataset
dataframe = pd.read_csv('Source/tracks.csv')

# Base columns taken from the CSV. "popularity" is part of this list, so it is
# kept in the output and also feeds the row-wise statistics computed below.
# NOTE(review): if popularity is the target modelled later, 'mean', 'sum', etc.
# carry it into the predictors -- confirm this is intended.
features = ["acousticness", "danceability", "duration_ms", "energy", "instrumentalness", "key", "liveness",
            "mode", "speechiness", "tempo", "time_signature", "valence", 'loudness', "popularity"]

# Correlation-based feature engineering: ratios and absolute differences
# between pairs of base columns. For numeric columns a zero denominator gives
# +/-inf (or NaN for 0/0); those rows are removed by the clean-up step below.
dataframe['energy_loudness_ratio'] = dataframe['energy'] / dataframe['loudness']
dataframe['acoustic_energy_ratio'] = dataframe['acousticness'] / dataframe['energy']
dataframe['loudness_instrumentalness_diff'] = (dataframe['loudness'] - dataframe['instrumentalness']).abs()
dataframe['energy_instrumentalness_diff'] = (dataframe['energy'] - dataframe['instrumentalness']).abs()
dataframe['danceability_energy_diff'] = (dataframe['danceability'] - dataframe['energy']).abs()
dataframe['duration_energy_density'] = dataframe['duration_ms'] / dataframe['energy']

# Statistical feature engineering: each row of the base columns is treated as
# one sequence (in the order given by `features`) and summarised.
# The slice and its array are taken once instead of being rebuilt per statistic;
# the base columns are never modified, so the values are the same.
base = dataframe[features]
base_values = base.values

dataframe['mean'] = base.mean(axis=1)
dataframe['std'] = base.std(axis=1)
dataframe['var'] = base.var(axis=1)
dataframe['median'] = base.median(axis=1)
dataframe['min'] = base.min(axis=1)
dataframe['max'] = base.max(axis=1)
dataframe['sum'] = base.sum(axis=1)
dataframe['kurtosis'] = base.kurtosis(axis=1)
dataframe['skewness'] = base.skew(axis=1)
# Number of adjacent column pairs whose sign differs.
dataframe['zero_crossings'] = (np.diff(np.sign(base_values), axis=1) != 0).sum(axis=1)
# Number of sign changes in the gradient taken along the columns.
dataframe['slope_sign_changes'] = (np.diff(np.sign(np.gradient(base_values, axis=1)), axis=1) != 0).sum(axis=1)

# Engineered columns, in the order they appear in the cleaned frame.
engineered_columns = ['energy_loudness_ratio', 'acoustic_energy_ratio', 'loudness_instrumentalness_diff',
                      'energy_instrumentalness_diff', 'danceability_energy_diff', 'duration_energy_density',
                      'mean', 'std', 'var', 'median', 'min', 'max', 'sum', 'kurtosis', 'skewness',
                      'zero_crossings', 'slope_sign_changes']

# Use the updated dataframe for further analysis: keep the base columns followed
# by the engineered ones, then drop every row that holds NaN or +/-inf in any of
# those columns (including columns that are not exported below).
dataframe = dataframe[features + engineered_columns].replace([np.inf, -np.inf], np.nan).dropna()

# Subset of columns written to disk; 'popularity' is kept as the last column.
selected_features = ['acousticness', 'danceability', 'duration_ms', 'instrumentalness', 'speechiness', 'tempo',
                     'valence', 'energy_loudness_ratio', 'acoustic_energy_ratio', 'energy_instrumentalness_diff',
                     'danceability_energy_diff', 'duration_energy_density', 'mean', 'std', 'var', 'median', 'max',
                     'sum', 'kurtosis', 'skewness', 'popularity']
data = dataframe[selected_features]

data.to_csv('Source/new_tracks2.csv', index=False)

