In [None]:
!pip install boruta

Collecting boruta
  Downloading Boruta-0.3-py3-none-any.whl (56 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/56.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.6/56.6 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: boruta
Successfully installed boruta-0.3


In [None]:
# -*- coding: utf-8 -*-
"""
Created on Sun Jun  4 12:37:19 2023

@author: Tahsin
"""
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from boruta import BorutaPy
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import RFE, SelectKBest, f_regression
from sklearn.linear_model import LinearRegression, Lasso
import warnings

warnings.filterwarnings("ignore")

# Load the dataset
dataframe = pd.read_csv('/content/drive/MyDrive/bitirme/out.csv')

features = ["acousticness", "danceability", "duration_ms", "energy", "instrumentalness", "key", "liveness",
            "mode", "speechiness", "tempo", "time_signature", "valence", 'loudness',"popularity"]

# korelasyon tabanlı feature engineering
dataframe['energy_loudness_ratio'] = dataframe['energy'] / dataframe['loudness']
dataframe['acoustic_energy_ratio'] = dataframe['acousticness'] / dataframe['energy']
dataframe['loudness_instrumentalness_diff'] = abs(dataframe['loudness'] - dataframe['instrumentalness'])
dataframe['energy_instrumentalness_diff'] = abs(dataframe['energy'] - dataframe['instrumentalness'])
dataframe['danceability_energy_diff'] = abs(dataframe['danceability'] - dataframe['energy'])
dataframe['duration_energy_density'] = dataframe['duration_ms'] / dataframe['energy']

'''
energy_loudness_ratio: Bu özellik, enerji değerini ses şiddetiyle (loudness) böler. Enerji değeri yüksek olan parçaların genellikle daha yüksek bir ses şiddeti olduğu düşünülür.

acoustic_energy_ratio: Bu özellik, akustiklik (acousticness) değerini enerjiyle böler. Akustiklik değeri yüksek olan parçaların enerji seviyelerine göre ne kadar akustik olduklarını gösterir.

loudness_instrumentalness_diff: Bu özellik, ses şiddeti ile enstrümantallik (instrumentalness) değeri arasındaki farkı hesaplar. Bu fark, bir parçanın enstrümantal olma derecesi ile ses şiddeti arasındaki ilişkiyi yansıtır.

energy_instrumentalness_diff: Bu özellik, enerji seviyesi ile enstrümantallik değeri arasındaki farkı hesaplar. Bu fark, bir parçanın enstrümantal olma derecesi ile enerji seviyesi arasındaki ilişkiyi gösterir.

danceability_energy_diff: Bu özellik, dans edilebilirlik değerini enerji seviyesinden çıkarır. Bu fark, bir parçanın dans edilebilirlik özelliği ile enerji seviyesi arasındaki ilişkiyi ifade eder.

duration_energy_density: Bu özellik, parça süresini enerji seviyesine böler ve enerji yoğunluğunu temsil eder. Bu özellik, bir parçanın ne kadar süre boyunca enerjik olduğunu ve enerji seviyesinin süre üzerindeki dağılımını gösterir.

'''

#istatistiksel feature engineering
dataframe['mean'] = dataframe[features].mean(axis=1)
dataframe['std'] = dataframe[features].std(axis=1)
dataframe['var'] = dataframe[features].var(axis=1)
dataframe['median'] = dataframe[features].median(axis=1)
dataframe['min'] = dataframe[features].min(axis=1)
dataframe['max'] = dataframe[features].max(axis=1)
dataframe['sum'] = dataframe[features].sum(axis=1)
dataframe['kurtosis'] = dataframe[features].kurtosis(axis=1)
dataframe['skewness'] = dataframe[features].skew(axis=1)
dataframe['zero_crossings'] = (np.diff(np.sign(dataframe[features].values), axis=1) != 0).sum(axis=1)
dataframe['slope_sign_changes'] = (np.diff(np.sign(np.gradient(dataframe[features].values, axis=1)), axis=1) != 0).sum(axis=1)

'''

mean: Özelliklerin ortalama değeri hesaplanır. Her veri noktasının özelliklerine göre ortalama değeri temsil eder.
std: Özelliklerin standart sapması hesaplanır. Her veri noktasının özelliklerine göre standart sapma değeri temsil eder.
var: Özelliklerin varyansı hesaplanır. Her veri noktasının özelliklerine göre varyans değeri temsil eder.
median: Özelliklerin ortanca değeri hesaplanır. Her veri noktasının özelliklerine göre ortanca değeri temsil eder.
min: Özelliklerin minimum değeri hesaplanır. Her veri noktasının özelliklerine göre minimum değeri temsil eder.
max: Özelliklerin maksimum değeri hesaplanır. Her veri noktasının özelliklerine göre maksimum değeri temsil eder.
sum: Özelliklerin toplamı hesaplanır. Her veri noktasının özelliklerine göre toplam değeri temsil eder.
kurtosis: Özelliklerin kurtosis değeri hesaplanır. Kurtosis, özelliklerin dağılımının sivrilik veya basıklık ölçüsünü temsil eder.
skewness: Özelliklerin skewness değeri hesaplanır. Skewness, özelliklerin dağılımının simetrisini temsil eder.
zero_crossings: Özelliklerin sıfır geçiş sayısı hesaplanır. Sıfır geçişleri, veri noktalarında özellik değerinin sıfırdan pozitif veya negatif değere geçişini temsil eder.
slope_sign_changes: Özelliklerin eğim işaret değişiklik sayısı hesaplanır. İşaret değişiklikleri, veri noktalarında özellik değerinin yükselme veya düşme eğiminde işaretin değiştiğini temsil eder.
'''

'''
Bu hesaplamalar, veri setindeki özelliklere ilişkin ek bilgiler elde etmek ve özelliklerin dağılımını, eğilimini veya diğer özelliklerini temsil etmek için kullanılır.
 Bu şekilde, makine öğrenmesi modellerine daha fazla bilgi sağlanabilir veya veri setindeki örüntüler daha iyi anlaşılabilir hale getirilebilir.
'''


# Use the updated dataframe for further analysis
# Tüm özellikleri içeren güncellenmiş dataframe'i kullanarak ilerleyen analizlerde kullanabilirsiniz
dataframe = dataframe[features + ['energy_loudness_ratio', 'acoustic_energy_ratio', 'loudness_instrumentalness_diff', 'energy_instrumentalness_diff', 'danceability_energy_diff', 'duration_energy_density', 'mean', 'std', 'var', 'median', 'min', 'max', 'sum', 'kurtosis', 'skewness', 'zero_crossings', 'slope_sign_changes']]
dataframe = dataframe.replace([np.inf, -np.inf], np.nan).dropna()

data = dataframe

# Split the data into features and target
X = data.drop('popularity', axis=1)  # Replace 'target_column_name' with the actual target column name
y = data['popularity']

# Perform feature scaling
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Random Forest Importance ile özellik seçimi
rf = RandomForestRegressor()
rf.fit(X_train, y_train)
importances = rf.feature_importances_
selected_indices_rf = np.argsort(importances)[::-1][:5]
selected_features_rf = X.columns[selected_indices_rf]

# Boruta ile özellik seçimi
boruta = BorutaPy(estimator=RandomForestRegressor(), n_estimators='auto', max_iter=25)
boruta.fit(X_train, y_train)
selected_indices_boruta = boruta.support_
selected_features_boruta = X.columns[selected_indices_boruta]

# RFE ile özellik seçimi
model = LinearRegression()
rfe = RFE(estimator=model, n_features_to_select=5)
rfe.fit(X_train, y_train)
selected_indices_rfe = rfe.support_
selected_features_rfe = X.columns[selected_indices_rfe]

# Lasso Regression ile özellik seçimi
lasso = Lasso(alpha=0.1)
lasso.fit(X_train, y_train)
selected_indices_lasso = np.nonzero(lasso.coef_)[0]
selected_features_lasso = X.columns[selected_indices_lasso]

# SelectKBest ile özellik seçimi
kbest = SelectKBest(score_func=f_regression, k=5)
kbest.fit(X_train, y_train)
selected_indices_kbest = kbest.get_support(indices=True)
selected_features_kbest = X.columns[selected_indices_kbest]

# Seçilen özellikleri yazdır
print("Selected Features using Random Forest Importance:")
print(selected_features_rf)
print("\nSelected Features using Boruta:")
print(selected_features_boruta)
print("\nSelected Features using Recursive Feature Elimination (RFE):")
print(selected_features_rfe)
print("\nSelected Features using Lasso Regression:")
print(selected_features_lasso)
print("\nSelected Features using SelectKBest:")
print(selected_features_kbest)


Selected Features using Random Forest Importance:
Index(['speechiness', 'tempo', 'skewness', 'kurtosis', 'valence'], dtype='object')

Selected Features using Boruta:
Index(['acousticness', 'danceability', 'instrumentalness', 'liveness',
       'speechiness', 'tempo', 'valence', 'acoustic_energy_ratio',
       'danceability_energy_diff', 'duration_energy_density', 'mean', 'median',
       'sum', 'kurtosis', 'skewness'],
      dtype='object')

Selected Features using Recursive Feature Elimination (RFE):
Index(['duration_ms', 'tempo', 'mean', 'max', 'sum'], dtype='object')

Selected Features using Lasso Regression:
Index(['danceability', 'energy', 'mode', 'speechiness', 'valence',
       'energy_instrumentalness_diff', 'danceability_energy_diff', 'mean',
       'sum'],
      dtype='object')

Selected Features using SelectKBest:
Index(['duration_ms', 'speechiness', 'mean', 'max', 'sum'], dtype='object')


In [None]:
# -*- coding: utf-8 -*-
"""
Created on Sun Jun  4 12:37:19 2023

@author: Tahsin
"""
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from boruta import BorutaPy
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import RFE, SelectKBest, f_regression
from sklearn.linear_model import LinearRegression, Lasso
import warnings

warnings.filterwarnings("ignore")

# Load the dataset
dataframe = pd.read_csv('/content/drive/MyDrive/bitirme/dataset.csv')

features = ["acousticness", "danceability", "duration_ms", "energy", "instrumentalness", "key", "liveness",
            "mode", "speechiness", "tempo", "time_signature", "valence", 'loudness',"popularity"]

# korelasyon tabanlı feature engineering
dataframe['energy_loudness_ratio'] = dataframe['energy'] / dataframe['loudness']
dataframe['acoustic_energy_ratio'] = dataframe['acousticness'] / dataframe['energy']
dataframe['loudness_instrumentalness_diff'] = abs(dataframe['loudness'] - dataframe['instrumentalness'])
dataframe['energy_instrumentalness_diff'] = abs(dataframe['energy'] - dataframe['instrumentalness'])
dataframe['danceability_energy_diff'] = abs(dataframe['danceability'] - dataframe['energy'])
dataframe['duration_energy_density'] = dataframe['duration_ms'] / dataframe['energy']

#istatistiksel feature engineering
dataframe['mean'] = dataframe[features].mean(axis=1)
dataframe['std'] = dataframe[features].std(axis=1)
dataframe['var'] = dataframe[features].var(axis=1)
dataframe['median'] = dataframe[features].median(axis=1)
dataframe['min'] = dataframe[features].min(axis=1)
dataframe['max'] = dataframe[features].max(axis=1)
dataframe['sum'] = dataframe[features].sum(axis=1)
dataframe['kurtosis'] = dataframe[features].kurtosis(axis=1)
dataframe['skewness'] = dataframe[features].skew(axis=1)
dataframe['zero_crossings'] = (np.diff(np.sign(dataframe[features].values), axis=1) != 0).sum(axis=1)
dataframe['slope_sign_changes'] = (np.diff(np.sign(np.gradient(dataframe[features].values, axis=1)), axis=1) != 0).sum(axis=1)


# Use the updated dataframe for further analysis
# Tüm özellikleri içeren güncellenmiş dataframe'i kullanarak ilerleyen analizlerde kullanabilirsiniz
dataframe = dataframe[features + ['energy_loudness_ratio', 'acoustic_energy_ratio', 'loudness_instrumentalness_diff', 'energy_instrumentalness_diff', 'danceability_energy_diff', 'duration_energy_density', 'mean', 'std', 'var', 'median', 'min', 'max', 'sum', 'kurtosis', 'skewness', 'zero_crossings', 'slope_sign_changes']]
dataframe = dataframe.replace([np.inf, -np.inf], np.nan).dropna()

data = dataframe

# Split the data into features and target
X = data.drop('popularity', axis=1)  # Replace 'target_column_name' with the actual target column name
y = data['popularity']

# Perform feature scaling
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Random Forest Importance ile özellik seçimi
rf = RandomForestRegressor()
rf.fit(X_train, y_train)
importances = rf.feature_importances_
selected_indices_rf = np.argsort(importances)[::-1][:5]
selected_features_rf = X.columns[selected_indices_rf]

# Boruta ile özellik seçimi
boruta = BorutaPy(estimator=RandomForestRegressor(), n_estimators='auto', max_iter=25)
boruta.fit(X_train, y_train)
selected_indices_boruta = boruta.support_
selected_features_boruta = X.columns[selected_indices_boruta]

# RFE ile özellik seçimi
model = LinearRegression()
rfe = RFE(estimator=model, n_features_to_select=5)
rfe.fit(X_train, y_train)
selected_indices_rfe = rfe.support_
selected_features_rfe = X.columns[selected_indices_rfe]

# Lasso Regression ile özellik seçimi
lasso = Lasso(alpha=0.1)
lasso.fit(X_train, y_train)
selected_indices_lasso = np.nonzero(lasso.coef_)[0]
selected_features_lasso = X.columns[selected_indices_lasso]

# SelectKBest ile özellik seçimi
kbest = SelectKBest(score_func=f_regression, k=5)
kbest.fit(X_train, y_train)
selected_indices_kbest = kbest.get_support(indices=True)
selected_features_kbest = X.columns[selected_indices_kbest]

# Seçilen özellikleri yazdır
print("Selected Features using Random Forest Importance:")
print(selected_features_rf)
print("\nSelected Features using Boruta:")
print(selected_features_boruta)
print("\nSelected Features using Recursive Feature Elimination (RFE):")
print(selected_features_rfe)
print("\nSelected Features using Lasso Regression:")
print(selected_features_lasso)
print("\nSelected Features using SelectKBest:")
print(selected_features_kbest)


Selected Features using Random Forest Importance:
Index(['median', 'valence', 'tempo', 'danceability', 'acousticness'], dtype='object')

Selected Features using Boruta:
Index(['acousticness', 'danceability', 'energy', 'instrumentalness', 'key',
       'liveness', 'mode', 'speechiness', 'tempo', 'valence',
       'energy_loudness_ratio', 'acoustic_energy_ratio',
       'loudness_instrumentalness_diff', 'energy_instrumentalness_diff',
       'danceability_energy_diff', 'mean', 'std', 'median', 'sum', 'kurtosis',
       'skewness', 'zero_crossings'],
      dtype='object')

Selected Features using Recursive Feature Elimination (RFE):
Index(['duration_ms', 'tempo', 'mean', 'max', 'sum'], dtype='object')

Selected Features using Lasso Regression:
Index(['acousticness', 'danceability', 'energy', 'instrumentalness', 'key',
       'liveness', 'mode', 'speechiness', 'valence', 'median',
       'zero_crossings'],
      dtype='object')

Selected Features using SelectKBest:
Index(['instrumentalness

'acousticness', 'danceability', 'duration_ms', 'instrumentalness', 'speechiness', 'tempo',
'valence', 'energy_loudness_ratio', 'acoustic_energy_ratio', 'energy_instrumentalness_diff',
'danceability_energy_diff', 'duration_energy_density', 'mean', 'std', 'var', 'median', 'max',
'sum', 'kurtosis', 'skewness'
Özellikleri seçildi

In [None]:
# -*- coding: utf-8 -*-
"""
Created on Sun Jun  4 12:37:19 2023

@author: Tahsin
"""
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from boruta import BorutaPy
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import RFE, SelectKBest, f_regression
from sklearn.linear_model import LinearRegression, Lasso
import warnings

warnings.filterwarnings("ignore")

# Load the dataset
dataframe = pd.read_csv('/content/drive/MyDrive/bitirme/tracks.csv')

features = ["acousticness", "danceability", "duration_ms", "energy", "instrumentalness", "key", "liveness",
            "mode", "speechiness", "tempo", "time_signature", "valence", 'loudness',"popularity"]

# korelasyon tabanlı feature engineering
dataframe['energy_loudness_ratio'] = dataframe['energy'] / dataframe['loudness']
dataframe['acoustic_energy_ratio'] = dataframe['acousticness'] / dataframe['energy']
dataframe['loudness_instrumentalness_diff'] = abs(dataframe['loudness'] - dataframe['instrumentalness'])
dataframe['energy_instrumentalness_diff'] = abs(dataframe['energy'] - dataframe['instrumentalness'])
dataframe['danceability_energy_diff'] = abs(dataframe['danceability'] - dataframe['energy'])
dataframe['duration_energy_density'] = dataframe['duration_ms'] / dataframe['energy']

#istatistiksel feature engineering
dataframe['mean'] = dataframe[features].mean(axis=1)
dataframe['std'] = dataframe[features].std(axis=1)
dataframe['var'] = dataframe[features].var(axis=1)
dataframe['median'] = dataframe[features].median(axis=1)
dataframe['min'] = dataframe[features].min(axis=1)
dataframe['max'] = dataframe[features].max(axis=1)
dataframe['sum'] = dataframe[features].sum(axis=1)
dataframe['kurtosis'] = dataframe[features].kurtosis(axis=1)
dataframe['skewness'] = dataframe[features].skew(axis=1)
dataframe['zero_crossings'] = (np.diff(np.sign(dataframe[features].values), axis=1) != 0).sum(axis=1)
dataframe['slope_sign_changes'] = (np.diff(np.sign(np.gradient(dataframe[features].values, axis=1)), axis=1) != 0).sum(axis=1)


# Use the updated dataframe for further analysis
# Tüm özellikleri içeren güncellenmiş dataframe'i kullanarak ilerleyen analizlerde kullanabilirsiniz
dataframe = dataframe[features + ['energy_loudness_ratio', 'acoustic_energy_ratio', 'loudness_instrumentalness_diff', 'energy_instrumentalness_diff', 'danceability_energy_diff', 'duration_energy_density', 'mean', 'std', 'var', 'median', 'min', 'max', 'sum', 'kurtosis', 'skewness', 'zero_crossings', 'slope_sign_changes']]
dataframe = dataframe.replace([np.inf, -np.inf], np.nan).dropna()

data = dataframe

# Split the data into features and target
X = data.drop('popularity', axis=1)  # Replace 'target_column_name' with the actual target column name
y = data['popularity']

# Perform feature scaling
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Random Forest Importance ile özellik seçimi
rf = RandomForestRegressor()
rf.fit(X_train, y_train)
importances = rf.feature_importances_
selected_indices_rf = np.argsort(importances)[::-1][:5]
selected_features_rf = X.columns[selected_indices_rf]

# Boruta ile özellik seçimi
boruta = BorutaPy(estimator=RandomForestRegressor(), n_estimators='auto', max_iter=25)
boruta.fit(X_train, y_train)
selected_indices_boruta = boruta.support_
selected_features_boruta = X.columns[selected_indices_boruta]

# RFE ile özellik seçimi
model = LinearRegression()
rfe = RFE(estimator=model, n_features_to_select=5)
rfe.fit(X_train, y_train)
selected_indices_rfe = rfe.support_
selected_features_rfe = X.columns[selected_indices_rfe]

# Lasso Regression ile özellik seçimi
lasso = Lasso(alpha=0.1)
lasso.fit(X_train, y_train)
selected_indices_lasso = np.nonzero(lasso.coef_)[0]
selected_features_lasso = X.columns[selected_indices_lasso]

# SelectKBest ile özellik seçimi
kbest = SelectKBest(score_func=f_regression, k=5)
kbest.fit(X_train, y_train)
selected_indices_kbest = kbest.get_support(indices=True)
selected_features_kbest = X.columns[selected_indices_kbest]

# Seçilen özellikleri yazdır
print("Selected Features using Random Forest Importance:")
print(selected_features_rf)
print("\nSelected Features using Boruta:")
print(selected_features_boruta)
print("\nSelected Features using Recursive Feature Elimination (RFE):")
print(selected_features_rfe)
print("\nSelected Features using Lasso Regression:")
print(selected_features_lasso)
print("\nSelected Features using SelectKBest:")
print(selected_features_kbest)


In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from boruta import BorutaPy
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import RFE, SelectKBest, f_regression
from sklearn.linear_model import LinearRegression, Lasso
import warnings

warnings.filterwarnings("ignore")

# Load the dataset
dataframe = pd.read_csv('/content/drive/MyDrive/bitirme/out.csv')

features = ["acousticness", "danceability", "duration_ms", "energy", "instrumentalness", "key", "liveness",
            "mode", "speechiness", "tempo", "time_signature", "valence", 'loudness',"popularity"]

# Korelasyon tabanlı feature engineering
dataframe['energy_loudness_ratio'] = dataframe['energy'] / dataframe['loudness']
dataframe['acoustic_energy_ratio'] = dataframe['acousticness'] / dataframe['energy']
dataframe['loudness_instrumentalness_diff'] = abs(dataframe['loudness'] - dataframe['instrumentalness'])
dataframe['energy_instrumentalness_diff'] = abs(dataframe['energy'] - dataframe['instrumentalness'])
dataframe['danceability_energy_diff'] = abs(dataframe['danceability'] - dataframe['energy'])
dataframe['duration_energy_density'] = dataframe['duration_ms'] / dataframe['energy']

# Istatistiksel feature engineering
dataframe['mean'] = dataframe[features].mean(axis=1)
dataframe['std'] = dataframe[features].std(axis=1)
dataframe['var'] = dataframe[features].var(axis=1)
dataframe['median'] = dataframe[features].median(axis=1)
dataframe['min'] = dataframe[features].min(axis=1)
dataframe['max'] = dataframe[features].max(axis=1)
dataframe['sum'] = dataframe[features].sum(axis=1)
dataframe['kurtosis'] = dataframe[features].kurtosis(axis=1)
dataframe['skewness'] = dataframe[features].skew(axis=1)
dataframe['zero_crossings'] = (np.diff(np.sign(dataframe[features].values), axis=1) != 0).sum(axis=1)
dataframe['slope_sign_changes'] = (np.diff(np.sign(np.gradient(dataframe[features].values, axis=1)), axis=1) != 0).sum(axis=1)

# Use the updated dataframe for further analysis
dataframe = dataframe[features + ['energy_loudness_ratio', 'acoustic_energy_ratio', 'loudness_instrumentalness_diff', 'energy_instrumentalness_diff', 'danceability_energy_diff', 'duration_energy_density', 'mean', 'std', 'var', 'median', 'min', 'max', 'sum', 'kurtosis', 'skewness', 'zero_crossings', 'slope_sign_changes']]
dataframe = dataframe.replace([np.inf, -np.inf], np.nan).dropna()
data = dataframe

# Split the data into features and target
X = data.drop('popularity', axis=1)  # Replace 'popularity' with the actual target column name
y = data['popularity']

# Let's initialize a RF model
model = RandomForestRegressor(n_estimators=100, max_depth=5, random_state=42)

# Let's initialize Boruta
feat_selector = BorutaPy(
    verbose=2,
    estimator=model,
    n_estimators='auto',
    max_iter=35  # Number of iterations to perform
)

# Train Boruta
# N.B.: X and y must be numpy arrays
feat_selector.fit(np.array(X), np.array(y))

# Print support and ranking for each feature
print("\n------Support and Ranking for each feature------")
for i in range(len(feat_selector.support_)):
    if feat_selector.support_[i]:
        print("Passes the test:", X.columns[i], " - Ranking:", feat_selector.ranking_[i])
    else:
        print("Doesn't pass the test:", X.columns[i], " - Ranking:", feat_selector.ranking_[i])


Iteration: 	1 / 35
Confirmed: 	0
Tentative: 	30
Rejected: 	0
Iteration: 	2 / 35
Confirmed: 	0
Tentative: 	30
Rejected: 	0
Iteration: 	3 / 35
Confirmed: 	0
Tentative: 	30
Rejected: 	0
Iteration: 	4 / 35
Confirmed: 	0
Tentative: 	30
Rejected: 	0
Iteration: 	5 / 35
Confirmed: 	0
Tentative: 	30
Rejected: 	0
Iteration: 	6 / 35
Confirmed: 	0
Tentative: 	30
Rejected: 	0
Iteration: 	7 / 35
Confirmed: 	0
Tentative: 	30
Rejected: 	0
Iteration: 	8 / 35
Confirmed: 	20
Tentative: 	4
Rejected: 	6
Iteration: 	9 / 35
Confirmed: 	20
Tentative: 	4
Rejected: 	6
Iteration: 	10 / 35
Confirmed: 	20
Tentative: 	4
Rejected: 	6
Iteration: 	11 / 35
Confirmed: 	20
Tentative: 	4
Rejected: 	6
Iteration: 	12 / 35
Confirmed: 	20
Tentative: 	4
Rejected: 	6
Iteration: 	13 / 35
Confirmed: 	20
Tentative: 	4
Rejected: 	6
Iteration: 	14 / 35
Confirmed: 	20
Tentative: 	4
Rejected: 	6
Iteration: 	15 / 35
Confirmed: 	20
Tentative: 	4
Rejected: 	6
Iteration: 	16 / 35
Confirmed: 	20
Tentative: 	4
Rejected: 	6
Iteration: 	17 / 