**importing necessary libraries**

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

**Dividing the dataset into features and the target variable**

In [None]:
# Load the raw audio-feature table.
file_path = '/content/data(raw audio features).csv'
SBT = pd.read_csv(file_path)

# Predictor columns used by every model below (the target is kept separate).
features = ['danceability', 'energy', 'key', 'loudness', 'speechiness', 'acousticness',
            'instrumentalness', 'liveness', 'valence', 'tempo', 'duration', 'time_signature']

# NOTE: the former `SBT_normalized = SBT` line was removed. It only bound a
# second name to the *same* DataFrame (no copy was made) and the name is
# reassigned by the scaling step before it is ever read.
X = SBT[features]
Y = SBT['popularity']  # regression target
X.head()

Unnamed: 0,danceability,energy,key,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration,time_signature
0,0.578,0.331,10,-10.499,0.0361,0.939,2.2e-05,0.113,0.126,143.946,248.481,3
1,0.674,0.415,4,-11.48,0.0306,0.175,6e-06,0.107,0.221,122.012,290.147,4
2,0.539,0.352,11,-14.203,0.0307,0.835,0.0017,0.182,0.29,109.948,222.723,4
3,0.497,0.707,4,-11.786,0.0877,0.00248,0.00287,0.1,0.702,149.97,184.179,4
4,0.657,0.286,11,-14.754,0.0327,0.718,0.01,0.169,0.674,93.917,287.125,4


**normalizing the whole dataset**

In [None]:
# Columns to rescale for the exported file: the twelve predictors plus the target.
features = [
    'danceability', 'energy', 'key', 'loudness',
    'speechiness', 'acousticness', 'instrumentalness', 'liveness',
    'valence', 'tempo', 'duration', 'time_signature',
    'popularity',
]

# Fit a min-max scaler on the full table and rescale it in one pass.
# The scaler is fitted on every row, so this output is for export only.
norm = MinMaxScaler()
SBT_normalized = norm.fit_transform(SBT[features])

# Wrap the resulting array back into a labelled frame and persist it.
SBT_normalized_df = pd.DataFrame(SBT_normalized, columns=features)
SBT_normalized_df.to_csv('SBT_normalized_dataset.csv', index=False)

In [None]:
def normalize_popularity(pop):
    """Min-max scale raw popularity using the range of the raw target ``Y``.

    Parameters
    ----------
    pop : scalar or pandas.Series
        Popularity value(s) on the same raw scale as the module-level ``Y``.

    Returns
    -------
    The scaled value(s), ``(pop - Y.min()) / (Y.max() - Y.min())``.

    Notes
    -----
    Only meaningful for *raw* popularity. Do not apply it to values that have
    already been min-max scaled, otherwise they are shrunk a second time.
    """
    return (pop - Y.min()) / (Y.max() - Y.min())


dataset_df = pd.read_csv('/content/data(raw audio features).csv')
normalized_df = pd.read_csv('SBT_normalized_dataset.csv')

# Prepend the first five raw columns to the scaled feature table.
# NOTE(review): assumes these five columns are non-feature metadata that do
# not overlap with the scaled columns -- confirm against the raw CSV header.
first_five_columns = dataset_df.iloc[:, :5]
merged_df = pd.concat([first_five_columns, normalized_df], axis=1)

# The 'popularity' column read from SBT_normalized_dataset.csv was already
# min-max scaled together with the features when that file was produced, so it
# must NOT be passed through normalize_popularity() again: doing so would
# divide the [0, 1] values by the raw popularity range a second time.

# This overwrites the file that was just read. Re-running this cell on its own
# would therefore prepend the five columns again; re-run the scaling cell first.
merged_df.to_csv('SBT_normalized_dataset.csv', index=False)

## **Splitting training and test dataset** (TODO: needs to be fixed later)

In [None]:
# Hold out 20% of the rows as a test set.
# A fixed random_state makes the split (and every metric reported below)
# reproducible across kernel restarts; without it each run shuffles differently.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
X_train.head()

Unnamed: 0,danceability,energy,key,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration,time_signature
1327,0.579,0.348,0,-9.646,0.0569,0.661,0.0,0.171,0.0846,159.918,263.296,4
1999,0.616,0.847,11,-4.471,0.0359,0.764,3e-06,0.543,0.805,93.838,250.681,4
268,0.744,0.651,2,-12.436,0.089,0.0573,3.1e-05,0.111,0.856,114.966,249.507,4
3039,0.514,0.323,4,-15.923,0.0581,0.215,0.0,0.212,0.749,129.007,334.44,3
442,0.728,0.752,9,-6.709,0.0335,0.347,0.0,0.121,0.842,116.004,262.014,4


## **Min-Max normalization**

In [None]:
# Work on copies so the original split frames stay untouched for reuse.
X_train_n = X_train.copy()
X_test_n = X_test.copy()

# Fit the min-max scaler on the training features only, then apply that same
# fitted scaler to both splits so no test-set statistics leak into training.
norm = MinMaxScaler().fit(X_train_n)
X_train_norm = norm.transform(X_train_n)
X_test_norm = norm.transform(X_test_n)

# Labelled view of the scaled training features (the transform returns a bare array).
X_train_norm_df = pd.DataFrame(X_train_norm, columns=X.columns)

# Scale the target with the TRAINING range.
min_y_train = y_train.min()
max_y_train = y_train.max()
y_train_norm = (y_train - min_y_train) / (max_y_train - min_y_train)

# Test-split range, kept for reference only. It is deliberately NOT used for
# scaling: the test target must be mapped with the training min/max, exactly as
# the features are, so predictions and ground truth share one scale.
min_y_test = y_test.min()
max_y_test = y_test.max()
y_test_norm = (y_test - min_y_train) / (max_y_train - min_y_train)

# Show the normalized targets for both splits.
print(y_train_norm)
print(y_test_norm)

In [None]:
# Sanity check: list the dtype of each column in the scaled training frame.
print(X_train_norm_df.dtypes)

danceability        float64
energy              float64
key                 float64
loudness            float64
speechiness         float64
acousticness        float64
instrumentalness    float64
liveness            float64
valence             float64
tempo               float64
duration            float64
time_signature      float64
dtype: object


In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Baseline model: ordinary least squares on the min-max scaled training split.
# `fit` returns the estimator itself, so construction and training are chained.
linear_reg_model_encoded = LinearRegression().fit(X_train_norm, y_train_norm)

# Predict the held-out split, then score it on the normalized popularity scale.
y_pred_lr_encoded = linear_reg_model_encoded.predict(X_test_norm)
mse_lr_encoded = mean_squared_error(y_true=y_test_norm, y_pred=y_pred_lr_encoded)
r2_lr_encoded = r2_score(y_true=y_test_norm, y_pred=y_pred_lr_encoded)

print(f'Linear Regression Mean Squared Error: {mse_lr_encoded}')
print(f'Linear Regression R-squared: {r2_lr_encoded}')

Linear Regression Mean Squared Error: 0.04488517347066192
Linear Regression R-squared: 0.01909891229315508


In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Random forest of 100 trees; the seed is fixed so the fitted forest is repeatable.
rf_model_encoded = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
)
rf_model_encoded.fit(X_train_norm, y_train_norm)

# Predictions for the scaled test features.
y_pred_rf_norm = rf_model_encoded.predict(X_test_norm)

# Error and goodness-of-fit against the normalized test target.
mse_rf_encoded = mean_squared_error(y_test_norm, y_pred_rf_norm)
r2_rf_encoded = r2_score(y_test_norm, y_pred_rf_norm)

print(f'Random Forest Mean Squared Error: {mse_rf_encoded}')
print(f'Random Forest R-squared: {r2_rf_encoded}')

Random Forest Mean Squared Error: 0.047835629261390324
Random Forest R-squared: -0.045379067194840195


In [None]:
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score

# Support vector regressor with a linear kernel and C=1.0
# (both the kernel and C can be tuned as needed).
svr_model_encoded = SVR(kernel='linear', C=1.0).fit(X_train_norm, y_train_norm)

# Predict on the scaled test features and score on the normalized target.
y_pred_svr_encoded = svr_model_encoded.predict(X_test_norm)
mse_svr_encoded = mean_squared_error(y_true=y_test_norm, y_pred=y_pred_svr_encoded)
r2_svr_encoded = r2_score(y_true=y_test_norm, y_pred=y_pred_svr_encoded)

print(f'SVR Mean Squared Error: {mse_svr_encoded}')
print(f'SVR R-squared: {r2_svr_encoded}')

SVR Mean Squared Error: 0.048013001687953076
SVR R-squared: -0.04925528717332228
