-
Notifications
You must be signed in to change notification settings - Fork 0
/
regression.py
50 lines (45 loc) · 1.93 KB
/
regression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
from preprocessing import fetch_dataset
import numpy as np
from sklearn.model_selection import KFold
import preprocessing.feature_extraction
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import normalize
def regression(domain, feat):
    """Run 5-fold cross-validated SVR regression on song lyrics.

    Loads the lyrics dataset, min-max normalizes the target column named by
    ``domain`` to [0, 1], then for each fold extracts features via the
    ``extract_features_{feat}`` function from ``preprocessing.feature_extraction``,
    fits a linear-kernel SVR, and prints per-fold and pooled MAE/MSE.

    Parameters
    ----------
    domain : str
        Key into the dataset dict selecting the regression target
        (e.g. a metadata column alongside 'lyrics').
    feat : str
        Suffix selecting the feature extractor, resolved as
        ``extract_features_{feat}``.

    Returns
    -------
    tuple[float, float]
        ``(mae, mse)`` pooled over all folds' test predictions.

    Raises
    ------
    ValueError
        If the target column is constant (min-max normalization undefined).
    """
    dataset = fetch_dataset(pickle_path='data/dataset.pickle', lyrics_path='data/results_genre.csv', force=False,
                            as_dict=True, random_authors=50)
    kfold = KFold(n_splits=5, shuffle=True, random_state=42)
    y_all_pred = []
    y_all_te = []
    # Coerce to arrays: kfold index arrays require numpy fancy indexing,
    # which plain lists do not support.
    X = np.asarray(dataset['lyrics'])
    y = np.asarray(dataset[domain], dtype=float)
    # Min-max normalize the target to [0, 1]. Hoist min/range instead of
    # recomputing, and guard the degenerate constant-target case that would
    # otherwise produce a silent division by zero (NaNs).
    # NOTE(review): normalization statistics are computed on the FULL dataset
    # before the CV split — mild test-set leakage; confirm this is intended.
    y_min = np.min(y)
    y_range = np.max(y) - y_min
    if y_range == 0:
        raise ValueError(f"Target '{domain}' is constant; cannot min-max normalize.")
    y = (y - y_min) / y_range
    for i, (train_index, test_index) in enumerate(kfold.split(X, y)):
        print(f'----- K-FOLD EXPERIMENT {i + 1} -----')
        X_tr = X[train_index]
        X_te = X[test_index]
        y_tr = y[train_index]
        y_te = y[test_index]
        # Feature extraction is fit per-fold on the training split only.
        X_tr, X_te, feat_names = getattr(preprocessing.feature_extraction, f'extract_features_{feat}')(X_tr, X_te, y_tr)
        print("Training shape: ", X_tr.shape)
        print("Test shape: ", X_te.shape)
        print('REGRESSION')
        reg = SVR(kernel='linear').fit(X_tr, y_tr)
        y_pred = reg.predict(X_te)
        print(y_te)
        print(y_pred)
        mae = mean_absolute_error(y_te, y_pred)
        mse = mean_squared_error(y_te, y_pred)
        print(f'Mean Absolute Error: {mae:.3f}')
        print(f'Mean Squared Error: {mse:.3f}')
        # Pool predictions across folds for an overall score.
        y_all_pred.extend(y_pred)
        y_all_te.extend(y_te)
    print('----- FINAL RESULTS -----')
    mae_all = mean_absolute_error(y_all_te, y_all_pred)
    mse_all = mean_squared_error(y_all_te, y_all_pred)
    print(f'MAE: {mae_all:.3f}')
    print(f'MSE: {mse_all:.3f}')
    return mae_all, mse_all