In [1]:
# Import the data
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import balanced_accuracy_score
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from scipy.stats import pearsonr

# Read the raw Spanish wine dataset from the CSV (path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer='wines_SPA.csv')
# Show the first five rows as a quick sanity check of the parsed columns.
df.head(n=5)

Unnamed: 0,winery,wine,year,rating,num_reviews,country,region,price,type,body,acidity
0,Teso La Monja,Tinto,2013,4.9,58,Espana,Toro,995.0,Toro Red,5.0,3.0
1,Artadi,Vina El Pison,2018,4.9,31,Espana,Vino de Espana,313.5,Tempranillo,4.0,2.0
2,Vega Sicilia,Unico,2009,4.8,1793,Espana,Ribera del Duero,324.95,Ribera Del Duero Red,5.0,3.0
3,Vega Sicilia,Unico,1999,4.8,1705,Espana,Ribera del Duero,692.96,Ribera Del Duero Red,5.0,3.0
4,Vega Sicilia,Unico,1996,4.8,1309,Espana,Ribera del Duero,778.06,Ribera Del Duero Red,5.0,3.0


In [2]:
# Work on an independent copy of the raw frame so the cleaning steps that follow
# can never leak back into `df`. (`df` is already a DataFrame; wrapping it in
# pd.DataFrame(...) does not copy the underlying data by default.)
wine_df = df.copy()
wine_df.head()

Unnamed: 0,winery,wine,year,rating,num_reviews,country,region,price,type,body,acidity
0,Teso La Monja,Tinto,2013,4.9,58,Espana,Toro,995.0,Toro Red,5.0,3.0
1,Artadi,Vina El Pison,2018,4.9,31,Espana,Vino de Espana,313.5,Tempranillo,4.0,2.0
2,Vega Sicilia,Unico,2009,4.8,1793,Espana,Ribera del Duero,324.95,Ribera Del Duero Red,5.0,3.0
3,Vega Sicilia,Unico,1999,4.8,1705,Espana,Ribera del Duero,692.96,Ribera Del Duero Red,5.0,3.0
4,Vega Sicilia,Unico,1996,4.8,1309,Espana,Ribera del Duero,778.06,Ribera Del Duero Red,5.0,3.0


In [3]:
# How many rows each winery contributes, most frequent first.
wine_df.loc[:, 'winery'].value_counts()

winery
Contino                                 457
Artadi                                  261
La Rioja Alta                           254
Sierra Cantabria                        237
Matarromera                             232
                                       ... 
Briego                                    1
Guillem Carol - Cellers Carol Valles      1
Particular                                1
Bodegas Asenjo & Manso                    1
Joan Simo                                 1
Name: count, Length: 480, dtype: int64

In [4]:
# How many rows each wine name contributes, most frequent first.
wine_df.loc[:, 'wine'].value_counts()

wine
Reserva                         467
Gran Reserva                    458
Rioja Reserva                   240
El Viejo                        224
Corimbo I                       223
                               ... 
4 Varietales Coleccion Rioja      1
Noble                             1
Primordium                        1
Finca Helena                      1
Capricho Crianza                  1
Name: count, Length: 847, dtype: int64

In [5]:
# How many rows each region contributes, most frequent first.
wine_df.loc[:, 'region'].value_counts()

region
Rioja                              2440
Ribera del Duero                   1413
Priorato                            686
Toro                                300
Vino de Espana                      263
                                   ... 
Ribera del Gallego-Cinco Villas       1
Monterrei                             1
Jerez Cream                           1
Condado de Huelva                     1
Abona                                 1
Name: count, Length: 76, dtype: int64

In [6]:
# How many rows each wine type contributes, most frequent first.
wine_df.loc[:, 'type'].value_counts()

type
Rioja Red               2357
Ribera Del Duero Red    1407
Red                      864
Priorat Red              674
Toro Red                 296
Tempranillo              291
Sherry                   274
Albarino                 252
Mencia                   235
Rioja White               92
Pedro Ximenez             35
Grenache                  35
Cava                      33
Verdejo                   27
Monastrell                18
Montsant Red              17
Syrah                     15
Chardonnay                13
Cabernet Sauvignon        11
Sparkling                  5
Sauvignon Blanc            4
Name: count, dtype: int64

In [7]:
# Count the missing entries in every column.
wine_df.isnull().sum()

winery            0
wine              0
year              2
rating            0
num_reviews       0
country           0
region            0
price             0
type            545
body           1169
acidity        1169
dtype: int64

In [8]:
# Keep only the rows that have a value in every column.
wine_df = wine_df.dropna(axis=0, how='any')

In [9]:
# Print dtypes and non-null counts of the cleaned frame to confirm no missing values remain.
# Note from the recorded output: `year` is still object dtype at this point.
wine_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 6329 entries, 0 to 7499
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   winery       6329 non-null   object 
 1   wine         6329 non-null   object 
 2   year         6329 non-null   object 
 3   rating       6329 non-null   float64
 4   num_reviews  6329 non-null   int64  
 5   country      6329 non-null   object 
 6   region       6329 non-null   object 
 7   price        6329 non-null   float64
 8   type         6329 non-null   object 
 9   body         6329 non-null   float64
 10  acidity      6329 non-null   float64
dtypes: float64(4), int64(1), object(6)
memory usage: 593.3+ KB


In [10]:

# One-hot encode the categorical text columns of the cleaned frame.
# Dense output is requested because the encoder is configured to return a pandas
# DataFrame; categories not seen during fit are ignored rather than raising.
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
encoder.set_output(transform="pandas")

# Fit and transform in one pass over the same four columns.
encoded_columns = encoder.fit_transform(wine_df[['winery', 'wine', 'region', 'type']])

# Generated feature names, e.g. 'winery_AGE', ..., 'type_Verdejo'.
column_names = encoder.get_feature_names_out()
print(column_names)

# The transformer output is already a DataFrame carrying these column names and
# wine_df's row index, so it is used directly as the encoded frame.
encoded_df = encoded_columns

['winery_AGE' 'winery_Aalto' 'winery_Abadal' ... 'type_Tempranillo'
 'type_Toro Red' 'type_Verdejo']


In [11]:
# Preview the first five rows of the one-hot encoded columns.
encoded_df.head(n=5)

Unnamed: 0,winery_AGE,winery_Aalto,winery_Abadal,winery_Abadia Retuerta,winery_Abel Mendoza Monge,winery_Acustic Celler,winery_Adama Wines,winery_Adega Familiar Eladio Pineiro,winery_Agusti Torello Mata,winery_Albamar,...,type_Ribera Del Duero Red,type_Rioja Red,type_Rioja White,type_Sauvignon Blanc,type_Sherry,type_Sparkling,type_Syrah,type_Tempranillo,type_Toro Red,type_Verdejo
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
# Put the one-hot columns next to the original ones (rows are matched on the index),
# then discard the raw text columns — including `country`, which was never encoded.
wine_df_encoded = (
    pd.concat([wine_df, encoded_df], axis='columns')
    .drop(columns=['winery', 'wine', 'region', 'type', 'country'])
)
wine_df_encoded.head()

Unnamed: 0,year,rating,num_reviews,price,body,acidity,winery_AGE,winery_Aalto,winery_Abadal,winery_Abadia Retuerta,...,type_Ribera Del Duero Red,type_Rioja Red,type_Rioja White,type_Sauvignon Blanc,type_Sherry,type_Sparkling,type_Syrah,type_Tempranillo,type_Toro Red,type_Verdejo
0,2013,4.9,58,995.0,5.0,3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,2018,4.9,31,313.5,4.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,2009,4.8,1793,324.95,5.0,3.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1999,4.8,1705,692.96,5.0,3.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1996,4.8,1309,778.06,5.0,3.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [13]:
# `year` is stored as text: coerce it to numbers (entries that cannot be parsed
# become NaN) and then keep only the rows that ended up with a numeric year.
wine_df_encoded = (
    wine_df_encoded
    .assign(year=lambda frame: pd.to_numeric(frame['year'], errors='coerce'))
    .dropna(subset=['year'])
)

In [14]:
# Standardise every column of the encoded frame to zero mean and unit variance.
# NOTE(review): the scaler is fitted on the whole frame, which includes the `price`
# column used as the target further down and the rows that later end up in the test
# split. scaled_df is not consumed by the modelling cells visible in this notebook;
# if it is ever used for training, fit the scaler on the training features only.
scaler = StandardScaler()
scaled_data = scaler.fit_transform(wine_df_encoded)

# Carry over the source row labels. wine_df_encoded has gaps in its index after the
# earlier dropna steps, so letting pandas assign a fresh 0..n-1 index would silently
# misalign scaled_df with wine_df_encoded in any later join or comparison.
scaled_df = pd.DataFrame(
    scaled_data,
    columns=wine_df_encoded.columns,
    index=wine_df_encoded.index,
)
scaled_df.head()


Unnamed: 0,year,rating,num_reviews,price,body,acidity,winery_AGE,winery_Aalto,winery_Abadal,winery_Abadia Retuerta,...,type_Ribera Del Duero Red,type_Rioja Red,type_Rioja White,type_Sauvignon Blanc,type_Sherry,type_Sparkling,type_Syrah,type_Tempranillo,type_Toro Red,type_Verdejo
0,0.006272,5.114491,-0.63149,5.604802,1.407978,0.217748,-0.012836,-0.040622,-0.022237,-0.057496,...,-0.516169,-0.738721,-0.119882,-0.022237,-0.018155,-0.028712,-0.046328,-0.214501,4.717701,-0.065588
1,0.706096,5.114491,-0.676116,1.487011,-0.275715,-3.899793,-0.012836,-0.040622,-0.022237,-0.057496,...,-0.516169,-0.738721,-0.119882,-0.022237,-0.018155,-0.028712,-0.046328,4.661983,-0.211968,-0.065588
2,-0.553587,4.315208,2.236173,1.556195,1.407978,0.217748,-0.012836,-0.040622,-0.022237,-0.057496,...,1.937351,-0.738721,-0.119882,-0.022237,-0.018155,-0.028712,-0.046328,-0.214501,-0.211968,-0.065588
3,-1.953235,4.315208,2.090724,3.779802,1.407978,0.217748,-0.012836,-0.040622,-0.022237,-0.057496,...,1.937351,-0.738721,-0.119882,-0.022237,-0.018155,-0.028712,-0.046328,-0.214501,-0.211968,-0.065588
4,-2.373129,4.315208,1.436203,4.293997,1.407978,0.217748,-0.012836,-0.040622,-0.022237,-0.057496,...,1.937351,-0.738721,-0.119882,-0.022237,-0.018155,-0.028712,-0.046328,-0.214501,-0.211968,-0.065588


In [15]:
# Pairwise Pearson correlation between all columns of the encoded frame.
wine_df_encoded.corr(method='pearson')

Unnamed: 0,year,rating,num_reviews,price,body,acidity,winery_AGE,winery_Aalto,winery_Abadal,winery_Abadia Retuerta,...,type_Ribera Del Duero Red,type_Rioja Red,type_Rioja White,type_Sauvignon Blanc,type_Sherry,type_Sparkling,type_Syrah,type_Tempranillo,type_Toro Red,type_Verdejo
year,1.000000,-0.299519,0.042120,-0.384351,-0.100687,0.155387,-0.096938,0.020723,0.012589,0.006799,...,-0.063803,-0.199310,0.043480,0.017776,-0.060871,-0.014287,0.017748,0.067575,0.137507,0.045252
rating,-0.299519,1.000000,-0.003144,0.551943,0.161058,-0.085140,0.004092,0.074640,0.001164,0.061987,...,0.168104,-0.156848,0.000335,0.024862,0.049320,0.032103,0.034708,0.011232,0.008503,0.061235
num_reviews,0.042120,-0.003144,1.000000,-0.045856,0.079627,0.050742,-0.008721,0.128458,-0.005859,0.059835,...,0.085506,0.021647,-0.019770,-0.014875,-0.011960,-0.018388,-0.008098,-0.035323,0.016511,-0.013291
price,-0.384351,0.551943,-0.045856,1.000000,0.150861,-0.030566,0.012611,0.002633,-0.006149,0.002651,...,0.148760,-0.101955,-0.012442,-0.007130,0.027208,0.001630,-0.007137,0.010452,0.017485,-0.013248
body,-0.100687,0.161058,0.079627,0.150861,1.000000,-0.001638,-0.003539,0.057195,-0.006131,-0.015853,...,0.726754,-0.203677,-0.234898,-0.043571,-0.005006,-0.104602,-0.012773,-0.059141,0.298446,-0.128514
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
type_Sparkling,-0.014287,0.032103,-0.018388,0.001630,-0.104602,0.006252,-0.000369,-0.001166,-0.000638,-0.001651,...,-0.014820,-0.021210,-0.003442,-0.000638,-0.000521,1.000000,-0.001330,-0.006159,-0.006086,-0.001883
type_Syrah,0.017748,0.034708,-0.008098,-0.007137,-0.012773,0.010088,-0.000595,-0.001882,-0.001030,0.121709,...,-0.023913,-0.034223,-0.005554,-0.001030,-0.000841,-0.001330,1.000000,-0.009937,-0.009820,-0.003039
type_Tempranillo,0.067575,0.011232,-0.035323,0.010452,-0.059141,-0.836510,-0.002753,-0.008714,-0.004770,0.057762,...,-0.110719,-0.158456,-0.025715,-0.004770,-0.003894,-0.006159,-0.009937,1.000000,-0.045467,-0.014069
type_Toro Red,0.137507,0.008503,0.016511,0.017485,0.298446,0.046156,-0.002721,-0.008611,-0.004713,-0.012187,...,-0.109411,-0.156585,-0.025411,-0.004713,-0.003848,-0.006086,-0.009820,-0.045467,1.000000,-0.013903


In [16]:
# Separate the encoded frame into features (every column except price) and the price target.
X = wine_df_encoded.drop('price', axis=1)
y = wine_df_encoded.loc[:, 'price']

In [17]:
# Shuffle and split into train/test parts with scikit-learn's default proportions;
# the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    shuffle=True,
)

In [18]:
# K-Nearest Neighbors (KNN)

In [19]:
#Random Forest

In [20]:
# XGBoost

In [21]:
#SVM

In [22]:
#Linear Regression

In [23]:
#Logistic Regression

In [24]:
# Run the shared model-comparison helper on the freshly loaded raw CSV with `rating` as target.
# NOTE(review): train_and_evaluate_models comes from the project-local
# ClassificationDataPipeline module; its preprocessing, its return value and the meaning
# of its third argument are not visible in this notebook — confirm against that module.
from ClassificationDataPipeline import train_and_evaluate_models
# Rebinds the notebook-level `df` to a newly read frame (no cleaning from the cells above applies).
df = pd.read_csv('wines_SPA.csv')
target_column = 'rating'
# Presumably the random seed forwarded to the helper — TODO confirm.
Randomstate = 50
accuracy_scores = train_and_evaluate_models(df, target_column, Randomstate)
print(accuracy_scores)

KNN - Training Score: 0.8580, R² Score: 0.8580, MSE: 0.0022
KNN - Testing Score: 0.8020, R² Score: 0.8020, MSE: 0.0028
--------------------------------------------------
Random Forest - Training Score: 0.9731, R² Score: 0.9731, MSE: 0.0004
Random Forest - Testing Score: 0.8384, R² Score: 0.8384, MSE: 0.0023
--------------------------------------------------
Gradient Boosting - Training Score: 0.8595, R² Score: 0.8595, MSE: 0.0022
Gradient Boosting - Testing Score: 0.8338, R² Score: 0.8338, MSE: 0.0024
--------------------------------------------------
AdaBoost - Training Score: 0.7805, R² Score: 0.7805, MSE: 0.0035
AdaBoost - Testing Score: 0.7736, R² Score: 0.7736, MSE: 0.0032
--------------------------------------------------
SVM - Training Score: 0.4408, R² Score: 0.4408, MSE: 0.0088
SVM - Testing Score: 0.3185, R² Score: 0.3185, MSE: 0.0098
--------------------------------------------------
Linear Regression - Training Score: 0.9402, R² Score: 0.9402, MSE: 0.0009
Linear Regression 

In [25]:
# Run the shared model-comparison helper on the freshly loaded raw CSV with `price` as target.
# NOTE(review): train_and_evaluate_models comes from the project-local
# ClassificationDataPipeline module; its preprocessing and return value are not visible
# in this notebook — confirm against that module.
# NOTE(review): the recorded output shows strongly negative R² for AdaBoost on this
# target; worth investigating inside the helper.
from ClassificationDataPipeline import train_and_evaluate_models
# Rebinds the notebook-level `df` to a newly read frame (no cleaning from the cells above applies).
df = pd.read_csv('wines_SPA.csv')
target_column = 'price'
# Presumably the random seed forwarded to the helper — TODO confirm.
Randomstate = 50
accuracy_scores = train_and_evaluate_models(df, target_column, Randomstate)
print(accuracy_scores)

KNN - Training Score: 0.7961, R² Score: 0.7961, MSE: 5870.6149
KNN - Testing Score: 0.7050, R² Score: 0.7050, MSE: 5703.1032
--------------------------------------------------
Random Forest - Training Score: 0.9535, R² Score: 0.9535, MSE: 1337.7252
Random Forest - Testing Score: 0.7575, R² Score: 0.7575, MSE: 4687.9645
--------------------------------------------------
Gradient Boosting - Training Score: 0.9197, R² Score: 0.9197, MSE: 2313.2629
Gradient Boosting - Testing Score: 0.7452, R² Score: 0.7452, MSE: 4926.5534
--------------------------------------------------
AdaBoost - Training Score: -8.9156, R² Score: -8.9156, MSE: 285533.9370
AdaBoost - Testing Score: -13.8525, R² Score: -13.8525, MSE: 287140.0889
--------------------------------------------------
SVM - Training Score: 0.0865, R² Score: 0.0865, MSE: 26304.8921
SVM - Testing Score: 0.1173, R² Score: 0.1173, MSE: 17064.7195
--------------------------------------------------
Linear Regression - Training Score: 0.8752, R² Sco

In [26]:
# Preview the first five rows of whatever `df` currently refers to (the raw CSV re-read above).
df.head(n=5)

Unnamed: 0,winery,wine,year,rating,num_reviews,country,region,price,type,body,acidity
0,Teso La Monja,Tinto,2013,4.9,58,Espana,Toro,995.0,Toro Red,5.0,3.0
1,Artadi,Vina El Pison,2018,4.9,31,Espana,Vino de Espana,313.5,Tempranillo,4.0,2.0
2,Vega Sicilia,Unico,2009,4.8,1793,Espana,Ribera del Duero,324.95,Ribera Del Duero Red,5.0,3.0
3,Vega Sicilia,Unico,1999,4.8,1705,Espana,Ribera del Duero,692.96,Ribera Del Duero Red,5.0,3.0
4,Vega Sicilia,Unico,1996,4.8,1309,Espana,Ribera del Duero,778.06,Ribera Del Duero Red,5.0,3.0


In [27]:

# Derive the modelling frame from the cleaned data, leaving out `country` and `num_reviews`.
cleaned_df = wine_df.drop(['country', 'num_reviews'], axis='columns')
cleaned_df.head()

Unnamed: 0,winery,wine,year,rating,region,price,type,body,acidity
0,Teso La Monja,Tinto,2013,4.9,Toro,995.0,Toro Red,5.0,3.0
1,Artadi,Vina El Pison,2018,4.9,Vino de Espana,313.5,Tempranillo,4.0,2.0
2,Vega Sicilia,Unico,2009,4.8,Ribera del Duero,324.95,Ribera Del Duero Red,5.0,3.0
3,Vega Sicilia,Unico,1999,4.8,Ribera del Duero,692.96,Ribera Del Duero Red,5.0,3.0
4,Vega Sicilia,Unico,1996,4.8,Ribera del Duero,778.06,Ribera Del Duero Red,5.0,3.0


In [28]:
# Replace the textual wine type with integer codes.
labels = cleaned_df['type']
# Learn the label -> code mapping first, then apply it to the same column.
lbencoder = LabelEncoder().fit(labels)
cleaned_df['type'] = lbencoder.transform(labels)
cleaned_df.head()

Unnamed: 0,winery,wine,year,rating,region,price,type,body,acidity
0,Teso La Monja,Tinto,2013,4.9,Toro,995.0,19,5.0,3.0
1,Artadi,Vina El Pison,2018,4.9,Vino de Espana,313.5,18,4.0,2.0
2,Vega Sicilia,Unico,2009,4.8,Ribera del Duero,324.95,11,5.0,3.0
3,Vega Sicilia,Unico,1999,4.8,Ribera del Duero,692.96,11,5.0,3.0
4,Vega Sicilia,Unico,1996,4.8,Ribera del Duero,778.06,11,5.0,3.0


In [29]:
# dropna() returns a new frame; bind it back so the rows are actually removed from
# cleaned_df instead of only being shown in this cell's output.
cleaned_df = cleaned_df.dropna()
# Display the resulting frame.
cleaned_df

Unnamed: 0,winery,wine,year,rating,region,price,type,body,acidity
0,Teso La Monja,Tinto,2013,4.9,Toro,995.00,19,5.0,3.0
1,Artadi,Vina El Pison,2018,4.9,Vino de Espana,313.50,18,4.0,2.0
2,Vega Sicilia,Unico,2009,4.8,Ribera del Duero,324.95,11,5.0,3.0
3,Vega Sicilia,Unico,1999,4.8,Ribera del Duero,692.96,11,5.0,3.0
4,Vega Sicilia,Unico,1996,4.8,Ribera del Duero,778.06,11,5.0,3.0
...,...,...,...,...,...,...,...,...,...
7495,Contino,Reserva,2016,4.2,Rioja,19.98,12,4.0,3.0
7496,Conreria d'Scala Dei,Les Brugueres,2018,4.2,Priorato,16.76,9,4.0,3.0
7497,Mustiguillo,Finca Terrerazo,2017,4.2,El Terrerazo,24.45,10,4.0,3.0
7498,Matarromera,Gran Reserva,2011,4.2,Ribera del Duero,64.50,11,5.0,3.0


In [30]:
# Run the shared model-comparison helper on the reduced frame with the label-encoded `type` as target.
# NOTE(review): `type` holds LabelEncoder integer codes here, and the recorded output reports
# R² / MSE, so the codes appear to be scored as a continuous quantity. The codes follow the
# sorted class names rather than any meaningful scale — confirm whether a classifier with a
# classification metric was intended.
# NOTE(review): train_and_evaluate_models is project-local; its behaviour is not visible here.
from ClassificationDataPipeline import train_and_evaluate_models
# Alias, not a copy: `df` and `cleaned_df` refer to the same object from here on.
df = cleaned_df
target_column = 'type'
# Presumably the random seed forwarded to the helper — TODO confirm.
Randomstate = 50
accuracy_scores = train_and_evaluate_models(df, target_column, Randomstate)
print(accuracy_scores)

KNN - Training Score: 0.9594, R² Score: 0.9594, MSE: 0.5133
KNN - Testing Score: 0.9517, R² Score: 0.9517, MSE: 0.7244
--------------------------------------------------
Random Forest - Training Score: 0.9961, R² Score: 0.9961, MSE: 0.0499
Random Forest - Testing Score: 0.9842, R² Score: 0.9842, MSE: 0.2366
--------------------------------------------------
Gradient Boosting - Training Score: 0.9665, R² Score: 0.9665, MSE: 0.4235
Gradient Boosting - Testing Score: 0.9603, R² Score: 0.9603, MSE: 0.5960
--------------------------------------------------
AdaBoost - Training Score: 0.6371, R² Score: 0.6371, MSE: 4.5924
AdaBoost - Testing Score: 0.6652, R² Score: 0.6652, MSE: 5.0253
--------------------------------------------------
SVM - Training Score: 0.9026, R² Score: 0.9026, MSE: 1.2321
SVM - Testing Score: 0.9032, R² Score: 0.9032, MSE: 1.4531
--------------------------------------------------
Linear Regression - Training Score: 1.0000, R² Score: 1.0000, MSE: 0.0000
Linear Regression 

In [31]:
# Run the shared model-comparison helper on the reduced frame with `price` as target.
# NOTE(review): train_and_evaluate_models is project-local; its preprocessing and return
# value are not visible in this notebook — confirm against ClassificationDataPipeline.
# NOTE(review): `type` enters as LabelEncoder integer codes in this frame; how the helper
# treats that column is unknown from here.
from ClassificationDataPipeline import train_and_evaluate_models
# Alias, not a copy: `df` and `cleaned_df` refer to the same object from here on.
df = cleaned_df
target_column = 'price'
# Presumably the random seed forwarded to the helper — TODO confirm.
Randomstate = 50
accuracy_scores = train_and_evaluate_models(df, target_column, Randomstate)
print(accuracy_scores)

KNN - Training Score: 0.7939, R² Score: 0.7939, MSE: 5935.0136
KNN - Testing Score: 0.6752, R² Score: 0.6752, MSE: 6278.7405
--------------------------------------------------
Random Forest - Training Score: 0.9531, R² Score: 0.9531, MSE: 1349.1637
Random Forest - Testing Score: 0.7204, R² Score: 0.7204, MSE: 5406.1572
--------------------------------------------------
Gradient Boosting - Training Score: 0.9090, R² Score: 0.9090, MSE: 2621.9090
Gradient Boosting - Testing Score: 0.7204, R² Score: 0.7204, MSE: 5406.0648
--------------------------------------------------
AdaBoost - Training Score: -11.0481, R² Score: -11.0481, MSE: 346940.9596
AdaBoost - Testing Score: -17.0276, R² Score: -17.0276, MSE: 348524.4019
--------------------------------------------------
SVM - Training Score: 0.0919, R² Score: 0.0919, MSE: 26150.9583
SVM - Testing Score: 0.1238, R² Score: 0.1238, MSE: 16939.7373
--------------------------------------------------
Linear Regression - Training Score: 0.8750, R² S

In [32]:
# Run the shared model-comparison helper on the reduced frame with `rating` as target.
# NOTE(review): train_and_evaluate_models is project-local; its preprocessing and return
# value are not visible in this notebook — confirm against ClassificationDataPipeline.
# NOTE(review): `type` enters as LabelEncoder integer codes in this frame; how the helper
# treats that column is unknown from here.
from ClassificationDataPipeline import train_and_evaluate_models
# Alias, not a copy: `df` and `cleaned_df` refer to the same object from here on.
df = cleaned_df
target_column = 'rating'
# Presumably the random seed forwarded to the helper — TODO confirm.
Randomstate = 50
accuracy_scores = train_and_evaluate_models(df, target_column, Randomstate)
print(accuracy_scores)

KNN - Training Score: 0.8547, R² Score: 0.8547, MSE: 0.0023
KNN - Testing Score: 0.8229, R² Score: 0.8229, MSE: 0.0025
--------------------------------------------------
Random Forest - Training Score: 0.9693, R² Score: 0.9693, MSE: 0.0005
Random Forest - Testing Score: 0.8151, R² Score: 0.8151, MSE: 0.0027
--------------------------------------------------
Gradient Boosting - Training Score: 0.7892, R² Score: 0.7892, MSE: 0.0033
Gradient Boosting - Testing Score: 0.7680, R² Score: 0.7680, MSE: 0.0033
--------------------------------------------------
AdaBoost - Training Score: 0.5602, R² Score: 0.5602, MSE: 0.0070
AdaBoost - Testing Score: 0.5383, R² Score: 0.5383, MSE: 0.0066
--------------------------------------------------
SVM - Training Score: 0.4462, R² Score: 0.4462, MSE: 0.0088
SVM - Testing Score: 0.3143, R² Score: 0.3143, MSE: 0.0098
--------------------------------------------------
Linear Regression - Training Score: 0.9401, R² Score: 0.9401, MSE: 0.0009
Linear Regression 