In [1]:
# Step 1
########################### Data Preprocessing ############################

# Importing the libraries
import numpy as np
import pandas as pd

# Importing the data set
dataset = pd.read_csv('data/beer_data.csv')


# Printing first 10 rows of the dataset
print("\n----------------------------\n",dataset.head(10))


# Dealing with the categorical data

# Spliting Cellar Temperature into Maximum and Minimum based on the given data and converting the type from str to int

dataset['Minimum_Cellar_Temp'] = dataset['Cellar Temperature'].apply(lambda x : int(x.split('-')[0].strip()))
dataset['Maximum_Cellar_Temp'] = dataset['Cellar Temperature'].apply(lambda x : int(x.split('-')[1].strip()))

# New dataset with selected features
dataset = dataset[['ABV', 'Ratings','Minimum_Cellar_Temp','Maximum_Cellar_Temp', 'Score']]

# Printing first 10 rows of the dataset
print("\n----------------------------\n",dataset.head(10))

# Printing the summary of the dataset
print("\n----------------------------\n")
print(dataset.info())


# Classifying dependent and independent variables

# All columns except the last column are independent features - (Selecting every column except Score)
X = dataset.iloc[:,:-1].values

# Only the last column is the dependent feature or the target variable(Score)
y = dataset.iloc[:,-1].values

# Creating training and test sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size = 0.2,random_state = 0)


# Step 2

########################### Support Vector Regression ###########################

# Create a Support vector  Regressor and provide the dataset
from sklearn.svm import SVR
svr = SVR(kernel='rbf') 

# Training the regressor with training data
svr.fit(X_train, y_train)

# Predicting the salary for a test set
y_pred = svr.predict(X_test)

# Priniting the predicted values
print("\n----------------------------\nPredictions = \n",y_pred)

# Calculating score from Root Mean Log Squared Error
def rmlse(y_test, y_pred):
    error = np.square(np.log10(y_pred +1) - np.log10(y_test +1)).mean() ** 0.5
    score = 1 - error
    return score

# Printing the score
print("\n----------------------------\nRMLSE Score = ", rmlse(y_test, y_pred))


----------------------------
    ABV  Ratings Cellar Temperature  Score
0  7.5        1              40-45   4.08
1  5.3       22              40-45   3.82
2  9.0        1              45-50   4.03
3  4.6        1              35-40   4.00
4  6.9        1              45-50   3.75
5  7.9       32              40-45   4.26
6  4.7      141              35-40   3.47
7  5.6        1              40-45   3.70
8  5.0        1              40-45   3.90
9  5.4       12              40-45   3.79

----------------------------
    ABV  Ratings  Minimum_Cellar_Temp  Maximum_Cellar_Temp  Score
0  7.5        1                   40                   45   4.08
1  5.3       22                   40                   45   3.82
2  9.0        1                   45                   50   4.03
3  4.6        1                   35                   40   4.00
4  6.9        1                   45                   50   3.75
5  7.9       32                   40                   45   4.26
6  4.7      141      




----------------------------
Predictions = 
 [3.87158722 0.09534496 3.66120693 3.88271879 4.17896989 3.57462616
 3.71518472 0.09267043 3.83997801 3.58285154 3.72509269 3.57013683
 3.24663481 3.71633733 3.57640345 3.75334967 3.75960791 3.86755239
 3.87297783 3.79999041 4.11208339 4.21468538 0.06919579 3.5697965
 3.5732781  0.92045952 3.92992621 3.60480767 3.84983296 3.91002229
 3.744829   0.93154106 3.78525353 0.09627126 4.01162825 3.84586508
 3.76224802 3.73020408 3.74027044 3.32529312 3.73310039 3.80514986
 3.72509269 4.2284756  3.75161917 4.07548163 3.5697965  3.178862
 3.38291591 4.05524978 3.93950443 3.58285154 4.05868202 3.94021578
 3.67440595 3.73310039 0.09708641 3.79981332 3.78970317 3.63084579
 3.60012073 3.84364282 3.72192536 0.10027302 3.46353756 4.27119602
 3.67603143 3.6618162  3.69829291 3.5697965  3.95242528 3.91603431
 3.33042942 3.93357794 3.96309561 3.93425023 3.5467629  3.72909462
 4.0359174  3.70910729 3.74812125 3.70280664 3.56446058 3.66538873
 4.27622616 3.46025