## Import the dependencies


In [1]:
import pandas as pd
from sklearn.linear_model import Ridge, Lasso, LinearRegression
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


## Load the dataset

In [2]:
# Load the prostate-cancer table with read_csv's default parsing options.
# The path is relative, so it resolves against the notebook's working directory.
data = pd.read_csv(filepath_or_buffer="datasets/prostate_cancer.txt")

# Leave the frame as the cell's final expression so the notebook renders it.
data

Unnamed: 0,id,lcavol,lweight,age,lbph,svi,lcp,gleason,pgg45,lpsa,train
0,1,-0.579818,2.769459,50,-1.386294,0,-1.386294,6,0,-0.430783,T
1,2,-0.994252,3.319626,58,-1.386294,0,-1.386294,6,0,-0.162519,T
2,3,-0.510826,2.691243,74,-1.386294,0,-1.386294,7,20,-0.162519,T
3,4,-1.203973,3.282789,58,-1.386294,0,-1.386294,6,0,-0.162519,T
4,5,0.751416,3.432373,62,-1.386294,0,-1.386294,6,0,0.371564,T
...,...,...,...,...,...,...,...,...,...,...,...
92,93,2.830268,3.876396,68,-1.386294,1,1.321756,7,60,4.385147,T
93,94,3.821004,3.896909,44,-1.386294,1,2.169054,7,40,4.684443,T
94,95,2.907447,3.396185,52,-1.386294,1,2.463853,7,10,5.143124,F
95,96,2.882564,3.773910,68,1.558145,1,1.558145,7,80,5.477509,T


## Split dataset into 'Train' and 'Test' parts

In [3]:
# Features: every column except the row identifier, the target and the split flag.
x = data.drop(columns=['id', 'lpsa', 'train'])
# Target: selected with double brackets, so it stays a one-column DataFrame.
y = data[['lpsa']]

# Boolean mask that is True for rows whose 'train' flag equals 'T'.
is_train = data['train'].eq('T')

# Rows picked by the mask form the "Train" part; all remaining rows form "Test".
x_train, y_train = x.loc[is_train], y.loc[is_train]
x_test, y_test = x.loc[~is_train], y.loc[~is_train]

## Train the model | Measure Error Metrics

In [4]:
# Pre-processing: learn the scaling statistics from the training rows only,
# then apply that same fitted transform to the test rows.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

# Train the model using the scaled 'Train' data.
# NOTE(review): how alpha=0.046 was chosen is not shown in the visible cells —
# record the selection procedure next to this value.
model = Lasso(alpha=0.046)
model.fit(x_train_scaled, y_train)

# Predict the 'Test' data.
y_pred = model.predict(x_test_scaled)

# Calculate the error metrics on the 'Test' data.
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)  # mean absolute error (MAE)
r2 = r2_score(y_test, y_pred)

# Former (misspelled) name of the MAE value, kept so that any later cell that
# still reads `mas` keeps working.
mas = mae

print(f'MSE: {mse}')
print(f'MAE: {mae}')
print(f'R^2: {r2}')

MSE: 0.45604434196550814
MAS: 0.5012092881594118
R^2: 0.565524451608898
