### Import Lybraries

In [24]:
import pandas as pd
import numpy as np
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, r2_score

### Load Diabetes Dataset from scikit library

In [2]:
dataset = load_diabetes()

print(dataset.DESCR)

.. _diabetes_dataset:

Diabetes dataset
----------------

Ten baseline variables, age, sex, body mass index, average blood
pressure, and six blood serum measurements were obtained for each of n =
442 diabetes patients, as well as the response of interest, a
quantitative measure of disease progression one year after baseline.

**Data Set Characteristics:**

  :Number of Instances: 442

  :Number of Attributes: First 10 columns are numeric predictive values

  :Target: Column 11 is a quantitative measure of disease progression one year after baseline

  :Attribute Information:
      - age     age in years
      - sex
      - bmi     body mass index
      - bp      average blood pressure
      - s1      tc, total serum cholesterol
      - s2      ldl, low-density lipoproteins
      - s3      hdl, high-density lipoproteins
      - s4      tch, total cholesterol / HDL
      - s5      ltg, possibly log of serum triglycerides level
      - s6      glu, blood sugar level

Note: Each of these 1

### View the features

In [3]:
# columns
dataset.feature_names

['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']

### Prepare the data for fitting into the SVR method 

In [25]:
X = dataset.data
# print(X)
y = dataset.target

X.shape, y.shape

((442, 10), (442,))

In [26]:
# split the dataset into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state = 1)

In [27]:
X_train.shape, y_train.shape

((353, 10), (353,))

### Create and train the method 

In [17]:
regr = SVR(kernel='linear')
regr.fit(X_train, y_train)

## Make prediction 

In [28]:
# make prediction with the test dataset
y_pred = regr.predict(X_test)

In [31]:
# compare the label values of testing set with the predicted values
result = pd.DataFrame({'y_test':y_test, 'y_pred':y_pred})
result

Unnamed: 0,y_test,y_pred
0,78.0,140.477030
1,152.0,138.505878
2,200.0,141.306853
3,59.0,138.417707
4,311.0,142.136975
...,...,...
84,64.0,139.217056
85,107.0,140.978923
86,49.0,139.768993
87,60.0,138.298798


###  Compute the mean squared error and the coefficient of determination

In [30]:
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Print the results
print("Mean squared error: {:.2f}".format(mse))
print("Coefficient of determination (R^2): {:.2f}".format(r2))

Mean squared error: 5249.31
Coefficient of determination (R^2): 0.01
