# Linear Regression (`Least Squares`)

In [1]:
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Load the Boston housing data and pull out the feature matrix and target.
# NOTE(review): `load_boston` was removed in scikit-learn 1.2, so this cell
# only runs on older scikit-learn releases.
df = load_boston()
X, y = df.data, df.target

# Hold out a test split; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=66)

# Ordinary least squares fit. For regressors `score` reports R^2, not a
# classification accuracy.
lr = LinearRegression()
lr.fit(X_train, y_train)

train_r2 = lr.score(X_train, y_train)
test_r2 = lr.score(X_test, y_test)
print('training accuration score {:.3f}'.format(train_r2))
print('testing accuration score {:.3f}'.format(test_r2))

training accuration score 0.713
testing accuration score 0.817


In [2]:
# Display the coefficients learned by the fitted linear regression (one per input feature).
lr.coef_

array([-1.21562371e-01,  4.88527086e-02,  2.75554234e-02,  2.13319694e+00,
       -1.92165700e+01,  3.25484414e+00, -5.02296307e-03, -1.66093472e+00,
        2.97130376e-01, -1.19953846e-02, -9.93870868e-01,  8.37850481e-03,
       -5.41527316e-01])

In [3]:
# Display the intercept term learned by the fitted linear regression.
lr.intercept_

43.10815624477739

# `Ridge`

In [4]:
# Ridge regression: least squares with an L2 penalty whose strength is `alpha`.
rr = Ridge(alpha=1)
rr.fit(X_train, y_train)

# Score on both splits so the two numbers can be compared side by side.
ridge_train_r2 = rr.score(X_train, y_train)
ridge_test_r2 = rr.score(X_test, y_test)
print('training accuration score Ridge{:.3f}'.format(ridge_train_r2))
print('testing accuration score Ridge{:.3f}'.format(ridge_test_r2))

training accuration score Ridge0.710
testing accuration score Ridge0.818


# `Lasso`

In [5]:
import numpy as np

# Lasso regression: least squares with an L1 penalty (not L2). Unlike Ridge,
# the L1 penalty can shrink individual coefficients to exactly zero, which is
# why the number of non-zero coefficients is reported below.
ls = Lasso(alpha=0.01).fit(X_train, y_train)
print('training accuration score Lasso: {:.3f}'.format(ls.score(X_train, y_train)))
print('testing accuration score Lasso: {:.3f}'.format(ls.score(X_test, y_test)))
# Count the features whose coefficient was not driven to zero.
print('Number of Feature used: %d' % np.count_nonzero(ls.coef_))

training accuration score Lasso: 0.712
testing accuration score Lasso: 0.817
Number of Feature used: 13


In [6]:
# Pair each feature name with its Lasso coefficient for easier reading.
list(zip(df.feature_names, ls.coef_))

[('CRIM', -0.11955542311798423),
 ('ZN', 0.04954048119806585),
 ('INDUS', 0.014291889188823171),
 ('CHAS', 1.9643327716471601),
 ('NOX', -15.970235263968421),
 ('RM', 3.2546515535631473),
 ('AGE', -0.007098339626442624),
 ('DIS', -1.6086082319822566),
 ('RAD', 0.2913152531290013),
 ('TAX', -0.01243546197276764),
 ('PTRATIO', -0.9566630187146957),
 ('B', 0.008402853141678759),
 ('LSTAT', -0.5479084202444382)]

# `Linear Classification`

## `Logistic Regression`

In [7]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import MinMaxScaler

# Load the breast-cancer dataset. Note that this rebinds `df`, which held the
# Boston housing data in the regression section above.
df = load_breast_cancer()
# List the fields exposed by the returned dataset object.
df.keys()

dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [8]:
# Display the names of the feature columns in the loaded dataset.
df.feature_names

array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error',
       'fractal dimension error', 'worst radius', 'worst texture',
       'worst perimeter', 'worst area', 'worst smoothness',
       'worst compactness', 'worst concavity', 'worst concave points',
       'worst symmetry', 'worst fractal dimension'], dtype='<U23')

In [9]:
# Split the classification data. The seed is fixed (same value as the
# regression split) so the split, and therefore the scores printed below,
# are reproducible across runs. These names rebind the regression splits.
X_train, X_test, y_train, y_test = train_test_split(
    df.data, df.target, random_state=66
)

# Rescale the features with min-max scaling. The scaler is fitted on the
# training split only and then applied to the test split, so no information
# from the test data leaks into the preprocessing.
scale = MinMaxScaler()
X_train_scale = scale.fit_transform(X_train)
X_test_scale = scale.transform(X_test)

# Logistic regression on the scaled features; `score` is mean accuracy here.
lgr = LogisticRegression(C=5).fit(X_train_scale, y_train)
print('Training Score: {:.3f}'.format(lgr.score(X_train_scale, y_train)))
print('Testing Score: {:.3f}'.format(lgr.score(X_test_scale, y_test)))

Training Score: 0.988
Testing Score: 0.965


In [10]:
# Linear support vector classifier trained on the same scaled features as the
# logistic regression, for a side-by-side comparison.
svc = LinearSVC(C=3)
svc.fit(X_train_scale, y_train)

svc_train_acc = svc.score(X_train_scale, y_train)
svc_test_acc = svc.score(X_test_scale, y_test)
print('Training Score SVC: {:.3f}'.format(svc_train_acc))
print('Testing Score SVC: {:.3f}'.format(svc_test_acc))

Training Score SVC: 0.991
Testing Score SVC: 0.958
