<a href="https://colab.research.google.com/github/sirjit2/AIclass/blob/main/Worksheet7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:

from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error



In [2]:
# Load the diabetes regression dataset as plain (features, target) arrays.
X, y = load_diabetes(return_X_y=True)

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# identical on every Restart & Run All.
diabetes_split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = diabetes_split


In [3]:
# Unregularised least-squares fit, used as the reference point for the
# Ridge and Lasso models in the following cells.
lr = LinearRegression().fit(X_train, y_train)

# Score both splits so any gap between train and test error is visible.
lr_train_mse = mean_squared_error(y_train, lr.predict(X_train))
lr_test_mse = mean_squared_error(y_test, lr.predict(X_test))

print("Baseline Linear Regression")
print("Train MSE:", lr_train_mse)
print("Test MSE:", lr_test_mse)
print("Coefficients:", lr.coef_)


Baseline Linear Regression
Train MSE: 2868.549702835577
Test MSE: 2900.193628493482
Coefficients: [  37.90402135 -241.96436231  542.42875852  347.70384391 -931.48884588
  518.06227698  163.41998299  275.31790158  736.1988589    48.67065743]


In [4]:
# Candidate L2 penalty strengths, spanning four orders of magnitude.
ridge_params = {"alpha": [0.01, 0.1, 1, 10, 100]}
ridge = Ridge()

# 5-fold cross-validated grid search on the training split only. No `scoring`
# is given, so candidates are ranked by the estimator's own `score` method.
ridge_cv = GridSearchCV(estimator=ridge, param_grid=ridge_params, cv=5)
ridge_cv.fit(X_train, y_train)

# With the default refit=True, best_estimator_ is the winning configuration
# refitted on all of X_train.
ridge_best = ridge_cv.best_estimator_

ridge_train_mse = mean_squared_error(y_train, ridge_best.predict(X_train))
ridge_test_mse = mean_squared_error(y_test, ridge_best.predict(X_test))

print("\nRidge Regression")
print("Best alpha:", ridge_cv.best_params_)
print("Train MSE:", ridge_train_mse)
print("Test MSE:", ridge_test_mse)
print("Coefficients:", ridge_best.coef_)



Ridge Regression
Best alpha: {'alpha': 0.1}
Train MSE: 2912.983541587901
Test MSE: 2856.4868876706537
Coefficients: [  42.85566976 -205.49431899  505.08903304  317.0932049  -108.50026183
  -86.23673333 -190.36318008  151.70708637  392.28931896   79.9081772 ]


In [5]:
# Candidate L1 penalty strengths to search over.
lasso_params = {"alpha": [0.01, 0.1, 1, 10]}

# max_iter is raised to 10000 to give the solver more iterations to converge.
lasso = Lasso(max_iter=10000)

# 5-fold cross-validated grid search on the training split only.
lasso_cv = GridSearchCV(estimator=lasso, param_grid=lasso_params, cv=5)
lasso_cv.fit(X_train, y_train)

# With the default refit=True, best_estimator_ is the winning configuration
# refitted on all of X_train.
lasso_best = lasso_cv.best_estimator_

lasso_train_mse = mean_squared_error(y_train, lasso_best.predict(X_train))
lasso_test_mse = mean_squared_error(y_test, lasso_best.predict(X_test))

print("\nLasso Regression")
print("Best alpha:", lasso_cv.best_params_)
print("Train MSE:", lasso_train_mse)
print("Test MSE:", lasso_test_mse)
# Coefficients printed as 0 have been dropped from the model by the L1 penalty.
print("Coefficients:", lasso_best.coef_)



Lasso Regression
Best alpha: {'alpha': 0.1}
Train MSE: 2935.25823259759
Test MSE: 2798.193485169719
Coefficients: [   0.         -152.66477923  552.69777529  303.36515791  -81.36500664
   -0.         -229.25577639    0.          447.91952518   29.64261704]


In [6]:
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score


In [7]:
# Switch to the breast-cancer classification dataset.
# NOTE: this rebinds X, y and the four split variables that previously held
# the diabetes data, so the regression cells above must be re-run before
# their variables can be used again.
X, y = load_breast_cancer(return_X_y=True)

# Same 80/20 split and seed as the regression part of the notebook.
cancer_split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = cancer_split


In [8]:
# Baseline classifier with scikit-learn's default regularisation settings;
# only max_iter is raised, to give the solver more iterations to converge.
log_reg = LogisticRegression(max_iter=10000).fit(X_train, y_train)

# Score both splits so any gap between train and test accuracy is visible.
log_reg_train_acc = accuracy_score(y_train, log_reg.predict(X_train))
log_reg_test_acc = accuracy_score(y_test, log_reg.predict(X_test))

print("\nBaseline Logistic Regression")
print("Train Accuracy:", log_reg_train_acc)
print("Test Accuracy:", log_reg_test_acc)
print("Coefficients:", log_reg.coef_)



Baseline Logistic Regression
Train Accuracy: 0.9582417582417583
Test Accuracy: 0.956140350877193
Coefficients: [[ 1.0274368   0.22145051 -0.36213488  0.0254667  -0.15623532 -0.23771256
  -0.53255786 -0.28369224 -0.22668189 -0.03649446 -0.09710208  1.3705667
  -0.18140942 -0.08719575 -0.02245523  0.04736092 -0.04294784 -0.03240188
  -0.03473732  0.01160522  0.11165329 -0.50887722 -0.01555395 -0.016857
  -0.30773117 -0.77270908 -1.42859535 -0.51092923 -0.74689363 -0.10094404]]


In [9]:
# Search space: inverse regularisation strength C crossed with the penalty type.
params = {
    "C": [0.01, 0.1, 1, 10],
    "penalty": ["l1", "l2"]
}

# liblinear supports both the L1 and L2 penalties in the grid. It uses a random
# number generator internally, so random_state is pinned (same seed as the
# train/test split) to keep the search reproducible on every
# Restart & Run All.
log_reg_cv = GridSearchCV(
    LogisticRegression(solver="liblinear", max_iter=10000, random_state=42),
    params,
    cv=5
)

# 5-fold cross-validated grid search on the training split only.
log_reg_cv.fit(X_train, y_train)

# With the default refit=True, best_estimator_ is the winning configuration
# refitted on all of X_train.
best_log_reg = log_reg_cv.best_estimator_

print("\nTuned Logistic Regression")
print("Best Params:", log_reg_cv.best_params_)
print("Train Accuracy:",
      accuracy_score(y_train, best_log_reg.predict(X_train)))
print("Test Accuracy:",
      accuracy_score(y_test, best_log_reg.predict(X_test)))
print("Coefficients:", best_log_reg.coef_)



Tuned Logistic Regression
Best Params: {'C': 10, 'penalty': 'l2'}
Train Accuracy: 0.9692307692307692
Test Accuracy: 0.956140350877193
Coefficients: [[ 4.48835633  0.27196032 -0.51946446 -0.0074426  -0.72144586 -0.69521094
  -1.74176288 -1.6439379  -0.89114892  0.03691798 -0.31545108  3.35632861
  -0.88727336 -0.07335493 -0.11096036  0.80035637  0.92249598 -0.11617823
  -0.02500433  0.14611177  0.51777817 -0.6428525   0.16484437 -0.02768451
  -1.40296201 -1.61471735 -2.97856699 -2.6019605  -2.99483671 -0.02661842]]
