In [None]:
!pip install plotly

Collecting plotly
  Downloading https://files.pythonhosted.org/packages/fd/db/003b5cfbc710f4d4982440451185b952269e4080a57ae7e760a2ceb8ce0c/plotly-3.6.1-py2.py3-none-any.whl (38.6MB)
[K    76% |████████████████████████▌       | 29.5MB 588kB/s eta 0:00:16   21% |██████▉                         | 8.3MB 594kB/s eta 0:00:51

In [1]:
import math
from time import time

import numpy as np
import pandas as pd
from pandas import ExcelWriter, ExcelFile
from sklearn.linear_model import Lasso, Ridge, ElasticNet, LassoCV
from sklearn.metrics import mean_absolute_error, accuracy_score
from sklearn.model_selection import train_test_split, KFold
import plotly.plotly as py
import plotly.tools as tls
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import seaborn as sns

ModuleNotFoundError: No module named 'plotly'

In [None]:
# NOTE(review): hard-coded absolute local path; no other cell visible in this
# notebook reads `file_location` (the CSV below is loaded by relative path).
file_location='C:/Users/seul/Desktop/새 폴더/대학원/학회'

In [None]:
# Load the data set from '11_xy1.csv' (resolved relative to the working directory),
# decoding it as EUC-KR.
hotel_score = pd.read_csv('11_xy1.csv', encoding='euc-kr')  # presumably cp949 was tried as an alternative encoding

In [None]:
# Preview the first rows of the freshly loaded frame.
hotel_score.head()

In [None]:
# 'Unnamed: 0' is presumably a row index that was saved into the CSV; it is
# dropped so it is not used as a feature.
# `columns=` replaces the positional axis argument (`drop('Unnamed: 0', 1)`),
# which pandas 2.0 no longer accepts; errors='ignore' keeps the cell re-runnable
# once the column is already gone.
hotel_score = hotel_score.drop(columns='Unnamed: 0', errors='ignore')

In [None]:
# Give the column loaded as 'Unnamed: 1578' the name 'rating' (it is used as the
# target further down). Rebinding the result instead of using inplace=True keeps
# the data flow of the cell explicit.
hotel_score = hotel_score.rename(columns={'Unnamed: 1578': 'rating'})

In [None]:
# Preview the frame again after the column clean-up cells above.
hotel_score.head()

#### undersampling을 통해 Sample 추출 

In [None]:
# Histogram of the target column. Axis labels and a title are set so the figure
# is readable on its own, and plt.show() avoids echoing the hist() return value.
fig, ax = plt.subplots()
ax.hist(hotel_score.rating)
ax.set(xlabel='rating', ylabel='count', title='Distribution of rating')
plt.show()

In [None]:
# Feature matrix: every column except the target, as a NumPy array.
X = hotel_score.drop(columns=['rating']).values
# Target values; the double brackets select a one-column frame, so `y` is a
# 2-D array with a single column rather than a flat vector.
y = hotel_score[['rating']].values

In [None]:
def classify_label(preds):
    """Turn numeric predictions into class labels based on their sign.

    Parameters
    ----------
    preds : iterable
        Predicted scores; each element is compared against zero.

    Returns
    -------
    list of int
        ``1`` for every element strictly greater than zero, ``-1`` otherwise
        (so an exact zero is labelled ``-1``).
    """
    return [1 if score > 0 else -1 for score in preds]

In [None]:
def lasso(X_train, y_train, X_test, y_test, alpha):
    """Fit a Lasso regressor and score its sign-thresholded predictions.

    Parameters
    ----------
    X_train, y_train : training features and targets passed to ``Lasso.fit``.
    X_test, y_test : held-out features and the labels to compare against.
    alpha : regularization strength handed to ``Lasso``.

    Returns
    -------
    The value of ``accuracy_score`` between ``y_test`` and the predictions
    after they have been mapped to +1/-1 by ``classify_label``.
    """
    model = Lasso(alpha=alpha).fit(X_train, y_train)
    predicted_labels = classify_label(model.predict(X_test))
    return accuracy_score(y_test, predicted_labels)

In [None]:
def ridge(X_train, y_train, X_test, y_test, alpha):
    """Fit a Ridge regressor and score its sign-thresholded predictions.

    Parameters
    ----------
    X_train, y_train : training features and targets passed to ``Ridge.fit``.
    X_test, y_test : held-out features and the labels to compare against.
    alpha : regularization strength handed to ``Ridge``.

    Returns
    -------
    The value of ``accuracy_score`` between ``y_test`` and the predictions
    after they have been mapped to +1/-1 by ``classify_label``.
    """
    model = Ridge(alpha=alpha).fit(X_train, y_train)
    predicted_labels = classify_label(model.predict(X_test))
    return accuracy_score(y_test, predicted_labels)

In [None]:
def elastic(X_train, y_train, X_test, y_test, alpha):
    """Fit an ElasticNet regressor and score its sign-thresholded predictions.

    Parameters
    ----------
    X_train, y_train : training features and targets passed to ``ElasticNet.fit``.
    X_test, y_test : held-out features and the labels to compare against.
    alpha : regularization strength handed to ``ElasticNet`` (all other
        settings are left at the estimator's defaults).

    Returns
    -------
    The value of ``accuracy_score`` between ``y_test`` and the predictions
    after they have been mapped to +1/-1 by ``classify_label``.
    """
    model = ElasticNet(alpha=alpha).fit(X_train, y_train)
    predicted_labels = classify_label(model.predict(X_test))
    return accuracy_score(y_test, predicted_labels)

In [None]:
def adaptive_lasso(X_train, y_train, X_test, y_test, alpha):
    """Fit an iteratively reweighted (adaptive) Lasso and score its sign predictions.

    Every iteration divides the columns of ``X_train`` by the current feature
    weights, fits an intercept-free ``Lasso`` on that rescaled matrix, maps the
    fitted coefficients back to the original feature scale and derives the next
    weights from them.

    Parameters
    ----------
    X_train : 2-D array of training features (indexed as ``(n_samples, n_features)``).
    y_train : training targets passed to ``Lasso.fit``.
    X_test, y_test : held-out features and the labels to compare against.
    alpha : regularization strength handed to ``Lasso`` in every iteration.

    Returns
    -------
    The value of ``accuracy_score`` between ``y_test`` and the predictions
    after they have been mapped to +1/-1 by ``classify_label``.
    """
    def gprime(w):
        # Weight update; machine epsilon keeps the division finite when w == 0.
        return 1. / (2. * np.sqrt(np.abs(w)) + np.finfo(float).eps)

    n_features = X_train.shape[1]
    weights = np.ones(n_features)
    coef_ = np.zeros(n_features)
    n_lasso_iterations = 5
    for _ in range(n_lasso_iterations):
        X_w = X_train / weights[np.newaxis, :]
        clf = Lasso(alpha=alpha, fit_intercept=False)
        clf.fit(X_w, y_train)
        # clf.coef_ lives in the rescaled feature space; undo the scaling.
        coef_ = clf.coef_ / weights
        weights = gprime(coef_)

    # Predict with the coefficients expressed in the ORIGINAL feature scale.
    # `clf` was fitted on X_w, so clf.predict(X_test) would apply rescaled-space
    # coefficients to unscaled features. The model has no intercept
    # (fit_intercept=False), so the prediction is a plain dot product.
    y_pred = np.dot(X_test, coef_)
    y_pred_labeled = classify_label(y_pred)
    return accuracy_score(y_test, y_pred_labeled)

In [None]:
def main():
    """Compare the four models on one hold-out split with a shared alpha of 10.

    Reads the notebook-level ``X`` and ``y``, holds out 25% of the rows
    (``random_state=0``), evaluates ``lasso``, ``ridge``, ``elastic`` and
    ``adaptive_lasso`` in that order, and prints their accuracies.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

    alpha = 10
    evaluators = (lasso, ridge, elastic, adaptive_lasso)
    lasso_acc, ridge_acc, elastic_acc, adaptive_acc = [
        evaluate(X_train, y_train, X_test, y_test, alpha) for evaluate in evaluators
    ]
    report = "Lasso's Accuracy: {:.4f} Ridge's Accuracy : {:.4f} Elastic's Accuracy : {:.4f} Adaptive's Accuracy : {:.4f}"
    print(report.format(lasso_acc, ridge_acc, elastic_acc, adaptive_acc))
    print("Finish!")

In [None]:
# Run the comparison defined in the cell above (alpha fixed at 10).
if __name__ == '__main__':
    main()

#### Hyperparameter tuning - cross-validation

In [None]:
# K-fold cross-validation
def cv(X, y, k_fold):
    """Pre-compute the train/test arrays of every fold of a K-fold split.

    Parameters
    ----------
    X, y : arrays that support indexing with the integer index arrays
        produced by ``KFold.split``.
    k_fold : number of folds, passed to ``KFold`` as ``n_splits`` (all other
        ``KFold`` settings are left at their defaults).

    Returns
    -------
    tuple of four dicts
        ``(X_train_folds, y_train_folds, X_test_folds, y_test_folds)``, each
        keyed by the zero-based fold index.
    """
    splits = list(KFold(n_splits=k_fold).split(X))

    X_train_folds = {fold: X[train_rows] for fold, (train_rows, _) in enumerate(splits)}
    y_train_folds = {fold: y[train_rows] for fold, (train_rows, _) in enumerate(splits)}
    X_test_folds = {fold: X[test_rows] for fold, (_, test_rows) in enumerate(splits)}
    y_test_folds = {fold: y[test_rows] for fold, (_, test_rows) in enumerate(splits)}
    return X_train_folds, y_train_folds, X_test_folds, y_test_folds

In [None]:
# Build the 5 folds once from the full X / y; the grid-search cell further down
# indexes these dicts by fold number for every alpha.
k_fold = 5
X_train_folds, y_train_folds, X_test_folds, y_test_folds = cv(X, y, k_fold)

In [None]:
# Candidate regularization strengths for the grid search.
# NOTE(review): with num=1 this grid holds a single value (10**-3), so only one
# lambda is evaluated. The commented-out call is the wider grid (np.logspace
# defaults to 50 points, here between 10**-3 and 10**2) - confirm which is intended.
alphas = np.logspace(-3, -2, 1) # alphas = np.logspace(-3,2)
alphas

In [None]:
# Grid search: for every alpha, score the four models on each fold, store the
# mean accuracy per model, then report the alpha with the best mean per model.
t = time()
lasso_results, ridge_results, elastic_results, adaptive_results = dict(), dict(), dict(), dict()
for alpha in alphas:
    lasso_cv, ridge_cv, elastic_cv, adaptive_cv = [], [], [], []
    for fold_index in range(k_fold):
        print("Fold : {}, Lambda : {}".format(fold_index+1, alpha))
        fold_data = (X_train_folds[fold_index], y_train_folds[fold_index],
                     X_test_folds[fold_index], y_test_folds[fold_index])
        # Use the evaluation helpers defined in this notebook. The names called
        # here before (lasso2, ridge2, elastic2, adaptive_lasso2) are not defined
        # anywhere, so the cell raised NameError on a fresh kernel.
        lasso_acc = lasso(*fold_data, alpha)
        ridge_acc = ridge(*fold_data, alpha)
        elastic_acc = elastic(*fold_data, alpha)
        adaptive_acc = adaptive_lasso(*fold_data, alpha)
        lasso_cv.append(lasso_acc)
        ridge_cv.append(ridge_acc)
        elastic_cv.append(elastic_acc)
        adaptive_cv.append(adaptive_acc)
        print("Lasso's Accuracy: {:.4f} Ridge's Accuracy : {:.4f} Elastic's Accuracy : {:.4f} Adaptive's Accuracy : {:.4f}".format(lasso_acc, ridge_acc, elastic_acc, adaptive_acc))
    # Mean accuracy over the folds for this alpha.
    lasso_cv_acc = np.mean(lasso_cv, axis=0)
    ridge_cv_acc = np.mean(ridge_cv, axis=0)
    elastic_cv_acc = np.mean(elastic_cv, axis=0)
    adaptive_cv_acc = np.mean(adaptive_cv, axis=0)
    lasso_results[alpha] = lasso_cv_acc
    ridge_results[alpha] = ridge_cv_acc
    elastic_results[alpha] = elastic_cv_acc
    adaptive_results[alpha] = adaptive_cv_acc
    print("[Average of {}-fold] Lasso : {:.4f} Ridge : {:.4f} Elastic : {:.4f} Adaptive : {:.4f}".format(k_fold, lasso_cv_acc, ridge_cv_acc, elastic_cv_acc, adaptive_cv_acc))
# For each model, pick the alpha whose mean fold accuracy is highest
# (max() keeps the first such alpha when several tie).
lasso_lambda = max(lasso_results, key=lasso_results.get)
ridge_lambda = max(ridge_results, key=ridge_results.get)
elastic_lambda = max(elastic_results, key=elastic_results.get)
adaptive_lambda = max(adaptive_results, key=adaptive_results.get)
print("Lasso's optimal lambda value : {}".format(lasso_lambda))
print("Ridge's optimal lambda value : {}".format(ridge_lambda))
print("ElasticNet's optimal lambda value : {}".format(elastic_lambda))
print("Adaptive Lasso's optimal lambda value : {}".format(adaptive_lambda))
# `t` was recorded at the top of the cell but never used; report the wall time.
print("Elapsed time : {:.1f} sec".format(time() - t))
print("Finish!")

In [None]:
def extract_key_and_value(results):
    """Split a mapping into two parallel lists.

    Parameters
    ----------
    results : dict
        Mapping whose keys and values should be separated.

    Returns
    -------
    tuple of (list, list)
        The keys and the values of ``results``, both in the mapping's
        iteration order.
    """
    keys = list(results.keys())
    values = list(results.values())
    return keys, values

In [None]:
# Unpack each model's {alpha: mean CV accuracy} dict into x (alphas) and
# y (accuracies) lists for plotting.
x_lasso, y_lasso = extract_key_and_value(lasso_results)
x_ridge, y_ridge = extract_key_and_value(ridge_results)
x_elastic, y_elastic = extract_key_and_value(elastic_results)
x_adaptive, y_adaptive = extract_key_and_value(adaptive_results)

In [None]:
# 2x2 grid of panels, one per model: mean CV score against lambda on a
# log-scaled x axis.
plt.rcParams["figure.figsize"] = [25,16]

box = dict(facecolor='pink', pad=5, alpha=0.3)
labelx=-0.3

# (subplot position, lambdas, scores, y-axis label, call sns.despine afterwards?)
panels = [
    (1, x_lasso, y_lasso, 'Lasso Score', True),
    (2, x_ridge, y_ridge, 'Ridge Score', False),
    (3, x_elastic, y_elastic, 'Elastic Score', True),
    (4, x_adaptive, y_adaptive, 'Adaptive Score', True),
]
for position, lambdas, scores, score_label, strip_spines in panels:
    plt.subplot(2, 2, position)
    plt.plot(lambdas, scores, '-ko')
    plt.xlabel(r'$\lambda$') #lambda
    plt.ylabel(score_label, bbox=box)
    plt.xscale('log')
    if strip_spines:
        sns.despine(offset=15)

### Final 

In [None]:
def main():
    """Evaluate each model on the hold-out split with its own hard-coded alpha.

    This replaces the ``main`` defined earlier in the notebook. It reads the
    notebook-level ``X`` and ``y``, holds out 25% of the rows
    (``random_state=0``), runs the four evaluators in order and prints their
    accuracies.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

    # (evaluation function, alpha) pairs.
    # NOTE(review): the alphas are presumably the winners of the
    # cross-validation section above - confirm.
    tuned_models = [
        (lasso, 0.013257113655901095),
        (ridge, 1.4563484775012443),
        (elastic, 0.02682695795279726),
        (adaptive_lasso, 0.0012648552168552957),
    ]
    lasso_acc, ridge_acc, elastic_acc, adaptive_acc = [
        evaluate(X_train, y_train, X_test, y_test, model_alpha)
        for evaluate, model_alpha in tuned_models
    ]
    report = "Lasso's Accuracy: {:.4f} Ridge's Accuracy : {:.4f} Elastic's Accuracy : {:.4f} Adaptive's Accuracy : {:.4f}"
    print(report.format(lasso_acc, ridge_acc, elastic_acc, adaptive_acc))
    print("Finish!")

In [None]:
# Run the final comparison defined in the cell above (per-model alphas).
if __name__ == '__main__':
    main()

![hahah](totoro.png)