In [36]:
# 교차 검증 단순화
# scikit-learn의 model_selection 모듈 내에 모델 검증관련 기능 활용
# 교차 검증 데이터기반 검증 결과 처리

In [37]:
import pandas as pd
fishDF=pd.read_csv("../data/fish.csv")
irisDF=pd.read_csv("../data/iris.csv")

In [38]:
# 피쳐, 타겟 분리 
fish_target=fishDF[fishDF.columns[1]]
fish_feature=fishDF[fishDF.columns[2:]]
iris_target=irisDF[irisDF.columns[4]]
iris_feature=irisDF[irisDF.columns[1:4]]

In [39]:
from sklearn.model_selection import train_test_split
fish_x_train,fish_x_test,fish_y_train,fish_y_test=train_test_split(fish_feature,fish_target,test_size=0.2,random_state=5)
iris_x_train,iris_x_test,iris_y_train,iris_y_test=train_test_split(iris_feature,iris_target,test_size=0.2,random_state=5)

In [40]:
from sklearn.preprocessing import StandardScaler

fish_Scaler=StandardScaler()
fish_Scaler.fit(fish_x_train)
fish_x_train=fish_Scaler.transform(fish_x_train)
fish_x_test=fish_Scaler.transform(fish_x_test)

iris_Scaler=StandardScaler()
iris_Scaler.fit(iris_x_train)
iris_x_train=iris_Scaler.transform(iris_x_train)
iris_x_test=iris_Scaler.transform(iris_x_test)

In [41]:
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.model_selection import cross_validate, cross_val_score, cross_val_predict

In [42]:
lr_model=LinearRegression()

In [43]:
result = cross_validate(lr_model, fish_x_train, fish_y_train, cv=3,
                        return_train_score=True, # 훈련 성적 리턴
                        return_estimator=True, # 모델 정보 리턴 
                        scoring=('r2', 'neg_mean_squared_error')) 
result

{'fit_time': array([0.00100017, 0.00100017, 0.0010004 ]),
 'score_time': array([0.00100017, 0.        , 0.00100017]),
 'estimator': [LinearRegression(), LinearRegression(), LinearRegression()],
 'test_r2': array([0.90955466, 0.85168628, 0.82057897]),
 'train_r2': array([0.87015912, 0.8908385 , 0.90834446]),
 'test_neg_mean_squared_error': array([-10682.40110761, -15025.75544999, -27033.9317572 ]),
 'train_neg_mean_squared_error': array([-16614.59978578, -14707.39326494, -10106.26009911])}

In [44]:
resultDF=pd.DataFrame(result)
resultDF

Unnamed: 0,fit_time,score_time,estimator,test_r2,train_r2,test_neg_mean_squared_error,train_neg_mean_squared_error
0,0.001,0.001,LinearRegression(),0.909555,0.870159,-10682.401108,-16614.599786
1,0.001,0.0,LinearRegression(),0.851686,0.890838,-15025.75545,-14707.393265
2,0.001,0.001,LinearRegression(),0.820579,0.908344,-27033.931757,-10106.260099


In [45]:
best_model=resultDF.iloc[0]["estimator"]
best_model.coef_

array([ 414.7976385 , -198.37922687,  101.75864282,   43.20686639])

In [46]:
cross_val_score(lr_model, fish_x_train, fish_y_train)

array([0.92104683, 0.84385378, 0.88592423, 0.64671954, 0.79031905])

In [47]:
cross_val_predict(lr_model, fish_x_train, fish_y_train)

array([ 9.09792517e+01,  9.85612151e+01,  3.87029719e+02,  1.13011547e+02,
        6.81676563e+02,  2.82456988e+02,  5.34379642e+02,  3.61848302e+02,
        6.12934598e+02,  1.70756130e+02,  5.53222970e+02,  1.69433076e+01,
       -2.53895688e+01,  8.14926155e+02,  6.97225129e+01,  3.38157931e+02,
        4.76306355e+02,  7.67659158e+02,  6.55686457e+02,  1.80300946e+02,
        8.45315559e+02,  2.92145322e+02,  6.08539351e+02,  9.02782406e+02,
        6.99788981e+02,  9.40316876e+02,  7.47628344e+02,  3.28419355e+02,
        7.89622699e+02,  9.09130831e+02, -1.98986854e+02,  1.81089559e+02,
        6.36731679e+02, -1.09209894e+02,  3.57087822e+02,  7.88250361e+02,
        3.25180589e+02,  6.56473977e+02, -2.37032025e+02,  4.55882834e+01,
        9.57130255e+01, -2.10830505e+02,  1.28969696e+02, -2.21199132e+02,
       -1.10282630e+02,  6.39911566e+02,  2.12288357e+02,  2.41098815e+02,
        2.61932359e+02, -2.58301758e+02,  2.93250859e+01,  8.87950700e+02,
        2.46460034e+02,  

In [50]:
from sklearn.model_selection import GridSearchCV 
# 모델에 대해서 파라미터를 딕셔너리 형태로 넣으면,
# for문 돌리듯이 모델에 파라미터를 번갈아가면서 넣으면서 학습을 진행시킨다! 
from sklearn.linear_model import LogisticRegression

est=LogisticRegression(max_iter=10000)
params={'penalty':["l1","l2","elasticnet"],
        'solver' : ["lbfgs","liblinear","newton-cg","newton-cholesky","sag","saga"]}
gscv=GridSearchCV(est,params,cv=5)
gscv.fit(iris_x_train,iris_y_train

50 fits failed out of a total of 90.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5 fits failed with the following error:
Traceback (most recent call last):
  File "c:\ProgramData\anaconda3\envs\EXAM_ML\lib\site-packages\sklearn\model_selection\_validation.py", line 732, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\ProgramData\anaconda3\envs\EXAM_ML\lib\site-packages\sklearn\base.py", line 1151, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "c:\ProgramData\anaconda3\envs\EXAM_ML\lib\site-packages\sklearn\linear_model\_logistic.py", line 1168, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "c:\ProgramData\anaconda3\envs\EXAM_ML\lib\site-packages\skl

In [51]:
gscv.best_estimator_

In [52]:
gscv.best_score_

0.9663043478260871

In [54]:
gscv.best_index_

5

In [55]:
best_model=gscv.best_estimator_ # 가장 좋았던 모델을 best_model에 저장

In [56]:
from sklearn.utils.discovery import all_estimators
allAlgorithms=all_estimators(type_filter='regressor')

In [66]:
for model_name, model in allAlgorithms:
    try :
        print(model.fit(fish_x_train, fish_y_train)) # 중간에 에러나 난다면, 예외처리를 해주면 된다! 
    except Exception as e:
        pass