목표: 학습한 모델과 scaler를 파일로 저장하여 전달하고, 저장된 파일을 다시 불러와 사용하는 방법

In [1]:
from sklearn.datasets import load_iris

In [2]:
# Load the iris dataset that ships with scikit-learn.
iris_data = load_iris()

In [3]:
# List the entries available on the loaded dataset object.
iris_data.keys()

dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [4]:
# Show the names of the target classes.
iris_data['target_names']

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [5]:
import pandas as pd

# Build a DataFrame from the feature matrix, naming each column after its feature.
iris_df = pd.DataFrame(
    data=iris_data['data'],
    columns=iris_data['feature_names'],
)
# Print a summary of the columns (dtypes, non-null counts, memory usage).
iris_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  150 non-null    float64
 1   sepal width (cm)   150 non-null    float64
 2   petal length (cm)  150 non-null    float64
 3   petal width (cm)   150 non-null    float64
dtypes: float64(4)
memory usage: 4.8 KB


In [6]:
# Preview the first rows of the feature table.
iris_df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [7]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing. Stratifying on the target keeps
# the class proportions the same in both splits; the fixed seed makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    iris_data['data'],
    iris_data['target'],
    test_size=0.2,
    stratify=iris_data['target'],
    random_state=42,
)

In [8]:
from sklearn.preprocessing import MinMaxScaler

# Learn the scaling parameters from the training split only, then apply
# that same fitted scaler to both splits.
mms = MinMaxScaler()
mms.fit(X_train)
X_train_scaled = mms.transform(X_train)
X_test_scaled = mms.transform(X_test)

In [9]:
from sklearn.linear_model import LogisticRegression

# Train a logistic-regression classifier (default hyperparameters) on the
# scaled training features.
logreg = LogisticRegression()
logreg.fit(X_train_scaled, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [11]:
# Compare the score on the training split with the score on the held-out
# test split (for a classifier, score() reports mean accuracy).
logreg.score(X_train_scaled, y_train), logreg.score(X_test_scaled, y_test)

(0.9333333333333333, 0.9)

In [12]:
# Predict a class label for every scaled test sample.
logreg.predict(X_test_scaled)

array([0, 2, 1, 1, 0, 2, 0, 0, 2, 1, 2, 2, 2, 1, 0, 0, 0, 1, 1, 2, 0, 2,
       1, 1, 2, 2, 1, 0, 2, 0])

In [13]:
# Persist Python objects to files with pickle
import os
import pickle

# open() does not create missing parent directories, so make sure the
# output folder exists before writing into it.
os.makedirs('data-files', exist_ok=True)

# Save the trained model
with open('data-files/iris-model.pkl', 'wb') as f:
    pickle.dump(logreg, f)

# Save the fitted scaler as well, so new inputs can be scaled the same way
with open('data-files/iris-scaler.pkl', 'wb') as f:
    pickle.dump(mms, f)

In [14]:
# Save the model and the scaler again, this time with joblib
# NOTE(review): presumably requires the data-files/ directory to exist
# already — confirm before running this cell on its own.
import joblib

joblib.dump(logreg, 'data-files/iris-model2.pkl')
joblib.dump(mms, 'data-files/iris-scaler2.pkl')

['data-files/iris-scaler2.pkl']

In [15]:
# Load the saved model back from disk so it can be reused.
# NOTE: unpickling can execute arbitrary code; only load pickle files that
# come from a trusted source.
with open('data-files/iris-model.pkl', 'rb') as f:
    logreg2 = pickle.load(f)

In [16]:
# Print the reloaded model's predictions for the first ten test samples,
# followed by the true labels on the next line, for a side-by-side check.
print(logreg2.predict(X_test_scaled[:10]), y_test[:10], sep='\n')

[0 2 1 1 0 2 0 0 2 1]
[0 2 1 1 0 1 0 0 2 1]
