In [15]:
import json
import os
import pickle

import pandas as pd
import requests
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer, HashingVectorizer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

In [16]:
def fine_accuracy(alpha, data_path="article_2016-06-01.plk", test_size=0.1, random_state=1):
    """Train a TF-IDF + multinomial naive Bayes classifier and report its accuracy.

    The article DataFrame is loaded from ``data_path``, split into train and
    test parts, a ``TfidfVectorizer`` -> ``MultinomialNB`` pipeline is fitted
    on the training part, and the accuracy on the test part is sent to Slack
    through ``send_slack`` and returned.

    Parameters
    ----------
    alpha : float
        Value passed to ``MultinomialNB(alpha=...)``.
    data_path : str, optional
        Pickled DataFrame to load. It must expose ``content`` (model input)
        and ``category`` (label) columns.
    test_size : float, optional
        Forwarded to ``train_test_split`` as ``test_size``.
    random_state : int, optional
        Forwarded to ``train_test_split`` as ``random_state`` so that the
        split is the same for every ``alpha`` being compared.

    Returns
    -------
    float
        Value of ``accuracy_score(y_test, y_pred)`` on the held-out split.
    """
    # Load the article DataFrame.
    # NOTE(review): read_pickle unpickles the file, which can execute arbitrary
    # code; only point data_path at files from a trusted source.
    article_df = pd.read_pickle(data_path)

    # Split into training and test data.
    X_train, X_test, y_train, y_test = train_test_split(
        article_df.content,
        article_df.category,
        test_size=test_size,
        random_state=random_state,
    )

    # Vectorizer followed by the classification algorithm.
    clf = Pipeline([
        ('vect', TfidfVectorizer()),
        ('clf', MultinomialNB(alpha=alpha)),
    ])

    # Fit the model; fit() returns the pipeline itself, so no alias is needed.
    clf.fit(X_train, y_train)

    # Predict the held-out test data.
    y_pred = clf.predict(X_test)

    # Measure the accuracy and report it to Slack before returning it.
    result = accuracy_score(y_test, y_pred)
    send_slack("alpha:{}, accuracy:{}".format(alpha, result))
    return result

In [17]:
def send_slack(msg, channel="#dss", username="model_bot", timeout=10):
    """Post ``msg`` to Slack through an incoming-webhook URL.

    The webhook URL is read from the ``SLACK_WEBHOOK_URL`` environment
    variable instead of being embedded in the notebook: the URL is a
    credential, and anyone who can read it can post with it.
    NOTE(review): the URL that used to be hard-coded here has been exposed
    in the notebook and should be revoked and regenerated.

    Parameters
    ----------
    msg : str
        Text of the message (payload field ``text``).
    channel : str, optional
        Value sent as the payload field ``channel``.
    username : str, optional
        Value sent as the payload field ``username``.
    timeout : float, optional
        Seconds passed to ``requests.post(timeout=...)``. Without a timeout
        the call can block indefinitely and stall the caller.

    Returns
    -------
    requests.Response
        The response of the POST, so callers can inspect its status.

    Raises
    ------
    RuntimeError
        If ``SLACK_WEBHOOK_URL`` is unset or empty.
    """
    webhook_url = os.environ.get("SLACK_WEBHOOK_URL")
    if not webhook_url:
        raise RuntimeError(
            "SLACK_WEBHOOK_URL is not set; export the Slack incoming-webhook "
            "URL in the environment before calling send_slack()."
        )

    payload = {
        "channel": channel,
        "username": username,
        "icon_emoji": ":provision:",
        "text": msg,
    }
    return requests.post(
        webhook_url,
        data=json.dumps(payload),
        timeout=timeout,
    )

In [18]:
# Smoothing values to compare; each run prints the alpha next to its accuracy.
alphas = [0.1, 0.01, 0.001]
for alpha in alphas:
    accuracy = fine_accuracy(alpha)
    print(alpha, accuracy)

0.1 0.7939914163090128
0.01 0.8798283261802575
0.001 0.8669527896995708
