# 로지스틱회귀분석 실습 2: 다범주분류문제

## 1. 모듈 불러오기

In [None]:
from IPython.display import display, HTML
import numpy as np
import pandas as pd

import statsmodels.api as sm
from sklearn.model_selection import train_test_split
import sklearn.preprocessing as preprocessing
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve

import pycm
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load the wine dataset from the CSV file and preview its first five rows.
data = pd.read_csv(filepath_or_buffer='Data/Wine.csv')
data.head(n=5)

## 2. 데이터 전처리 및 탐색적 데이터 분석

In [None]:
# Pairwise scatter plots of every column, colored by the 'Wine Type' label.
sns.pairplot(data=data, hue="Wine Type")
plt.show()

In [None]:
# Show how many rows belong to each 'Wine Type' value in the full dataset.
display(data['Wine Type'].value_counts())

In [None]:
# Hold out 30% of the rows for testing. The split is stratified on
# 'Wine Type' and uses a fixed seed so it is reproducible.
train_data, test_data = train_test_split(
    data,
    test_size=0.3,
    random_state=55,
    stratify=data['Wine Type'],
)

# Report the per-class row counts of each split, training first.
for split_name, split_frame in (('Training', train_data), ('Testing', test_data)):
    print(split_name)
    display(split_frame['Wine Type'].value_counts())

## 3. 모델링

## statsmodels 매뉴얼

[![statsmodels](Figures/statsmodels.png)](https://www.statsmodels.org/stable/index.html)


*우측 하단의 Quick Search를 이용하여 검색 (그림 클릭 시 사이트로 이동)

## Logit / MNLogit의 fit method

In [None]:
# Multinomial logit: 'Wine Type' is the response and every other column of
# the training frame is used as a predictor.
# NOTE(review): the predictor frame is passed as-is, so no intercept column
# is added here (statsmodels does not add one automatically) -- confirm this
# is intended, unless the CSV already contains a constant column.
model = sm.MNLogit(
    endog=train_data['Wine Type'],
    exog=train_data.drop(columns='Wine Type'),
)
# Fit with the Newton conjugate-gradient optimizer.
model_fitted = model.fit(method='ncg')

In [None]:
# Build the fitted model's summary; it is left as the bare last expression
# of the cell so the notebook renders it as the cell output.
model_fitted.summary()

## 4. 모델 결과물 확인

In [None]:
# Predicted class probabilities for the training rows, using the same
# predictor columns the model was fitted on.
train_prob = model_fitted.predict(train_data.drop(columns='Wine Type'))

# Place the true label next to the predicted probabilities.
train_results = pd.concat(
    objs=[train_prob, train_data['Wine Type']],
    axis='columns',
)
# Three class-probability columns followed by the true label.
train_results.columns = [
    'Class Probability 1',
    'Class Probability 2',
    'Class Probability 3',
    'Wine Type',
]
display(train_results)

In [None]:
# Predicted class probabilities for the held-out test rows.
test_prob = model_fitted.predict(test_data.drop(columns='Wine Type'))

# Place the true label next to the predicted probabilities.
test_results = pd.concat(
    objs=[test_prob, test_data['Wine Type']],
    axis='columns',
)
# Three class-probability columns followed by the true label.
test_results.columns = [
    'Class Probability 1',
    'Class Probability 2',
    'Class Probability 3',
    'Wine Type',
]
display(test_results)

## 5. 확률이 가장 높은 클래스 할당

In [None]:
# For every row, take the column holding the largest predicted probability
# and shift it by one to obtain the predicted label.
# NOTE(review): the +1 assumes the probability columns are labeled 0, 1, 2
# and that the 'Wine Type' labels are 1, 2, 3 -- confirm against the data.
train_pred = train_prob.idxmax(axis='columns') + 1
test_pred = test_prob.idxmax(axis='columns') + 1

## 6. Classification 결과 평가

In [None]:
# Compare the true test labels with the predicted ones and print the
# confusion-matrix report produced by pycm.
cm = pycm.ConfusionMatrix(
    actual_vector=test_data['Wine Type'].to_numpy(),
    predict_vector=test_pred.to_numpy(),
)
print(cm)