In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.formula.api import ols, glm

In [None]:
# Load the red-wine table and tag every row with type "1" (a string label, not an integer).
red_wine = pd.read_csv('/kaggle/input/wine-quality/wineQualityReds.csv').assign(type="1")
# Preview the first rows to check that the file parsed as expected.
red_wine.head()

In [None]:
# Load the white-wine table and tag every row with type "0" (a string label, not an integer).
white_wine = pd.read_csv('/kaggle/input/wine-quality/wineQualityWhites.csv').assign(type="0")
# Preview the first rows to check that the file parsed as expected.
white_wine.head()

In [None]:
# Stack the red and white tables row-wise into one frame.
# ignore_index=True gives the combined frame a fresh 0..n-1 index; without it the
# row labels of both source frames are kept and therefore repeat, which makes
# label-based selection and index alignment on `wine` ambiguous.
wine = pd.concat([red_wine, white_wine], axis=0, ignore_index=True)
wine.head()

In [None]:
# Summary statistics for each variable of the combined table
# (displayed as the cell's output).
wine.describe()

In [None]:
# Distinct values found in the quality column: first in order of appearance,
# then sorted ascending.
quality_levels = wine["quality"].unique()
print(quality_levels)
print(sorted(quality_levels))


In [None]:
# Number of observations for each distinct quality value; value_counts() sorts
# the counts in descending order by default.
# Author's observation: quality 6 is the most frequent score.
quality_counts = wine["quality"].value_counts()
print(quality_counts)


In [None]:
# Descriptive statistics of quality, computed separately for each wine type.
quality_by_type = wine.groupby('type')[['quality']]
quality_by_type.describe()


In [None]:
# Same per-type quality summary as above, then unstack the 'type' index level
# to re-arrange the table (the original note describes this as reshaping it horizontally).
quality_summary = wine.groupby('type')[['quality']].describe()
quality_summary.unstack('type')


In [None]:
# Inspect the distribution of quality for each wine type.
# NOTE(review): red_wine / white_wine were DataFrames when loaded; from here on
# the same names hold only the quality Series of each type.
red_wine = wine.loc[wine['type'].eq('1'), 'quality']
white_wine = wine.loc[wine['type'].eq('0'), 'quality']
for quality_scores in (red_wine, white_wine):
    print(quality_scores.head())


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Show densities instead of raw frequencies so the two groups can be compared
# even though they contain different numbers of wines.
sns.set_style("dark")  # must be set before the axes are created to take effect
fig, ax = plt.subplots()
for scores, color, label in (
    (red_wine, "red", "Red wine"),
    (white_wine, "white", "White wine"),
):
    # histplot replaces the deprecated distplot; stat="density" corresponds to
    # norm_hist=True, and discrete=True centres one bar on each integer score.
    sns.histplot(scores, stat="density", discrete=True,
                 color=color, label=label, ax=ax)
ax.set_xlabel("Quality Score")
ax.set_ylabel("Density")
ax.set_title("Distribution of Quality by Wine Type")
ax.legend()
plt.show()


# Author's observation: the quality of both wine groups is approximately normally distributed.
# Looking at histograms shows the two groups' distributions more clearly than
# summary statistics alone.


In [None]:
import seaborn as sns

# Pairwise correlations between the numeric columns.
# The 'type' column holds the strings "0"/"1", and DataFrame.corr() raises on
# non-numeric columns in pandas >= 2.0 (older versions dropped them silently),
# so restrict the frame to numeric dtypes explicitly.
corr = wine.select_dtypes(include="number").corr()

fig, ax = plt.subplots(figsize=(12, 9))
# The colour scale is clipped to +/-0.4 so that moderate correlations stay visible.
sns.heatmap(corr,
            vmax=0.4, vmin=-0.4, linewidths=1, annot=True,
            xticklabels=corr.columns.values,
            yticklabels=corr.columns.values,
            ax=ax)
plt.show()


In [None]:
# Build the feature matrix and target vector used for the train/test split.

from sklearn.model_selection import train_test_split
import numpy as np

# Feature matrix: 11 physico-chemical measurements plus the wine type (12 columns).
# 'type' is stored as the strings "0"/"1", so a plain .values would yield an
# object-dtype array that still contains strings; casting to float makes the
# matrix numeric before it reaches the scaler and the network.
X = wine[['fixed.acidity', 'volatile.acidity', 'citric.acid', 'residual.sugar',
          'chlorides', 'free.sulfur.dioxide', 'total.sulfur.dioxide', 'density',
          'pH', 'sulphates', 'alcohol', 'type']].astype(float).values  # 12 dimensions

# Target: the quality score as a flat 1-D array.
y = np.ravel(wine.quality)


In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

# Standardise every feature column of X (StandardScaler's default centring and scaling).
# NOTE(review): the scaler is fitted on all rows of X, i.e. before any
# train/test split has been made.
sc = StandardScaler()
sc.fit(X)
X_norm = sc.transform(X)

In [None]:
# Split the data up in train and test sets.
# The split is done on the raw features and the scaler is then fitted on the
# training rows only: fitting it on the full matrix lets the test rows influence
# the mean/variance used for scaling (data leakage) and makes the held-out score
# optimistic. With the same random_state and row count the same rows end up in
# each subset as when splitting the pre-scaled matrix.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42)

sc = StandardScaler().fit(X_train_raw)  # statistics come from the training rows only
X_train = sc.transform(X_train_raw)
X_test = sc.transform(X_test_raw)       # test rows reuse the training statistics

In [None]:
#X_train=X_train.astype('float')
#X_train

In [None]:
#X_test=X_test.astype('float')
#X_test

In [None]:
#y_train=y_train.astype('float')
#y_train

In [None]:
#y_test=y_test.astype('float')
#y_test

In [None]:
#sns.boxplot(data = X_train) 


In [None]:
#sns.boxplot(data = X_test)


In [None]:
# Data Model: a small fully connected network mapping the 12 input features
# to a single predicted quality value.

#import tensorflow as tf

from keras.models import Sequential
from keras.layers import Dense, Input, Dropout

# Initialize the constructor
model = Sequential()

# First dense layer; input_shape declares the 12 feature columns
model.add(Dense(128, activation='relu', input_shape=(12,)))

#model.add(Dropout(0.3))

# Add one hidden layer
model.add(Dense(16, activation='relu'))

# Output layer: one unit with a linear activation. The target is a numeric
# score, so the output must be free to take any real value; with ReLU here the
# unit can get stuck emitting 0 (zero gradient) and stop learning.
model.add(Dense(1, activation='linear'))


In [None]:
#X_train

In [None]:
# Turn the target vectors into single-column 2-D arrays (one row per sample),
# matching the single-unit output of the network.
y_train = y_train.reshape(-1, 1)
y_test = y_test.reshape(-1, 1)

In [None]:
#y_train

In [None]:
# Display the input shape reported by the model (sanity check against the 12 feature columns).
model.input_shape


In [None]:
# Display the output shape reported by the model (the last layer has a single unit).
model.output_shape


In [None]:
# Print the layer-by-layer summary of the network.
model.summary()

In [None]:
# The target is the wine quality score (y is taken from wine.quality), not a
# 0/1 label, so binary cross-entropy is not a valid loss for it. Train the
# single-output network as a regressor: mean squared error as the loss and mean
# absolute error as a readable metric in quality points.
# 'accuracy' is kept only so that history.history still exposes the
# 'accuracy' / 'val_accuracy' keys; it is not a meaningful measure for this target.
model.compile(loss='mse',
              optimizer='Adam',
              metrics=['mae', 'accuracy'])

# NOTE(review): the test split doubles as validation data here, so the
# per-epoch validation numbers are not an independent estimate.
history = model.fit(X_train, y_train,
                    epochs=20, batch_size=10,
                    validation_data=(X_test, y_test),
                    verbose=1)


In [None]:
# Learning curves for the training run.
# The loss is plotted instead of accuracy: the network emits one numeric value
# per wine and the target is a quality score, so a classification accuracy does
# not describe the fit. 'loss' / 'val_loss' are always recorded by fit() when
# validation data is supplied, whatever metrics were compiled.
print(history.history.keys())
fig, ax = plt.subplots()
ax.plot(history.history['loss'])
ax.plot(history.history['val_loss'])
ax.set_title('model loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(['train', 'test'], loc='upper right')
plt.show()


In [None]:
# Evaluate the trained model on the held-out split and print what Keras
# returns (the loss followed by the compiled metrics).
score = model.evaluate(x=X_test, y=y_test, verbose=1)
print(score)
