In [None]:
import pandas as pd

# Load the wine review CSV file into a DataFrame
df = pd.read_csv('winemag-data-130k-v2.csv')

# Inspect the data (a bare expression on the last line is rendered as the cell output)
df

Unnamed: 0.1,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
1,1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
2,2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
3,3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian
4,4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33603,33603,Italy,"Linear and elegant, this bears dusty tones of ...",Raiano,91,38.0,Southern Italy,Taurasi,,,,Villa Raiano 2008 Raiano (Taurasi),Aglianico,Villa Raiano
33604,33604,Germany,Honeyed marmalade and bitter orange-rind flavo...,Rüdesheim Berg Roseneck Erstes Gewächs,91,68.0,Rheingau,,,Anna Lee C. Iijima,,August Kesseler 2010 Rüdesheim Berg Roseneck E...,Riesling,August Kesseler
33605,33605,Australia,"This wine represents an excellent value, and o...",George Wyndham Founder's Reserve,91,20.0,South Australia,Langhorne Creek,,Joe Czerwinski,@JoeCz,Wyndham Estate 2008 George Wyndham Founder's R...,Shiraz,Wyndham Estate
33606,33606,US,"This is soft, smooth and refined, with rich, c...",Estate Grown and Bottled,91,25.0,California,Santa Ynez Valley,Central Coast,,,Zaca Mesa 2009 Estate Grown and Bottled Syrah ...,Syrah,Zaca Mesa


와인 리뷰 데이터의 품종(variety)을 기준으로 RED와 WHITE로 라벨링한 뒤, 리뷰 텍스트(description)만으로 RED/WHITE를 구분

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# Load the data
df = pd.read_csv('winemag-data-130k-v2.csv')

# Red wine varieties
red_wines = [
    'Cabernet Sauvignon', 'Merlot', 'Pinot Noir', 'Syrah',
    'Zinfandel', 'Malbec', 'Grenache', 'Sangiovese',
    'Tempranillo', 'Cabernet Franc', 'Barbera',
    'Petit Verdot', 'Nebbiolo', 'Montepulciano',
    'Aglianico', 'Dolcetto', 'Cinsault',
    'Tannat', 'Mourvèdre', 'Red Blend'
]

# White wine varieties
white_wines = [
    'Chardonnay', 'Sauvignon Blanc', 'Riesling', 'Pinot Grigio',
    'Gewürztraminer', 'Moscato', 'Viognier', 'Semillon',
    'Albariño', 'Chenin Blanc', 'Verdelho', 'Trebbiano',
    'Fiano', 'Grillo', 'Garganega', 'White Blend'
]

# Replace NaN with an empty string so the substring checks below only ever see str
df['variety'] = df['variety'].fillna('')

# Label each review by substring match against the two lists (red is checked
# first); a variety that matches neither list is labelled 'unknown'
df['wine_type'] = df['variety'].apply(
    lambda x: 'red' if any(wine in x for wine in red_wines) else (
        'white' if any(wine in x for wine in white_wines) else 'unknown'
    )
)

# Keep only reviews whose colour is known. Without this filter every 'unknown'
# row would be encoded as 0 below and silently trained/evaluated as a white wine.
# .copy() makes the result an independent frame so the column assignment below
# does not write into a slice of the original.
df = df[df['wine_type'] != 'unknown'].copy()

# Binary target as a 1-D array: 1 = red, 0 = white
df['wine_type_encoded'] = (df['wine_type'] == 'red').astype(int)
y_binary = df['wine_type_encoded'].values

# Text preprocessing settings
max_words = 10000  # maximum number of words to use
max_len = 100  # length every sequence is padded/truncated to

# Build the vocabulary from the review descriptions
review_texts = df['description']
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(review_texts)

# Turn each description into an integer sequence, then pad so all rows share one length
X = pad_sequences(tokenizer.texts_to_sequences(review_texts), maxlen=max_len)

# Hold out 20% of the samples as a test set (fixed seed so the split is repeatable)
X_train_bin, X_test_bin, y_train_bin, y_test_bin = train_test_split(
    X,
    y_binary,
    test_size=0.2,
    random_state=42,
)

# Stop training once the validation loss has not improved for 3 epochs and
# restore the weights from the best epoch
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Model definition: embedding -> LSTM -> dropout -> single sigmoid unit
model_bin = Sequential([
    Embedding(input_dim=max_words, output_dim=128, input_length=max_len),
    LSTM(64, return_sequences=False),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),  # sigmoid output for binary classification
])

# Compile the model
model_bin.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train, using 20% of the training samples for validation
history_bin = model_bin.fit(
    X_train_bin,
    y_train_bin,
    epochs=20,
    batch_size=64,
    validation_split=0.2,
    callbacks=[early_stopping],
)

# Evaluate on the held-out test set
loss, accuracy = model_bin.evaluate(X_test_bin, y_test_bin)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")


Epoch 1/20




[1m1300/1300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 9ms/step - accuracy: 0.8534 - loss: 0.3422 - val_accuracy: 0.9089 - val_loss: 0.2432
Epoch 2/20
[1m1300/1300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 9ms/step - accuracy: 0.9196 - loss: 0.2145 - val_accuracy: 0.9063 - val_loss: 0.2416
Epoch 3/20
[1m1300/1300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 9ms/step - accuracy: 0.9303 - loss: 0.1854 - val_accuracy: 0.9102 - val_loss: 0.2341
Epoch 4/20
[1m1300/1300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 10ms/step - accuracy: 0.9386 - loss: 0.1604 - val_accuracy: 0.9121 - val_loss: 0.2401
Epoch 5/20
[1m1300/1300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 9ms/step - accuracy: 0.9461 - loss: 0.1386 - val_accuracy: 0.9076 - val_loss: 0.2605
Epoch 6/20
[1m1300/1300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 9ms/step - accuracy: 0.9544 - loss: 0.1201 - val_accuracy: 0.9042 - val_loss: 0.2903
Epoch 7/20
[1m1300/

사용자가 와인 설명을 입력하면, 모델이 입력된 설명을 기반으로 레드/화이트 와인인지 예측, 예측된 와인 타입에 따라 해당 타입의 와인 품종 중 하나를 무작위로 추천, 추천된 와인 품종에 맞는 음식 페어링을 출력

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
import random

# Load the data
df = pd.read_csv('winemag-data-130k-v2.csv')

# Red wine varieties
red_wines = [
    'Cabernet Sauvignon', 'Merlot', 'Pinot Noir', 'Syrah',
    'Zinfandel', 'Malbec', 'Grenache', 'Sangiovese',
    'Tempranillo', 'Cabernet Franc', 'Barbera',
    'Petit Verdot', 'Nebbiolo', 'Montepulciano',
    'Aglianico', 'Dolcetto', 'Cinsault',
    'Tannat', 'Mourvèdre', 'Red Blend'
]

# White wine varieties
white_wines = [
    'Chardonnay', 'Sauvignon Blanc', 'Riesling', 'Pinot Grigio',
    'Gewürztraminer', 'Moscato', 'Viognier', 'Semillon',
    'Albariño', 'Chenin Blanc', 'Verdelho', 'Trebbiano',
    'Fiano', 'Grillo', 'Garganega', 'White Blend'
]

# Replace NaN with an empty string so the substring checks below only ever see str
df['variety'] = df['variety'].fillna('')

# Label each review by substring match against the two lists (red is checked
# first); a variety that matches neither list is labelled 'unknown'
df['wine_type'] = df['variety'].apply(
    lambda x: 'red' if any(wine in x for wine in red_wines) else (
        'white' if any(wine in x for wine in white_wines) else 'unknown'
    )
)

# Keep only reviews whose colour is known. Without this filter every 'unknown'
# row would be encoded as 0 below and silently trained as a white wine.
# .copy() makes the result an independent frame so the column assignment below
# does not write into a slice of the original.
df = df[df['wine_type'] != 'unknown'].copy()

# Binary target as a 1-D array: 1 = red, 0 = white
df['wine_type_encoded'] = (df['wine_type'] == 'red').astype(int)
y_binary = df['wine_type_encoded'].values

# Text preprocessing settings
max_words = 10000  # maximum number of words to use
max_len = 100  # length every sequence is padded/truncated to

# Build the vocabulary from the review descriptions
review_texts = df['description']
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(review_texts)

# Integer-encode each description and pad to a common length in one step
X = pad_sequences(tokenizer.texts_to_sequences(review_texts), maxlen=max_len)

# Hold out 20% of the samples as a test set (fixed seed so the split is repeatable)
X_train_bin, X_test_bin, y_train_bin, y_test_bin = train_test_split(
    X,
    y_binary,
    test_size=0.2,
    random_state=42,
)

# EarlyStopping is not among this cell's imports, so bring it into scope here
from tensorflow.keras.callbacks import EarlyStopping

# Model definition: embedding -> LSTM -> dropout -> single sigmoid unit
model_bin = Sequential()
model_bin.add(Embedding(input_dim=max_words, output_dim=128, input_length=max_len))
model_bin.add(LSTM(64, return_sequences=False))
model_bin.add(Dropout(0.5))
model_bin.add(Dense(1, activation='sigmoid'))  # sigmoid output for binary classification

model_bin.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Stop once the validation loss has not improved for 3 epochs and restore the
# best weights. The recorded run shows val_loss rising after epoch 3, so
# training for all 20 epochs leaves an overfit model for the prediction below.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train, using 20% of the training samples for validation
history_bin = model_bin.fit(
    X_train_bin,
    y_train_bin,
    epochs=20,
    batch_size=64,
    validation_split=0.2,
    callbacks=[early_stopping],
)

# Suggested food pairings, keyed by wine variety.
# Built from a red part and a white part; merging keeps the red entries first,
# followed by the white entries.
_red_food_pairings = {
    'Cabernet Sauvignon': 'Grilled steak, lamb',
    'Merlot': 'Roast chicken, pasta',
    'Pinot Noir': 'Duck, mushroom risotto',
    'Syrah': 'BBQ ribs, spicy sausages',
    'Zinfandel': 'BBQ chicken, pizza',
    'Malbec': 'Grilled meats, burgers',
    'Grenache': 'Lamb, paella',
    'Sangiovese': 'Pasta, pizza',
    'Tempranillo': 'Grilled vegetables, chorizo',
    'Cabernet Franc': 'Roasted vegetables, turkey',
    'Barbera': 'Pasta, risotto',
    'Petit Verdot': 'Beef stew, dark chocolate',
    'Nebbiolo': 'Truffles, braised beef',
    'Montepulciano': 'Roasted meats, cheese',
    'Aglianico': 'Grilled meats, hearty stews',
    'Dolcetto': 'Pasta, grilled meats',
    'Cinsault': 'Salmon, grilled chicken',
    'Tannat': 'Strong cheeses, grilled meats',
    'Mourvèdre': 'Lamb, Mediterranean dishes',
    'Red Blend': 'Grilled meats, cheese platters',
}
_white_food_pairings = {
    'Chardonnay': 'Chicken, seafood',
    'Sauvignon Blanc': 'Salad, goat cheese',
    'Riesling': 'Spicy Asian food, pork',
    'Pinot Grigio': 'Shrimp, light pasta dishes',
    'Gewürztraminer': 'Spicy foods, Thai cuisine',
    'Moscato': 'Fruit desserts, light cheeses',
    'Viognier': 'Grilled fish, roasted chicken',
    'Semillon': 'Shellfish, poultry',
    'Albariño': 'Seafood, tapas',
    'Chenin Blanc': 'Pork, spicy foods',
    'Verdelho': 'Asian cuisine, grilled chicken',
    'Trebbiano': 'Fish, light salads',
    'Fiano': 'Grilled vegetables, fish',
    'Grillo': 'Pasta, seafood',
    'Garganega': 'Seafood, risotto',
    'White Blend': 'Grilled vegetables, chicken',
}
food_pairings = {**_red_food_pairings, **_white_food_pairings}

# Read a free-text wine description from the user
user_description = input("Enter a wine description: ")

# Encode the input with the same tokenizer and padding length used for training
user_seq = pad_sequences(tokenizer.texts_to_sequences([user_description]), maxlen=max_len)

# Predict the wine type: a sigmoid output above 0.5 is treated as red
wine_type_prediction = model_bin.predict(user_seq)
if wine_type_prediction > 0.5:
    predicted_type = 'red'
else:
    predicted_type = 'white'

# Recommend a random variety from the list matching the predicted type
candidate_varieties = red_wines if predicted_type == 'red' else white_wines
recommended_variety = random.choice(candidate_varieties)

# Print the food pairing for the recommended variety
if recommended_variety not in food_pairings:
    print(f"No pairing found for {recommended_variety}.")
else:
    print(f"Recommended variety: {recommended_variety}")
    print(f"Recommended food pairing: {food_pairings[recommended_variety]}")


Epoch 1/20




[1m1300/1300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 9ms/step - accuracy: 0.8556 - loss: 0.3453 - val_accuracy: 0.9078 - val_loss: 0.2434
Epoch 2/20
[1m1300/1300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 8ms/step - accuracy: 0.9196 - loss: 0.2169 - val_accuracy: 0.9114 - val_loss: 0.2301
Epoch 3/20
[1m1300/1300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 9ms/step - accuracy: 0.9326 - loss: 0.1786 - val_accuracy: 0.9129 - val_loss: 0.2285
Epoch 4/20
[1m1300/1300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 9ms/step - accuracy: 0.9401 - loss: 0.1531 - val_accuracy: 0.9122 - val_loss: 0.2368
Epoch 5/20
[1m1300/1300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 9ms/step - accuracy: 0.9476 - loss: 0.1333 - val_accuracy: 0.9134 - val_loss: 0.2664
Epoch 6/20
[1m1300/1300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 9ms/step - accuracy: 0.9552 - loss: 0.1136 - val_accuracy: 0.9064 - val_loss: 0.2784
Epoch 7/20
[1m1300/1



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 144ms/step
Recommended variety: Garganega
Recommended food pairing: Seafood, risotto


Cabernet Sauvignon, Merlot, Pinot Noir, Chardonnay, Riesling, Sauvignon Blanc
이 6가지 품종으로만 다중 분류 및 사용자가 와인 설명을 입력하면, 모델이 입력된 설명을 기반으로 6가지 품종으로 구분 후, 페어링 음식 추천

---
대표 레드와인 품종 3가지
화이트와인 품종 3가지



In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import random

# Load the data
df = pd.read_csv('winemag-data-130k-v2.csv')

# The six target varieties. The position in this list is the class id used for
# training, so a predicted index can be decoded as target_varieties[index].
target_varieties = ['Cabernet Sauvignon', 'Merlot', 'Pinot Noir', 'Chardonnay', 'Riesling', 'Sauvignon Blanc']

# Keep only reviews of the six target varieties (NaN varieties are dropped too).
# .copy() makes the result an independent frame so the column assignment below
# does not write into a slice of the original.
df = df[df['variety'].isin(target_varieties)].copy()

# Integer-encode the variety in target_varieties order. A plain
# astype('category') would number the classes alphabetically
# (Cabernet Sauvignon, Chardonnay, Merlot, ...), which does not match the
# list order and makes target_varieties[index] return the wrong variety.
df['variety_encoded'] = pd.Categorical(df['variety'], categories=target_varieties).codes

# One-hot encode the target; num_classes pins the width to the six classes
y_multi = to_categorical(df['variety_encoded'].values, num_classes=len(target_varieties))

# Text preprocessing settings
max_words = 10000  # maximum number of words to use
max_len = 100  # length every sequence is padded/truncated to

# Build the vocabulary from the review descriptions
review_texts = df['description']
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(review_texts)

# Integer-encode each description and pad so that all sequences share one length
X = pad_sequences(tokenizer.texts_to_sequences(review_texts), maxlen=max_len)

# Hold out 20% of the samples as a test set (fixed seed so the split is repeatable)
X_train_multi, X_test_multi, y_train_multi, y_test_multi = train_test_split(
    X,
    y_multi,
    test_size=0.2,
    random_state=42,
)

# Model definition: embedding -> LSTM -> dropout -> dense(relu) -> dropout -> softmax
model_multi = Sequential()
model_multi.add(Embedding(input_dim=max_words, output_dim=128, input_length=max_len))
model_multi.add(LSTM(64, return_sequences=False))
model_multi.add(Dropout(0.5))
model_multi.add(Dense(64, activation='relu'))
model_multi.add(Dropout(0.5))
model_multi.add(Dense(len(target_varieties), activation='softmax'))  # softmax output for multi-class classification

# Compile the model
model_multi.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Early stopping: halt once the validation loss has not improved for 3 epochs
# and restore the best weights. The recorded run shows val_loss bottoming out
# at epoch 3 and rising afterwards, so training all 20 epochs overfits.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train, using 20% of the training samples for validation
history_multi = model_multi.fit(
    X_train_multi,
    y_train_multi,
    epochs=20,
    batch_size=64,
    validation_split=0.2,
    callbacks=[early_stopping],
)

# Evaluate on the held-out test set
loss, accuracy = model_multi.evaluate(X_test_multi, y_test_multi)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

# Suggested food pairings for the six varieties, as (variety, pairing) pairs
food_pairings = dict([
    ('Cabernet Sauvignon', 'Grilled steak, lamb'),
    ('Merlot', 'Roast chicken, pasta'),
    ('Pinot Noir', 'Duck, mushroom risotto'),
    ('Chardonnay', 'Chicken, seafood'),
    ('Riesling', 'Spicy Asian food, pork'),
    ('Sauvignon Blanc', 'Salad, goat cheese'),
])

# Read a free-text wine description from the user
user_description = input("Enter a wine description: ")

# Encode the input with the same tokenizer and padding length used for training
user_seq = pad_sequences(tokenizer.texts_to_sequences([user_description]), maxlen=max_len)

# Predict the variety: take the index with the highest softmax score
wine_type_prediction = model_multi.predict(user_seq)
predicted_variety_index = np.argmax(wine_type_prediction)

# Look up the variety name at that index.
# NOTE(review): this is only correct if the training labels were encoded in
# target_varieties order - verify against the label-encoding step.
predicted_variety = target_varieties[predicted_variety_index]

# Print the food pairing for the predicted variety
if predicted_variety not in food_pairings:
    print(f"No pairing found for {predicted_variety}.")
else:
    print(f"Recommended variety: {predicted_variety}")
    print(f"Recommended food pairing: {food_pairings[predicted_variety]}")


Epoch 1/20




[1m478/478[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 10ms/step - accuracy: 0.4495 - loss: 1.3186 - val_accuracy: 0.7699 - val_loss: 0.5739
Epoch 2/20
[1m478/478[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 9ms/step - accuracy: 0.8004 - loss: 0.5400 - val_accuracy: 0.8202 - val_loss: 0.4908
Epoch 3/20
[1m478/478[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 13ms/step - accuracy: 0.8591 - loss: 0.3855 - val_accuracy: 0.8228 - val_loss: 0.4806
Epoch 4/20
[1m478/478[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 11ms/step - accuracy: 0.8756 - loss: 0.3433 - val_accuracy: 0.8457 - val_loss: 0.4828
Epoch 5/20
[1m478/478[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 10ms/step - accuracy: 0.9068 - loss: 0.2772 - val_accuracy: 0.8359 - val_loss: 0.5087
Epoch 6/20
[1m478/478[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 12ms/step - accuracy: 0.9217 - loss: 0.2230 - val_accuracy: 0.8508 - val_loss: 0.5100
Epoch 7/20
[1m478/478[0m [32m━

GPT에게 "뉴질랜드 배비치 블랙 라벨 소비뇽 블랑의 영어로 된 와인 리뷰를 한 개만 가져다 줘"라고 질문



Here is an English wine review for the Babich Black Label Sauvignon Blanc 2023:

The wine is described as inviting and musky, with aromas of gooseberry, mandarin, and a hint of fennel on the nose. On the palate, it is fresh and lively with flavors of lemon, lime, gooseberry, and blackcurrant. The mid-palate develops with additional flavors of grapefruit and honeydew melon, while pineapple is noticeable on the finish. The wine is characterized as soft and layered, with a lingering spicy finish.

이 와인은 매력적이고 머스크 향이 난다고 묘사되며, 코에서는 구스베리, 만다린, 약간의 회향 향이 느껴집니다. 입안에서는 신선하고 생기 넘치며, 레몬, 라임, 구스베리, 블랙커런트의 풍미가 있습니다. 중간 맛에서는 자몽과 허니듀 멜론의 추가적인 맛이 발전하며, 마무리에서는 파인애플이 두드러집니다. 이 와인은 부드럽고 층이 있는 특징을 가지며, 매운 맛이 오래 지속되는 마무리를 가지고 있습니다.