In [9]:
import pandas as pd
import numpy as np
import category_encoders as ce
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [7]:
# Read in the csv
df = pd.read_csv("../data/raw/beer_reviews.csv")
# Select necessary columns
df = df[['review_appearance', 'review_aroma', 'review_palate', 'review_taste', 'beer_abv', 'brewery_name', 'beer_name', 'beer_style']]
df.head()

Unnamed: 0,review_appearance,review_aroma,review_palate,review_taste,beer_abv,brewery_name,beer_name,beer_style
0,2.5,2.0,1.5,1.5,5.0,Vecchio Birraio,Sausa Weizen,Hefeweizen
1,3.0,2.5,3.0,3.0,6.2,Vecchio Birraio,Red Moon,English Strong Ale
2,3.0,2.5,3.0,3.0,6.5,Vecchio Birraio,Black Horse Black Beer,Foreign / Export Stout
3,3.5,3.0,2.5,3.0,5.0,Vecchio Birraio,Sausa Pils,German Pilsener
4,4.0,4.5,4.0,4.5,7.7,Caldera Brewing Company,Cauldron DIPA,American Double / Imperial IPA


In [8]:
# Handle missing values
df['beer_abv'].fillna(df['beer_abv'].median(), inplace=True)
df['brewery_name'].fillna('unknown', inplace=True)
df['beer_name'].fillna('unknown', inplace=True)

In [10]:
# Encode the beer_style column
le = LabelEncoder()
df['beer_style'] = le.fit_transform(df['beer_style'])

In [11]:
# Select columns to be encoded
cols_to_encode = ['brewery_name', 'beer_name']

# Instantiate encoder
encoder = ce.TargetEncoder(cols=cols_to_encode)

In [12]:
# Fit and transform the columns
df_encoded = encoder.fit_transform(df[cols_to_encode], df['beer_style'])

# Replace original columns with encoded ones in the dataframe
df.drop(cols_to_encode, axis=1, inplace=True)
df = pd.concat([df, df_encoded], axis=1)

In [13]:
# Assuming that df_X contains your features and df_y contains your target
df_X = df.drop('beer_style', axis=1)
df_y = df['beer_style']

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(df_X, df_y, test_size=0.2, random_state=42)

In [14]:
# Initialize the constructor
model = Sequential()

# Add an input layer
model.add(Dense(128, activation='relu', input_shape=(X_train.shape[1],)))

# Add one hidden layer 
model.add(Dense(64, activation='relu'))

# Add an output layer 
model.add(Dense(len(y_train.unique()), activation='softmax'))

# Compile the model
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

2023-06-30 13:15:26.087602: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [15]:
# Train the model
history = model.fit(X_train, y_train, epochs=20, batch_size=256, validation_split=0.2, verbose=1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [16]:
# Evaluate the model
accuracy = model.evaluate(X_test, y_test)[1]
print("Model Accuracy: %.2f%%" % (accuracy*100))

Model Accuracy: 74.44%
