# Getting Started

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

import tensorflow as tf

In [None]:
# Read the flavors_of_cacao.csv ratings dataset into a DataFrame.
data = pd.read_csv("../input/chocolate-bar-ratings/flavors_of_cacao.csv")

In [None]:
# Show the freshly loaded frame as the cell output for a first look.
data

In [None]:
# Plot an annotated correlation heatmap of the numeric columns.
# The numeric columns are selected explicitly: at this point the frame still
# contains string columns (CocoaPercent is only converted from '%' strings
# later), and pandas >= 2.0 raises on DataFrame.corr() when non-numeric
# columns are present instead of silently ignoring them.
numeric_corr = data.select_dtypes(include='number').corr()

plt.figure(figsize=(12, 10))
sns.heatmap(numeric_corr, annot=True)
plt.show()

In [None]:
# Remove the REF and review-date columns from the frame in place.
data.drop(columns=['REF', 'Review\nDate'], inplace=True)

In [None]:
# Re-display the frame to check the result of the column drop.
data

# Preprocessing

In [None]:
# Count the missing values in each column.
data.isna().sum()

In [None]:
# Keep only the rows that have no missing value in any column.
data = data.dropna(axis='index', how='any')

In [None]:
# List each column's dtype as the cell output.
data.dtypes

In [None]:
# Replace all column labels with short identifier-style names.
# NOTE(review): this assignment is positional and requires the frame to have
# exactly seven columns in this order — confirm against the CSV header if the
# input data or the earlier column drop changes.
data.columns = ['Company', 'SpecificOrigin', 'CocoaPercent', 'Location', 'Rating', 'BeanType', 'BroadOrigin']

In [None]:
# Re-display the frame to check the renamed columns.
data

In [None]:
def removePercents(data: pd.Series) -> pd.Series:
    """Convert a Series of percent strings into float fractions.

    Leading and trailing ``'%'`` characters are stripped from every value,
    the remainder is parsed as a float and divided by 100, so ``'70%'``
    becomes ``0.7``. The index and name of the input Series are kept.

    The conversion uses pandas' vectorized string/astype operations rather
    than a per-element Python lambda.

    Args:
        data: Series of strings such as ``'70%'`` or ``'63.5%'``.

    Returns:
        A float Series holding the values as fractions.

    Raises:
        ValueError: If a value is not a valid number once ``'%'`` is stripped.
    """
    return data.str.strip('%').astype(float) / 100

In [None]:
# Overwrite CocoaPercent with the output of removePercents (percent strings
# turned into float fractions).
data['CocoaPercent'] = removePercents(data['CocoaPercent'])

In [None]:
# Number of distinct SpecificOrigin values (missing values, if any, count as
# one distinct value).
data['SpecificOrigin'].nunique(dropna=False)

In [None]:
# Columns that will be replaced by one-hot indicator columns.
categorical_features = [
    'Company',
    'SpecificOrigin',
    'Location',
    'BeanType',
    'BroadOrigin',
]

In [None]:
def onehot_encode(data, columns, *, prefix=False):
    """Return a copy of ``data`` with the listed columns one-hot encoded.

    Every column named in ``columns`` is removed and replaced by its
    ``pd.get_dummies`` indicator columns. Columns that are not listed keep
    their original order and come first; the indicator columns follow,
    grouped in the order given by ``columns``. The input frame is not
    modified.

    All indicators are computed from the original frame and joined in one
    ``pd.concat``. This avoids re-copying the growing frame once per encoded
    column, and it means a category value that happens to equal the name of
    another encoded column can no longer be selected or dropped by mistake
    on a later iteration.

    Args:
        data: Source DataFrame.
        columns: Iterable of column labels to encode.
        prefix: When true, indicator columns are named ``<column>_<value>``
            so equal values coming from different columns stay
            distinguishable. Defaults to False, which keeps the bare
            category value as the column name.

    Returns:
        A new DataFrame with the untouched columns followed by the
        indicator columns.

    Raises:
        KeyError: If a label in ``columns`` is not a column of ``data``.
    """
    columns = list(columns)
    encoded = [
        pd.get_dummies(data[column], prefix=column if prefix else None)
        for column in columns
    ]
    return pd.concat([data.drop(columns=columns), *encoded], axis=1)

In [None]:
# Swap every categorical column for its one-hot indicator columns.
data = onehot_encode(data=data, columns=categorical_features)

In [None]:
# Separate the regression target (Rating) from the feature matrix.
target_column = 'Rating'
X = data.drop(columns=target_column)
y = data[target_column]

In [None]:
# Show the feature matrix as the cell output.
X

In [None]:
# Min-max scale every feature column (MinMaxScaler's default target range is
# [0, 1]).
scaler = MinMaxScaler()

# fit_transform returns a bare array, so the frame is rebuilt with both the
# original column labels and the original row index. Rows were removed by
# dropna earlier, so without `index=X.index` X would receive a fresh 0..n-1
# index whose labels no longer match those of `y`.
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so test-set minima/maxima influence the scaling — consider fitting on the
# training portion only.
X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns, index=X.index)

# Training

In [None]:
# Use 80% of the rows for training and hold out the rest for testing.
# The fixed seed keeps the split identical between runs, so the reported test
# loss is always computed on the same held-out rows. (Model initialisation is
# not seeded here, so the loss values themselves can still vary.)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, random_state=42
)

In [None]:
# Small fully connected regression network: two 16-unit ReLU hidden layers
# followed by a single linear output unit.
# The input width is read from the training data instead of being hard-coded,
# so the model keeps matching the feature matrix when the number of encoded
# columns changes.
n_features = X_train.shape[1]

inputs = tf.keras.Input(shape=(n_features,))
x = tf.keras.layers.Dense(16, activation='relu')(inputs)
x = tf.keras.layers.Dense(16, activation='relu')(x)
outputs = tf.keras.layers.Dense(1)(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

In [None]:
# Optimise mean squared error with RMSprop at a learning rate of 0.001.
optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001)

model.compile(loss='mse', optimizer=optimizer)

In [None]:
# Print the model's layer-by-layer summary.
model.summary()

In [None]:
# Train for 10 epochs in mini-batches of 32; 20% of the training rows are
# set aside by Keras for per-epoch validation.
epochs, batch_size = 10, 32

history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.2,
    verbose=1,
)

In [None]:
# Plot training loss (blue) and validation loss (red) against the epoch index.
epoch_axis = range(epochs)
loss_curves = history.history

plt.figure(figsize=(14, 10))
for curve_name, curve_color in (('loss', 'b'), ('val_loss', 'r')):
    plt.plot(epoch_axis, loss_curves[curve_name], color=curve_color)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.show()

In [None]:
# Zero-based index of the epoch with the lowest validation loss.
np.asarray(history.history['val_loss']).argmin()

In [None]:
# Report the loss on the held-out test set.
model.evaluate(x=X_test, y=y_test)