In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import tensorflow as tf

In [None]:
# Load the avocado price CSV into a DataFrame. The relative path looks like a
# Kaggle input directory — adjust it if the notebook runs elsewhere.
data = pd.read_csv('../input/avocado-prices/avocado.csv')

In [None]:
# Display the freshly loaded frame as the cell output for a first look.
data

In [None]:
# Drop whatever column is currently first (presumably a leftover index column
# written into the CSV — TODO confirm).
# NOTE: this mutates `data` in place and is not idempotent: re-running the cell
# removes the next column as well.
data.drop(data.columns[0], axis=1, inplace=True)

In [None]:
# Print the frame summary (columns, dtypes, non-null counts).
data.info()

# Visualization

In [None]:
plt.figure(figsize=(20, 10))

# One horizontal boxplot per numeric column. The subplot slot comes from the
# column's position in the frame, so skipped (non-numeric) columns leave gaps
# in the 3x5 grid, exactly as before.
for i, column in enumerate(data.columns):
    # Check the dtype through the column itself: `data.dtypes[i]` is integer
    # positional access on a label-indexed Series, which pandas has deprecated.
    # An explicit numeric check also skips any non-numeric dtype, not only
    # `object`.
    if pd.api.types.is_numeric_dtype(data[column]):
        plt.subplot(3, 5, i + 1)
        plt.boxplot(data[column], vert=False)
        plt.title(column)

plt.show()

# Preprocessing

In [None]:
# Count missing values per column.
data.isna().sum()

## Encoding

In [None]:
def get_uniques(df, columns):
    """Map each requested column name to the list of distinct values it holds.

    Args:
        df: DataFrame to inspect.
        columns: Iterable of column names present in `df`.

    Returns:
        dict of column name -> list of that column's unique values, in the
        order `Series.unique()` reports them.
    """
    uniques = {}
    for name in columns:
        uniques[name] = list(df[name].unique())
    return uniques

In [None]:
# Columns inspected as categorical in this notebook.
categorical_columns = ['region', 'Date', 'type']

# Show the distinct values each of them takes.
get_uniques(data, categorical_columns)

In [None]:
# Feature encoded by its rank in a sorted ordering.
ordinal_features = ['Date']

# Feature expanded into one-hot indicator columns.
nominal_features = ['region']

# Column the model is trained to predict.
target_column = 'type'

In [None]:
# Sorted distinct 'Date' values, used as the ordering when 'Date' is encoded.
date_ordering = sorted(data['Date'].unique())

In [None]:
def ordinal_encode(df, column, ordering):
    """Return a copy of `df` with `column` replaced by each value's rank.

    Args:
        df: Input DataFrame; it is not modified.
        column: Name of the column to encode.
        ordering: Iterable of the column's possible values, in rank order.
            If a value occurs more than once, its first position is used.

    Returns:
        A new DataFrame whose `column` holds the zero-based position of each
        original value within `ordering`.

    Raises:
        ValueError: If the column contains a value that is not in `ordering`.
    """
    df = df.copy()

    # Build the value -> rank lookup once, instead of scanning `ordering` with
    # list.index() for every row. setdefault keeps the first occurrence, which
    # is what list.index() would have returned.
    positions = {}
    for rank, value in enumerate(ordering):
        positions.setdefault(value, rank)

    def _rank_of(value):
        try:
            return positions[value]
        except KeyError:
            # Preserve the exception type list.index() raised for unknown values.
            raise ValueError(f"{value!r} is not in ordering") from None

    df[column] = df[column].apply(_rank_of)
    return df


def onehot_encode(df, column):
    """Return a new frame with `column` replaced by one indicator column per value.

    The indicator columns are named after the values themselves (no prefix)
    and are appended after the existing columns. `df` itself is left untouched.
    """
    indicator_frame = pd.get_dummies(df[column])
    # concat builds a fresh frame, so the input is never mutated.
    widened = pd.concat([df, indicator_frame], axis=1)
    return widened.drop(column, axis=1)

In [None]:
# Replace each 'Date' with its position within `date_ordering`.
data = ordinal_encode(data, 'Date', date_ordering)

In [None]:
# Expand 'region' into one indicator column per region value.
data = onehot_encode(data, 'region')

In [None]:
# Display the frame after encoding.
data

In [None]:
# Encode the target labels as integers. The fitted encoder stays available as
# `label_encoder`.
label_encoder = LabelEncoder()

data[target_column] = label_encoder.fit_transform(data[target_column])

## Splitting and Scaling

In [None]:
# Separate the target (y) from the remaining columns, which form the features (X).
y = data[target_column]
X = data.drop(target_column, axis=1)

In [None]:
scaler = StandardScaler()

X = scaler.fit_transform(X)

In [None]:
y.shape

In [None]:
X.shape

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7)

# Training

In [None]:
# Size the input layer from the data instead of hard-coding the feature count,
# and pass the shape as an explicit tuple: `Input(65,)` is just `Input(65)`,
# i.e. a bare int rather than the intended shape `(65,)`.
inputs = tf.keras.Input(shape=(X_train.shape[1],))
x = tf.keras.layers.Dense(64, activation='relu')(inputs)
x = tf.keras.layers.Dense(64, activation='relu')(x)
# Single sigmoid unit: the output is a value in (0, 1) read as the probability
# of the positive class.
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)


model.compile(
    optimizer='adam',
    # Binary cross-entropy is the loss matched to a sigmoid output on a 0/1
    # target (assumes the label-encoded target has exactly two classes, as the
    # single output unit implies). It replaces the regression-style 'mse'.
    loss='binary_crossentropy',
    metrics=['accuracy']
)

batch_size = 64
epochs = 73

# Hold out 20% of the training rows for validation. ReduceLROnPlateau is used
# with its default settings.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    batch_size=batch_size,
    epochs=epochs,
    callbacks=[tf.keras.callbacks.ReduceLROnPlateau()],
    verbose=0
)

# Results

In [None]:
# Per-epoch loss values recorded in the training history.
train_loss = history.history['loss']
val_loss = history.history['val_loss']
epochs_range = range(1, epochs + 1)

# Draw both curves on one explicit Axes object.
fig, ax = plt.subplots(figsize=(14, 10))

ax.plot(epochs_range, train_loss, label="Training Loss")
ax.plot(epochs_range, val_loss, label="Validation Loss")

ax.set_title("Training and Validation Loss")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.legend()

plt.show()

In [None]:
# 1-based epoch number at which the validation loss was lowest.
np.argmin(val_loss) + 1

In [None]:
# Evaluate the trained model on the held-out test split.
model.evaluate(X_test, y_test)

# Data Every Day  

This notebook is featured on Data Every Day, a YouTube series where I train models on a new dataset each day.  

***

Check it out!  
https://youtu.be/CAKPtccHETk