# Titanic Dataset
A mix of categorical and numerical features.

In [None]:
from mixnn import datasets

# The loader returns the feature descriptions followed by one
# (inputs, labels) pair per split: training first, then validation.
features, train_split, validation_split = datasets.load_titanic_data()
X_train, y_train = train_split
X_validation, y_validation = validation_split

separator = '---------------------------------'

print('Features:')
for feature in features:
    print(feature)
print(separator)
print('Training: ', X_train.shape)
print('Validation: ', X_validation.shape)
print(separator)
print("2 rows of training data:")
# Kept as the final expression so the notebook renders the two rows.
X_train[:2, ...]

In [None]:
from mixnn.model import MixNNClassifier

# Build the classifier from the Titanic feature descriptions.
# NOTE(review): max_embedding_size=2 presumably caps the embedding width used
# for categorical features - confirm against MixNNClassifier.
classifier = MixNNClassifier(features, max_embedding_size=2)

# The validation split is handed to fit() alongside the training data.
fit_options = {'validation_data': (X_validation, y_validation)}
classifier.fit(X_train, y_train, **fit_options)

# MNIST handwritten digits
Image features only.

In [None]:
from mixnn import datasets

# Same return layout as the other loaders used in this notebook: feature
# descriptions, then the training pair, then the validation pair.
mnist = datasets.load_mnist_digits_data()
features, (X_train, y_train), (X_validation, y_validation) = mnist

rule = '---------------------------------'

print('Features:')
for feature in features:
    print(feature)
print(rule)
# Report the shape of each split.
for label, data in (('Training: ', X_train), ('Validation: ', X_validation)):
    print(label, data.shape)
print(rule)
print("2 rows of training data:")
# Final expression: displayed by the notebook as the cell output.
X_train[:2, ...]

In [None]:
from mixnn.model import MixNNClassifier

# Train on the MNIST digits using mini-batches of 512 samples; the validation
# split is passed to fit() together with the training data.
validation_pair = (X_validation, y_validation)
classifier = MixNNClassifier(features, batch_size=512)
classifier.fit(X_train, y_train, validation_data=validation_pair)

In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Predict on the validation split with the classifier fitted earlier.
y_pred = classifier.predict(X_validation)

accuracy = accuracy_score(y_validation, y_pred)
print('Accuracy: %s' % accuracy)

report = classification_report(y_validation, y_pred)
print(report)

# Kept as the final expression so the notebook displays the matrix.
confusion_matrix(y_validation, y_pred)

# Skin Cancer MNIST (HAM10000)

In [None]:
import pandas as pd


def lesion_image_path(image_id):
    """Return the path of the JPEG file for the given HAM10000 image id."""
    return '/DATA/skin-cancer-mnist-ham10000/images/%s.jpg' % image_id


# One metadata row per image; add a column holding the path of its image file.
df = pd.read_csv('/DATA/skin-cancer-mnist-ham10000/HAM10000_metadata.csv')
df['lesion_image'] = df['image_id'].map(lesion_image_path)
df.head()

In [None]:
# Keep only the first row for every lesion_id and report how many rows
# are dropped out of the total.
total_rows = len(df)
nb_duplicates = df.duplicated(subset='lesion_id').sum()
print('Removing %s/%s duplicates' % (nb_duplicates, total_rows))
df = df.drop_duplicates(subset='lesion_id')

In [None]:
# TODO: 128/224 !?
image_size = (128, 128, 3)

# Feature specs handed to MixNNClassifier. The tabular columns are currently
# disabled, so only the lesion image is used; uncomment an entry to enable it.
features = [
    # {"name": "dx_type", "type": "categorical"},
    # {"name": "sex", "type": "categorical"},
    # {"name": "age", "type": "numerical"},
    # {"name": "localization", "type": "categorical"},
    {
        "name": "lesion_image",
        "type": "image",
        "cnn": "medium",
        "image_size": image_size,
    },
]

# Column holding the label to predict.
target = 'dx'

# Column names grouped by feature type, in the order they appear in `features`.
numericals, categorials, images = (
    [spec['name'] for spec in features if spec['type'] == kind]
    for kind in ('numerical', 'categorical', 'image')
)

In [None]:
# Missing categorical values become an explicit "N/A" level.
for column in categorials:
    filled = df[column].fillna("N/A")
    df[column] = filled

# Missing numerical values are replaced by the column median.
# NOTE(review): the median is computed over every row of df, not only the
# rows that later end up in the training split.
for column in numericals:
    column_median = df[column].median()
    df[column] = df[column].fillna(column_median)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

plt.figure()
sns.countplot(x=target, data=df)

for column in categorials:
    plt.figure()
    sns.countplot(x=column, data=df)

if numericals:
    plt.figure()
    sns.pairplot(df[numericals + [target]], hue=target)

In [None]:
from mixnn.preprocessing import ImageEncoder
# Imported here so this cell does not rely on another cell having defined plt.
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow

# Draw a grid with one row per class and up to nb_images randomly
# sampled lesion images per row.
encoder = ImageEncoder(image_size)
classes = df['dx'].unique()
nb_classes = len(classes)
nb_images = 5

fig = plt.figure(figsize=(30, 30))
for i, clazz in enumerate(classes):
    class_rows = df[df['dx'] == clazz]
    # sample() raises ValueError when asked for more rows than exist, so cap
    # the request for classes with fewer than nb_images rows.
    sample_size = min(nb_images, len(class_rows))
    class_images = class_rows.sample(sample_size)['lesion_image'].values
    for j, img_path in enumerate(class_images):
        img = encoder.load_image(img_path)
        # Subplot indices are 1-based and run row by row.
        ax = fig.add_subplot(nb_classes, nb_images, i * nb_images + j + 1)
        ax.title.set_text('%s - %s' % (clazz, j))
        ax.imshow(img)

In [None]:
from sklearn.model_selection import train_test_split

# Inputs are the columns named in the feature specs; labels come from 'dx'.
feature_columns = [spec['name'] for spec in features]
X = df[feature_columns].values
y = df['dx'].values

# Hold out 20% of the rows for validation, stratified on the labels, with a
# fixed seed so the split is reproducible.
split = train_test_split(X, y, test_size=0.2, random_state=101, stratify=y)
X_train, X_validation, y_train, y_validation = split

for label, data in (('Training: ', X_train), ('Validation: ', X_validation)):
    print(label, data.shape)

print("2 rows of training data:")
# Final expression: displayed by the notebook as the cell output.
X_train[:2, ...]

In [None]:
from mixnn.model import MixNNClassifier

import warnings

# Ignore every warning from this point on (applies to the whole session).
warnings.simplefilter('ignore')

estimator = MixNNClassifier(
    features,
    fc_layers=0,
    batch_size=8,
    early_stopping_rounds=4,
)

# NOTE(review): class_weight='auto' presumably asks fit() to balance the
# classes itself - confirm against MixNNClassifier.fit.
fit_options = {
    'validation_data': (X_validation, y_validation),
    'class_weight': 'auto',
    # 'plot': True,
}
history = estimator.fit(X_train, y_train, **fit_options)

# history.history is turned into a DataFrame and drawn as line curves,
# one line per column.
training_curves = pd.DataFrame(history.history)
training_curves.plot.line(figsize=(16, 6))