In [25]:
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow.python.keras.layers import Dense
from tensorflow.python.keras.losses import SparseCategoricalCrossentropy
from tensorflow.python.keras import Sequential
from tensorflow.python.keras.activations import relu, linear
from tensorflow.python.keras.regularizers import L2

In [26]:
data = pd.read_csv('./leaf-classification/train.csv')
data.head()

Unnamed: 0,id,species,margin1,margin2,margin3,margin4,margin5,margin6,margin7,margin8,...,texture55,texture56,texture57,texture58,texture59,texture60,texture61,texture62,texture63,texture64
0,1,Acer_Opalus,0.007812,0.023438,0.023438,0.003906,0.011719,0.009766,0.027344,0.0,...,0.007812,0.0,0.00293,0.00293,0.035156,0.0,0.0,0.004883,0.0,0.025391
1,2,Pterocarya_Stenoptera,0.005859,0.0,0.03125,0.015625,0.025391,0.001953,0.019531,0.0,...,0.000977,0.0,0.0,0.000977,0.023438,0.0,0.0,0.000977,0.039062,0.022461
2,3,Quercus_Hartwissiana,0.005859,0.009766,0.019531,0.007812,0.003906,0.005859,0.068359,0.0,...,0.1543,0.0,0.005859,0.000977,0.007812,0.0,0.0,0.0,0.020508,0.00293
3,5,Tilia_Tomentosa,0.0,0.003906,0.023438,0.005859,0.021484,0.019531,0.023438,0.0,...,0.0,0.000977,0.0,0.0,0.020508,0.0,0.0,0.017578,0.0,0.047852
4,6,Quercus_Variabilis,0.005859,0.003906,0.048828,0.009766,0.013672,0.015625,0.005859,0.0,...,0.09668,0.0,0.021484,0.0,0.0,0.0,0.0,0.0,0.0,0.03125


In [27]:
species = pd.Series(data['species'])
ids = pd.Series(data['id'])

In [28]:
unique_species = pd.Series(species.unique())
unique_species

0                Acer_Opalus
1      Pterocarya_Stenoptera
2       Quercus_Hartwissiana
3            Tilia_Tomentosa
4         Quercus_Variabilis
               ...          
94        Ulmus_Bergmanniana
95             Quercus_Nigra
96            Salix_Intergra
97    Quercus_Infectoria_sub
98               Sorbus_Aria
Length: 99, dtype: object

In [29]:
unique_species_ids = {unique_species[i]:i for i in range(len(unique_species))}
unique_species_ids
len(unique_species)

99

In [30]:
X_train = data.drop(['species', 'id'], axis=1)
X_train = np.array(X_train)

In [31]:
y_train = [unique_species_ids[name] for name in data['species']]
y_train = np.array(y_train)

In [32]:
model = Sequential([
    Dense(units=600, activation='relu', kernel_regularizer=L2(0.0001)),
    Dense(units=580, activation='relu', kernel_regularizer=L2(0.0001)),
    Dense(units=550, activation='relu', kernel_regularizer=L2(0.0001)),
    Dense(units=520, activation='relu', kernel_regularizer=L2(0.0001)),
    Dense(units=400, activation='relu', kernel_regularizer=L2(0.0001)),
    Dense(units=450, activation='relu', kernel_regularizer=L2(0.0001)),
    Dense(units=300, activation='relu', kernel_regularizer=L2(0.0001)),
    Dense(units=290, activation='relu', kernel_regularizer=L2(0.0001)),
    Dense(units=280, activation='relu', kernel_regularizer=L2(0.0001)),
    Dense(units=250, activation='relu', kernel_regularizer=L2(0.0001)),
    Dense(units=220, activation='relu', kernel_regularizer=L2(0.0001)),
    Dense(units=200, activation='relu', kernel_regularizer=L2(0.0001)),
    Dense(units=180, activation='relu', kernel_regularizer=L2(0.0001)),
    Dense(units=150, activation='relu', kernel_regularizer=L2(0.0001)),
    Dense(units=120, activation='relu', kernel_regularizer=L2(0.0001)),
    Dense(units=99, activation='linear', kernel_regularizer=L2(0.0001))
])

In [33]:
model.compile(optimizer='adam', loss=SparseCategoricalCrossentropy(from_logits=True))

In [34]:
model.fit(X_train, y_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1af859060b0>

In [35]:
test_data = pd.read_csv('./leaf-classification/test.csv')
test_data_array = np.array(test_data.drop(['id'], axis=1))
logits = model(test_data_array)
f_x = tf.nn.softmax(logits)

In [36]:
f_x

<tf.Tensor: shape=(594, 99), dtype=float32, numpy=
array([[0.0101077 , 0.01013226, 0.01012769, ..., 0.01015682, 0.01010019,
        0.01014242],
       [0.0101077 , 0.01013226, 0.01012769, ..., 0.01015682, 0.01010019,
        0.01014242],
       [0.0101077 , 0.01013226, 0.01012769, ..., 0.01015682, 0.01010019,
        0.01014242],
       ...,
       [0.0101077 , 0.01013226, 0.01012769, ..., 0.01015682, 0.01010019,
        0.01014242],
       [0.0101077 , 0.01013226, 0.01012769, ..., 0.01015682, 0.01010019,
        0.01014242],
       [0.0101077 , 0.01013226, 0.01012769, ..., 0.01015682, 0.01010019,
        0.01014242]], dtype=float32)>

In [37]:
submission = pd.DataFrame(f_x, columns=unique_species, index=test_data['id'])

In [38]:
submission.to_csv('./leaf-classification/submission.csv')