In [2]:
import numpy as np
import tensorflow as tf

In [27]:
model_name = 'temp_cnn_sampling_weighted'

### Load the data

In [49]:
NP_DATA_PATH = '/mnt/azrael/spatialdata/projects/eo_data/iclr-2020-challange/mlhub/prepared_data/original_data.npz'

In [50]:
data = np.load(NP_DATA_PATH)
features = data['features']
field_id = data['field_id']
crop_id = data['crop_id']

features.shape, field_id.shape, crop_id.shape

((67557, 13, 14), (67557,), (67557,))

### Normalizing the data

In [51]:
mean_f = features.mean(axis=(0,1))
std_f = features.std(axis=(0,1))

mean_f, std_f

(array([0.04495004, 0.05522608, 0.08520251, 0.08335783, 0.13549377,
        0.25449966, 0.29252816, 0.29083016, 0.31650684, 0.31871104,
        0.26029542, 0.17459822, 0.5274778 , 3.82845028]),
 array([ 0.02151336,  0.023685  ,  0.02388389,  0.03328747,  0.03058358,
         0.0464853 ,  0.05491788,  0.05862865,  0.05730477,  0.05223269,
         0.05640826,  0.05882478,  0.18024394, 17.91834208]))

In [52]:
features = (features - mean_f)/std_f

## Spatial sampling

In [53]:
agg_methods = {
    'min': np.mean,
    'max': np.max,
    'mean': np.mean,
    'std': np.std
}

def sample_field(features, N=10, n_samples=1, methods=['mean']):
    agg_features_stack = []
    indices = np.arange(len(features))
    for _ in range(n_samples):
        sampled_i = np.random.choice(indices, size=N, replace=True)

        sampled_features = features[sampled_i]
        
        aggreagations = [agg_methods[method](sampled_features, axis=0) for method in methods]
        agg_features = np.concatenate(aggreagations, axis=-1)
        
        agg_features_stack.append(agg_features)
    
    return np.stack(agg_features_stack)

In [54]:
from tqdm.auto import tqdm

def sample_dataset(features, labels, field_ids, n=10, methods=['mean']):
    fields, counts = np.unique(field_ids, return_counts=True)
    
    features_sampled = []
    labels_sampled = []
    field_id_sampled = []
    weights_sampled = []
    for fid, c in zip(tqdm(fields), counts):
        n_samples = max(c//n, 3)

        field_mask = field_ids == fid
        field_features = features[field_mask]
        field_label = labels[field_mask][0]
        
        sampled_features = sample_field(field_features, N=n, n_samples=n_samples, methods=methods)
        sampled_labels = np.full(n_samples, field_label)
        sampled_fid = np.full(n_samples, fid)
        sampled_weights = np.full(n_samples, 1.0/n_samples)
        
        features_sampled.append(sampled_features)
        labels_sampled.append(sampled_labels)
        field_id_sampled.append(sampled_fid)
        weights_sampled.append(sampled_weights)
    
    features = np.concatenate(features_sampled, axis=0)
    labels = np.concatenate(labels_sampled, axis=0)
    field_ids = np.concatenate(field_id_sampled, axis=0)
    weights = np.concatenate(weights_sampled, axis=0)
    
    return features, labels, field_ids, weights

In [55]:
features_s, crop_id_s, field_id_s, weights_s = sample_dataset(features, crop_id, field_id, methods=['mean', 'min', 'max'])

HBox(children=(IntProgress(value=0, max=4688), HTML(value='')))




In [56]:
features_s.shape

(14755, 13, 42)

In [57]:
# features_s = features_s[:, 5:11 ,:]

### Extract training data and split train, val

In [58]:
mask = crop_id_s != 0
features_valid = features_s[mask]
labels_valid = crop_id_s[mask] - 1
field_id_valid = field_id_s[mask]
weights_valid = weights_s[mask]

features_test = features_s[~mask]
field_id_test = field_id_s[~mask]

features_valid.shape, features_test.shape

((10363, 13, 42), (4392, 13, 42))

In [59]:
# # Compute field split and save
# val_ratio = 0.1
# random_state = np.random.RandomState(seed=42)

# fields = np.unique(field_id_valid)
# random_state.shuffle(fields)

# val_i = int(val_ratio * len(fields))
# fields_train = fields[val_i:]
# fields_val = fields[:val_i]
# fields_test = np.unique(field_id_test)

# # Save split
# np.save('/mnt/azrael/spatialdata/projects/eo_data/iclr-2020-challange/mlhub/split/train.npy', fields_train)
# np.save('/mnt/azrael/spatialdata/projects/eo_data/iclr-2020-challange/mlhub/split/val.npy', fields_val)
# np.save('/mnt/azrael/spatialdata/projects/eo_data/iclr-2020-challange/mlhub/split/test.npy', fields_test)
# fields_train.shape, fields_val.shape, fields_test.shape

In [60]:
# Load training field IDs (split)
fields_train = np.load('/mnt/azrael/spatialdata/projects/eo_data/iclr-2020-challange/mlhub/split/train.npy')

In [61]:
train_mask = np.isin(field_id_valid, fields_train)
print(f'Ratio of pixels in train: {np.mean(train_mask)}')

features_train = features_valid[train_mask]
labels_train = labels_valid[train_mask]
field_id_train = field_id_valid[train_mask]
weights_train = weights_valid[train_mask]

features_val = features_valid[~train_mask]
labels_val = labels_valid[~train_mask]
field_id_val = field_id_valid[~train_mask]
weights_val = weights_valid[~train_mask]

features_train.shape, features_val.shape

Ratio of pixels in train: 0.8999324519926662


((9326, 13, 42), (1037, 13, 42))

In [62]:
# Flattened features
features_train_flat = features_train.reshape((features_train.shape[0], -1))
features_val_flat = features_val.reshape((features_val.shape[0], -1))

### Train model

In [63]:
from eoflow.models import TempCNNModel, BiRNN, TransformerEncoder

In [64]:
# TempCNN model

model_config = {
    'learning_rate': 0.1,
    'n_classes': 7,
    'keep_prob': 0.5,
    'nb_conv_stacks': 2
}
model = TempCNNModel(model_config)

In [65]:
# BiRNN model

# model_config = {
#     'learning_rate': 0.1,
#     'n_classes': 7,
#     'rnn_layer': 'lstm',
#     'keep_prob': 0.5
# }
# model = BiRNN(model_config)

In [66]:
# Dense Model

# model = tf.keras.Sequential([
#     tf.keras.layers.Dense(256, activation='relu'),
#     tf.keras.layers.Dropout(rate=0.5),
#     tf.keras.layers.Dense(7, activation='softmax')
# ])

In [67]:
# Class weights
# classes = np.unique(labels_valid, return_counts=True)
# class_probs = classes[1]/np.sum(classes[1])
# class_inv_probs = 1.0 / class_probs
# weights = {i:inv_prob for i, inv_prob in enumerate(class_inv_probs)}
# weights

In [68]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [69]:
# Temporal
model.fit(
    x=features_train, 
    y=labels_train, 
    validation_data=(features_val, labels_val),
    batch_size=256,
    epochs=300,
    sample_weight=weights_train
)

# Non-temporal
# model.fit(
#     x=features_train_flat, 
#     y=labels_train, 
#     validation_data=(features_val_flat, labels_val),
#     batch_size=1024,
#     epochs=10)

Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 13, 42)]          0         
_________________________________________________________________
conv1d_4 (Conv1D)            (None, 13, 16)            3376      
_________________________________________________________________
activation_6 (Activation)    (None, 13, 16)            0         
_________________________________________________________________
dropout_6 (Dropout)          (None, 13, 16)            0         
_________________________________________________________________
conv1d_5 (Conv1D)            (None, 13, 16)            1296      
_________________________________________________________________
activation_7 (Activation)    (None, 13, 16)            0         
_________________________________________________________________
dropout_7 (Dropout)          (None, 13, 16)            0   

  ...
    to  
  ['...']


Train on 9326 samples, validate on 1037 samples
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300


Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78/300
Epoch 79/300
Epoch 80/300
Epoch 81/300
Epoch 82/300
Epoch 83/300
Epoch 84/300
Epoch 85/300
Epoch 86/300
Epoch 87/300
Epoch 88/300
Epoch 89/300
Epoch 90/300
Epoch 91/300
Epoch 92/300
Epoch 93/300
Epoch 94/300
Epoch 95/300
Epoch 96/300
Epoch 97/300
Epoch 98/300
Epoch 99/300
Epoch 100/300
Epoch 101/300
Epoch 102/300
Epoch 103/300
Epoch 104/300
Epoch 105/300
Epoch 106/300
Epoch 107/300
Epoch 108/300
Epoch 109/300
Epoch 110/300
Epoch 111/300
Epoch 112/300
Epoch 113/300
Epoch 114/300
Epoch 115/300
Epoch 116/300
Epoch 117/300
Epoch 118/300
Epoch 119/300
Epoch 120/300
Epoch 121/300
Epoch 122/300
Epoch 123/300
Epoch 124/300
Epoch 125/300
Epoch 126/300
Epoch 127/300
Epoch 128/300
Epoch 129/300
Epoch 13

Epoch 165/300
Epoch 166/300
Epoch 167/300
Epoch 168/300
Epoch 169/300
Epoch 170/300
Epoch 171/300
Epoch 172/300
Epoch 173/300
Epoch 174/300
Epoch 175/300
Epoch 176/300
Epoch 177/300
Epoch 178/300
Epoch 179/300
Epoch 180/300
Epoch 181/300
Epoch 182/300
Epoch 183/300
Epoch 184/300
Epoch 185/300
Epoch 186/300
Epoch 187/300
Epoch 188/300
Epoch 189/300
Epoch 190/300
Epoch 191/300
Epoch 192/300
Epoch 193/300
Epoch 194/300
Epoch 195/300
Epoch 196/300
Epoch 197/300
Epoch 198/300
Epoch 199/300
Epoch 200/300
Epoch 201/300
Epoch 202/300
Epoch 203/300
Epoch 204/300
Epoch 205/300
Epoch 206/300
Epoch 207/300
Epoch 208/300
Epoch 209/300
Epoch 210/300
Epoch 211/300
Epoch 212/300
Epoch 213/300
Epoch 214/300
Epoch 215/300
Epoch 216/300
Epoch 217/300
Epoch 218/300
Epoch 219/300
Epoch 220/300
Epoch 221/300
Epoch 222/300
Epoch 223/300
Epoch 224/300
Epoch 225/300
Epoch 226/300
Epoch 227/300
Epoch 228/300
Epoch 229/300
Epoch 230/300
Epoch 231/300
Epoch 232/300
Epoch 233/300
Epoch 234/300
Epoch 235/300
Epoch 

Epoch 274/300
Epoch 275/300
Epoch 276/300
Epoch 277/300
Epoch 278/300
Epoch 279/300
Epoch 280/300
Epoch 281/300
Epoch 282/300
Epoch 283/300
Epoch 284/300
Epoch 285/300
Epoch 286/300
Epoch 287/300
Epoch 288/300
Epoch 289/300
Epoch 290/300
Epoch 291/300
Epoch 292/300
Epoch 293/300
Epoch 294/300
Epoch 295/300
Epoch 296/300
Epoch 297/300
Epoch 298/300
Epoch 299/300
Epoch 300/300


<tensorflow.python.keras.callbacks.History at 0x7f93c5fd7940>

### Random forest

In [34]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import log_loss

random_state = 7
rf = RandomForestClassifier(n_estimators=500, random_state=random_state, n_jobs=-1)

rf.fit(features_train_flat, labels_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=500,
                       n_jobs=-1, oob_score=False, random_state=7, verbose=0,
                       warm_start=False)

### Predictions

In [35]:
preds_rf = rf.predict_proba(features_val_flat)
preds_dnn = model.predict(features_val)

In [36]:
preds_rf.shape, preds_dnn.shape

((1037, 7), (1037, 7))

In [37]:
def one_hot(labels, num_classes):
    """ One-hot encodes integer labels for a given number of classes. """
    
    one_hot_labels = np.zeros((labels.size, num_classes))
    one_hot_labels[np.arange(labels.size),labels] = 1
    
    return one_hot_labels

In [38]:
import pandas as pd
def field_predictions(predictions, labels, field_ids):
    """ Groups pixel-wise predictions into field predictions by averaging. """
    
    df = pd.DataFrame(predictions)
    df['CROP_ID'] = labels
    df['FIELD_ID'] = field_ids
    
    df_grouped = df.groupby('FIELD_ID').mean()
    field_preds = df_grouped.drop('CROP_ID', axis=1).values
    field_labels = df_grouped['CROP_ID'].values
    
    return field_preds, field_labels, df_grouped.index

In [39]:
# Create onehot labels for val
one_hot_labels_val = one_hot(labels_val, 7)

### Pixel log loss

In [40]:
rf_loss = log_loss(one_hot_labels_val, preds_rf)
dnn_loss = log_loss(one_hot_labels_val, preds_dnn)

print(f'Random forest: {rf_loss}')
print(f'DNN: {dnn_loss}')

Random forest: 1.2297453785931451
DNN: 1.1278737355345252


### Field log loss

In [41]:
preds_rf_field, lbl_rf_field, field_id_rf = field_predictions(preds_rf, labels_val, field_id_val)
preds_dnn_field, lbl_dnn_field, field_id_dnn = field_predictions(preds_dnn, labels_val, field_id_val)

In [42]:
lbl_rf_field_oh = one_hot(lbl_rf_field, 7)
lbl_dnn_field_oh = one_hot(lbl_dnn_field, 7)

In [43]:
rf_loss_field = log_loss(lbl_rf_field_oh, preds_rf_field)
dnn_loss_field = log_loss(lbl_dnn_field_oh, preds_dnn_field)

print(f'Random forest: {rf_loss_field}')
print(f'DNN: {dnn_loss_field}')

Random forest: 1.2418810512159248
DNN: 1.1261859473391869


## Prepare predictions for analysis

In [44]:
import pandas as pd

In [45]:
def prepare_predictions(preds, labels, field_ids):
    preds_l = np.argmax(preds, axis=1)

    df = pd.DataFrame(preds, columns=[f'Crop_ID_{i+1}' for i in range(7)])
    df['Field_ID'] = field_ids
    df['prediction'] = preds_l+1
    df['label'] = labels+1
    
    df = df.set_index('Field_ID')
    
    return df

In [46]:
df_rf = prepare_predictions(preds_rf_field, lbl_rf_field, field_id_rf)
df_dnn = prepare_predictions(preds_dnn_field, lbl_dnn_field, field_id_dnn)
df_rf

Unnamed: 0_level_0,Crop_ID_1,Crop_ID_2,Crop_ID_3,Crop_ID_4,Crop_ID_5,Crop_ID_6,Crop_ID_7,prediction,label
Field_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,0.342000,0.446000,0.010000,0.086000,0.060000,0.038000,0.018000,2,1
27,0.539333,0.296667,0.025333,0.037333,0.024000,0.073333,0.004000,1,2
33,0.301333,0.310667,0.108000,0.118667,0.061333,0.031333,0.068667,2,2
44,0.336667,0.510667,0.037333,0.050667,0.017333,0.036667,0.010667,2,1
50,0.270000,0.478667,0.014000,0.127333,0.051333,0.045333,0.013333,2,5
...,...,...,...,...,...,...,...,...,...
4717,0.538000,0.300000,0.026667,0.040667,0.069333,0.019333,0.006000,1,1
4733,0.538800,0.074800,0.046600,0.191800,0.039000,0.090200,0.018800,1,1
4735,0.344000,0.187333,0.036000,0.188667,0.072000,0.164000,0.008000,1,4
4775,0.435333,0.287333,0.022000,0.121333,0.089333,0.036667,0.008000,1,1


In [47]:
np.mean(df_dnn['prediction'] == df_dnn['label'])

0.6097560975609756

In [71]:
df_dnn.to_csv(f'/mnt/azrael/spatialdata/projects/eo_data/iclr-2020-challange/mlhub/val_predictions/{model_name}.csv')

## Prepare results for submission

In [48]:
# Train best model on whole training dataset (without val)
# TempCNN model

model_config = {
    'learning_rate': 0.1,
    'n_classes': 7,
    'keep_prob': 0.5,
    'nb_conv_stacks': 2
}
model_final = TempCNNModel(model_config)
model_final.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [49]:
model_final.fit(
    x=features_valid, 
    y=labels_valid,
    batch_size=256,
    epochs=300,
    sample_weight=weights_valid,
)

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 13, 42)]          0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 13, 16)            3376      
_________________________________________________________________
activation_3 (Activation)    (None, 13, 16)            0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 13, 16)            0         
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 13, 16)            1296      
_________________________________________________________________
activation_4 (Activation)    (None, 13, 16)            0         
_________________________________________________________________
dropout_4 (Dropout)          (None, 13, 16)            0   

  ...
    to  
  ['...']


Train on 10363 samples
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/30

Epoch 77/300
Epoch 78/300
Epoch 79/300
Epoch 80/300
Epoch 81/300
Epoch 82/300
Epoch 83/300
Epoch 84/300
Epoch 85/300
Epoch 86/300
Epoch 87/300
Epoch 88/300
Epoch 89/300
Epoch 90/300
Epoch 91/300
Epoch 92/300
Epoch 93/300
Epoch 94/300
Epoch 95/300
Epoch 96/300
Epoch 97/300
Epoch 98/300
Epoch 99/300
Epoch 100/300
Epoch 101/300
Epoch 102/300
Epoch 103/300
Epoch 104/300
Epoch 105/300
Epoch 106/300
Epoch 107/300
Epoch 108/300
Epoch 109/300
Epoch 110/300
Epoch 111/300
Epoch 112/300
Epoch 113/300
Epoch 114/300
Epoch 115/300
Epoch 116/300
Epoch 117/300
Epoch 118/300
Epoch 119/300
Epoch 120/300
Epoch 121/300
Epoch 122/300
Epoch 123/300
Epoch 124/300
Epoch 125/300
Epoch 126/300
Epoch 127/300
Epoch 128/300
Epoch 129/300
Epoch 130/300
Epoch 131/300
Epoch 132/300
Epoch 133/300
Epoch 134/300
Epoch 135/300
Epoch 136/300
Epoch 137/300
Epoch 138/300
Epoch 139/300
Epoch 140/300
Epoch 141/300
Epoch 142/300
Epoch 143/300
Epoch 144/300
Epoch 145/300
Epoch 146/300
Epoch 147/300
Epoch 148/300
Epoch 149/300
E

Epoch 225/300
Epoch 226/300
Epoch 227/300
Epoch 228/300
Epoch 229/300
Epoch 230/300
Epoch 231/300
Epoch 232/300
Epoch 233/300
Epoch 234/300
Epoch 235/300
Epoch 236/300
Epoch 237/300
Epoch 238/300
Epoch 239/300
Epoch 240/300
Epoch 241/300
Epoch 242/300
Epoch 243/300
Epoch 244/300
Epoch 245/300
Epoch 246/300
Epoch 247/300
Epoch 248/300
Epoch 249/300
Epoch 250/300
Epoch 251/300
Epoch 252/300
Epoch 253/300
Epoch 254/300
Epoch 255/300
Epoch 256/300
Epoch 257/300
Epoch 258/300
Epoch 259/300
Epoch 260/300
Epoch 261/300
Epoch 262/300
Epoch 263/300
Epoch 264/300
Epoch 265/300
Epoch 266/300
Epoch 267/300
Epoch 268/300
Epoch 269/300
Epoch 270/300
Epoch 271/300
Epoch 272/300
Epoch 273/300
Epoch 274/300
Epoch 275/300
Epoch 276/300
Epoch 277/300
Epoch 278/300
Epoch 279/300
Epoch 280/300
Epoch 281/300
Epoch 282/300
Epoch 283/300
Epoch 284/300
Epoch 285/300
Epoch 286/300
Epoch 287/300
Epoch 288/300
Epoch 289/300
Epoch 290/300
Epoch 291/300
Epoch 292/300
Epoch 293/300
Epoch 294/300
Epoch 295/300
Epoch 

<tensorflow.python.keras.callbacks.History at 0x7fafcc050780>

Test on val

In [55]:
preds_val = model_final.predict(features_val)

In [56]:
one_hot_labels_val = one_hot(labels_val, 7)
log_loss(one_hot_labels_val, preds_val)

0.8696593886087294

Predict on test data

In [60]:
preds_final = model_final.predict(features_test)

In [61]:
# Groups pixel-wise predictions into field predictions by averaging.
    
df_pred = pd.DataFrame(preds_final, columns=[f'Crop_ID_{i+1}' for i in range(7)])
df_pred['Field_ID'] = field_id_test
    
df_grouped = df_pred.groupby('Field_ID').mean()
df_grouped

Unnamed: 0_level_0,Crop_ID_1,Crop_ID_2,Crop_ID_3,Crop_ID_4,Crop_ID_5,Crop_ID_6,Crop_ID_7
Field_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
3,0.093878,0.741820,0.018600,0.058965,0.010796,0.007072,0.068869
6,0.092141,0.853336,0.000131,0.013566,0.013991,0.025687,0.001148
11,0.087642,0.674603,0.010626,0.132801,0.056422,0.003938,0.033968
13,0.243931,0.547923,0.016267,0.029965,0.114173,0.034280,0.013460
14,0.048479,0.865151,0.042108,0.007048,0.008549,0.000287,0.028379
...,...,...,...,...,...,...,...
4785,0.909692,0.070041,0.006883,0.007005,0.002077,0.002031,0.002272
4788,0.922520,0.023131,0.007554,0.028200,0.004992,0.013580,0.000022
4790,0.790623,0.102938,0.008065,0.026936,0.042148,0.028407,0.000883
4793,0.537171,0.087778,0.002759,0.163861,0.110695,0.096905,0.000830


In [62]:
df_grouped.to_csv('submissions/temp_cnn_sampling_weighted.csv')

In [63]:
model_final.save_weights('models/temp_cnn_sampling_weighted.hdf5')