In [1]:
import numpy as np
import tensorflow as tf

In [2]:
# Experiment identifier; used further down to name the validation-predictions CSV.
model_name = 'intercropping'

### Load the data

In [3]:
# Location of the prepared .npz archive that is read with np.load below.
NP_DATA_PATH = '/mnt/azrael/spatialdata/projects/eo_data/iclr-2020-challange/mlhub/prepared_data/original_data.npz'

In [4]:
# np.load on an .npz archive returns an NpzFile that keeps the file open until
# it is closed, so read the three arrays inside a context manager and release
# the handle as soon as they are in memory.
with np.load(NP_DATA_PATH) as data:
    features = data['features']
    field_id = data['field_id']
    crop_id = data['crop_id']

# Display the shapes to confirm the arrays line up along axis 0.
features.shape, field_id.shape, crop_id.shape

((67557, 13, 14), (67557,), (67557,))

### Normalizing the data

In [5]:
# Per-channel statistics: reduce over the first two axes and keep the last one.
mean_f = np.mean(features, axis=(0, 1))
std_f = np.std(features, axis=(0, 1))

mean_f, std_f

(array([0.04495004, 0.05522608, 0.08520251, 0.08335783, 0.13549377,
        0.25449966, 0.29252816, 0.29083016, 0.31650684, 0.31871104,
        0.26029542, 0.17459822, 0.5274778 , 3.82845028]),
 array([ 0.02151336,  0.023685  ,  0.02388389,  0.03328747,  0.03058358,
         0.0464853 ,  0.05491788,  0.05862865,  0.05730477,  0.05223269,
         0.05640826,  0.05882478,  0.18024394, 17.91834208]))

In [6]:
# Standardise every channel with the statistics computed above (they broadcast
# over the last axis).
# NOTE: this rebinds `features`, so re-running the cell without reloading the
# data standardises the already-standardised array a second time.
features = (features - mean_f)/std_f

## Spatial sampling

In [7]:
# Lookup table from aggregation name to the NumPy reduction that implements it.
# Every entry is called as fn(array, axis=0) by sample_field.
agg_methods = {
    'min': np.min,  # was np.mean, which silently returned the mean for 'min'
    'max': np.max,
    'mean': np.mean,
    'std': np.std
}

def sample_field(features, N=10, n_samples=1, methods=['mean']):
    """Draw aggregated random sub-samples from the rows of one field.

    For each of the ``n_samples`` draws, ``N`` row indices are chosen with
    replacement via ``np.random.choice``. The selected rows are reduced along
    axis 0 with every aggregation named in ``methods`` (looked up in
    ``agg_methods``), and the reductions are joined along the last axis.

    Args:
        features: Array whose first axis enumerates the rows to sample from.
        N: Number of rows drawn for each sample.
        n_samples: Number of independent draws.
        methods: Keys of ``agg_methods`` to apply, in order.

    Returns:
        Array with the ``n_samples`` aggregated draws stacked along a new
        leading axis.
    """
    row_ids = np.arange(len(features))
    draws = []

    for _ in range(n_samples):
        picked = features[np.random.choice(row_ids, size=N, replace=True)]

        per_method = []
        for method in methods:
            per_method.append(agg_methods[method](picked, axis=0))

        draws.append(np.concatenate(per_method, axis=-1))

    return np.stack(draws)

In [8]:
from tqdm.auto import tqdm

def sample_dataset(features, labels, field_ids, n=10, methods=['mean']):
    """Build a sample-level dataset by repeatedly sub-sampling every field.

    For each distinct value in ``field_ids`` the rows of that field are handed
    to ``sample_field``, which draws ``n`` rows with replacement and aggregates
    them. A field with ``c`` rows yields ``max(c // n, 3)`` samples, each with
    weight ``1 / n_samples`` so the weights of one field sum to 1.

    Args:
        features: Array whose first axis is aligned with ``field_ids``.
        labels: Per-row labels aligned with ``field_ids``; the label of the
            first row of a field is used for all samples of that field.
        field_ids: 1-D array holding the field identifier of every row.
        n: Number of rows drawn for each sample.
        methods: Aggregation names forwarded to ``sample_field``.

    Returns:
        Tuple ``(features, labels, field_ids, weights)`` of arrays concatenated
        over all fields, with one entry per generated sample.
    """
    fields, counts = np.unique(field_ids, return_counts=True)

    # Group the row indices of every field with one stable sort instead of
    # rebuilding a full boolean mask per field. np.unique returns the fields
    # sorted, and the stable sort keeps rows of a field in their original
    # order, so each group selects exactly what `field_ids == fid` selected.
    order = np.argsort(field_ids, kind='stable')
    rows_per_field = np.split(order, np.cumsum(counts)[:-1])

    features_sampled = []
    labels_sampled = []
    field_id_sampled = []
    weights_sampled = []
    for fid, c, rows in zip(tqdm(fields), counts, rows_per_field):
        # Small fields still contribute at least three samples.
        n_samples = max(c//n, 3)

        field_features = features[rows]
        field_label = labels[rows[0]]

        sampled_features = sample_field(field_features, N=n, n_samples=n_samples, methods=methods)

        features_sampled.append(sampled_features)
        labels_sampled.append(np.full(n_samples, field_label))
        field_id_sampled.append(np.full(n_samples, fid))
        weights_sampled.append(np.full(n_samples, 1.0/n_samples))

    # Separate names for the outputs so the input arguments are not shadowed.
    out_features = np.concatenate(features_sampled, axis=0)
    out_labels = np.concatenate(labels_sampled, axis=0)
    out_field_ids = np.concatenate(field_id_sampled, axis=0)
    out_weights = np.concatenate(weights_sampled, axis=0)

    return out_features, out_labels, out_field_ids, out_weights

In [20]:
# sample_field draws through np.random.choice, i.e. NumPy's global generator.
# Seed it right before sampling so the sampled dataset is the same on every run.
np.random.seed(42)
features_s, crop_id_s, field_id_s, weights_s = sample_dataset(features, crop_id, field_id, methods=['mean'])

HBox(children=(IntProgress(value=0, max=4688), HTML(value='')))




In [21]:
# Shape of the sampled feature array (first axis: one row per generated sample).
features_s.shape

(14755, 13, 14)

### Extract training data and split train, val

In [22]:
# Samples with crop_id 0 carry no usable label here: they become the test set,
# everything else is the labelled pool used for training and validation.
mask = crop_id_s != 0
features_valid = features_s[mask]
# Shift labels down by one (presumably 1..7 -> 0..6, matching n_classes=7 used
# for the models below).
labels_valid = crop_id_s[mask] - 1
field_id_valid = field_id_s[mask]
weights_valid = weights_s[mask]

features_test = features_s[~mask]
field_id_test = field_id_s[~mask]

features_valid.shape, features_test.shape

((10363, 13, 14), (4392, 13, 14))

In [23]:
# # Compute field split and save
# val_ratio = 0.1
# random_state = np.random.RandomState(seed=42)

# fields = np.unique(field_id_valid)
# random_state.shuffle(fields)

# val_i = int(val_ratio * len(fields))
# fields_train = fields[val_i:]
# fields_val = fields[:val_i]
# fields_test = np.unique(field_id_test)

# # Save split
# np.save('/mnt/azrael/spatialdata/projects/eo_data/iclr-2020-challange/mlhub/split/train.npy', fields_train)
# np.save('/mnt/azrael/spatialdata/projects/eo_data/iclr-2020-challange/mlhub/split/val.npy', fields_val)
# np.save('/mnt/azrael/spatialdata/projects/eo_data/iclr-2020-challange/mlhub/split/test.npy', fields_test)
# fields_train.shape, fields_val.shape, fields_test.shape

In [24]:
# Load the stored list of training field IDs so the train/val split stays the
# same between runs (labelled fields not in this list are used for validation).
fields_train = np.load('/mnt/azrael/spatialdata/projects/eo_data/iclr-2020-challange/mlhub/split/train.npy')

In [25]:
# Samples whose field is listed in fields_train go to the training set; every
# other labelled sample becomes validation data.
train_mask = np.isin(field_id_valid, fields_train)
# train_mask has one entry per sample, so the printed value is the share of
# samples (the message says "pixels") that ended up in the training set.
print(f'Ratio of pixels in train: {np.mean(train_mask)}')

features_train = features_valid[train_mask]
labels_train = labels_valid[train_mask]
field_id_train = field_id_valid[train_mask]
weights_train = weights_valid[train_mask]

features_val = features_valid[~train_mask]
labels_val = labels_valid[~train_mask]
field_id_val = field_id_valid[~train_mask]
weights_val = weights_valid[~train_mask]

features_train.shape, features_val.shape

Ratio of pixels in train: 0.8999324519926662


((9326, 13, 14), (1037, 13, 14))

## Concat features from intercropping

In [26]:
# Directory holding the extra feature/label arrays (features4/5/7, labels4/5/7)
# that are loaded below.
cropping_dir = '/mnt/azrael/spatialdata/projects/eo_data/iclr-2020-challange/mlhub/prepared_data/mixed_features'

In [43]:
# Three additional feature sets (presumably the intercropping classes 4, 5 and
# 7, going by the file names — confirm with whoever prepared them).
features4 = np.load(f'{cropping_dir}/features4.npy')
features5 = np.load(f'{cropping_dir}/features5.npy')
features7 = np.load(f'{cropping_dir}/features7.npy')

# Labels are shifted down by one, the same way labels_valid was derived from
# crop_id_s.
labels4 = np.load(f'{cropping_dir}/labels4.npy') - 1
labels5 = np.load(f'{cropping_dir}/labels5.npy') - 1
labels7 = np.load(f'{cropping_dir}/labels7.npy') - 1

features4.shape, features5.shape, features7.shape

((1000, 13, 14), (1000, 13, 14), (1000, 13, 14))

In [67]:
# Extended training set: the sampled training fields followed by the three
# additional feature sets, in the same order for features, labels and weights.
features_train_ext = np.concatenate([features_train, features4, features5, features7], axis=0)
labels_train_ext = np.concatenate([labels_train, labels4, labels5, labels7], axis=0)
# NOTE(review): intrecropping_weights is not read anywhere in the visible
# cells; the weights actually used are the hard-coded factors below.
intrecropping_weights = 1.0
# Each additional set gets one constant sample weight (0.33 / 0.15 / 0.075).
# np.ones_like(labels) only supplies an array of the right length; multiplying
# by the float factor produces the float weights.
weights_train_ext = np.concatenate([weights_train, 
                                    np.ones_like(labels4) * 0.33, 
                                    np.ones_like(labels5) * 0.15,
                                    np.ones_like(labels7) * 0.075])

In [68]:
# Collapse every non-leading axis so each sample becomes one flat feature vector
features_train_flat = features_train_ext.reshape(len(features_train_ext), -1)
features_val_flat = features_val.reshape(len(features_val), -1)

### Train model

In [69]:
from eoflow.models import TempCNNModel, BiRNN, TransformerEncoder

In [70]:
# TempCNN model

# Hyper-parameters handed to eoflow's TempCNNModel.
# NOTE(review): the model is compiled further down with optimizer='adam'
# passed by name, so it is not visible from this notebook whether
# 'learning_rate' takes effect — confirm against TempCNNModel.
model_config = {
    'learning_rate': 0.1,
    'n_classes': 7,
    'keep_prob': 0.5,
    'nb_conv_stacks': 2
}
model = TempCNNModel(model_config)

In [71]:
# BiRNN model

# model_config = {
#     'learning_rate': 0.1,
#     'n_classes': 7,
#     'rnn_layer': 'lstm',
#     'keep_prob': 0.5
# }
# model = BiRNN(model_config)

In [72]:
# Dense Model

# model = tf.keras.Sequential([
#     tf.keras.layers.Dense(256, activation='relu'),
#     tf.keras.layers.Dropout(rate=0.5),
#     tf.keras.layers.Dense(7, activation='softmax')
# ])

In [73]:
# Class weights
# classes = np.unique(labels_valid, return_counts=True)
# class_probs = classes[1]/np.sum(classes[1])
# class_inv_probs = 1.0 / class_probs
# weights = {i:inv_prob for i, inv_prob in enumerate(class_inv_probs)}
# weights

In [74]:
# Labels are integer class indices (not one-hot), hence the sparse
# categorical cross-entropy loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [75]:
# Temporal
# Train on the extended set (sampled training fields plus the additional
# feature sets) with the per-sample weights built alongside it; the validation
# data is passed without sample weights.
model.fit(
    x=features_train_ext, 
    y=labels_train_ext, 
    validation_data=(features_val, labels_val),
    batch_size=256,
    epochs=250,
    sample_weight=weights_train_ext
)

# Non-temporal
# (features_train_flat is built from features_train_ext, so it has to be
# paired with labels_train_ext, not labels_train.)
# model.fit(
#     x=features_train_flat, 
#     y=labels_train_ext, 
#     validation_data=(features_val_flat, labels_val),
#     batch_size=1024,
#     epochs=10)

Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 13, 14)]          0         
_________________________________________________________________
conv1d_4 (Conv1D)            (None, 13, 16)            1136      
_________________________________________________________________
activation_6 (Activation)    (None, 13, 16)            0         
_________________________________________________________________
dropout_6 (Dropout)          (None, 13, 16)            0         
_________________________________________________________________
conv1d_5 (Conv1D)            (None, 13, 16)            1296      
_________________________________________________________________
activation_7 (Activation)    (None, 13, 16)            0         
_________________________________________________________________
dropout_7 (Dropout)          (None, 13, 16)            0   

  ...
    to  
  ['...']


Train on 12326 samples, validate on 1037 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 19/250
Epoch 20/250
Epoch 21/250
Epoch 22/250
Epoch 23/250
Epoch 24/250
Epoch 25/250
Epoch 26/250
Epoch 27/250
Epoch 28/250
Epoch 29/250
Epoch 30/250
Epoch 31/250
Epoch 32/250
Epoch 33/250
Epoch 34/250
Epoch 35/250
Epoch 36/250
Epoch 37/250
Epoch 38/250
Epoch 39/250
Epoch 40/250
Epoch 41/250
Epoch 42/250
Epoch 43/250
Epoch 44/250
Epoch 45/250
Epoch 46/250
Epoch 47/250
Epoch 48/250
Epoch 49/250
Epoch 50/250
Epoch 51/250
Epoch 52/250
Epoch 53/250
Epoch 54/250


Epoch 55/250
Epoch 56/250
Epoch 57/250
Epoch 58/250
Epoch 59/250
Epoch 60/250
Epoch 61/250
Epoch 62/250
Epoch 63/250
Epoch 64/250
Epoch 65/250
Epoch 66/250
Epoch 67/250
Epoch 68/250
Epoch 69/250
Epoch 70/250
Epoch 71/250
Epoch 72/250
Epoch 73/250
Epoch 74/250
Epoch 75/250
Epoch 76/250
Epoch 77/250
Epoch 78/250
Epoch 79/250
Epoch 80/250
Epoch 81/250
Epoch 82/250
Epoch 83/250
Epoch 84/250
Epoch 85/250
Epoch 86/250
Epoch 87/250
Epoch 88/250
Epoch 89/250
Epoch 90/250
Epoch 91/250
Epoch 92/250
Epoch 93/250
Epoch 94/250
Epoch 95/250
Epoch 96/250
Epoch 97/250
Epoch 98/250
Epoch 99/250
Epoch 100/250
Epoch 101/250
Epoch 102/250
Epoch 103/250
Epoch 104/250
Epoch 105/250
Epoch 106/250
Epoch 107/250
Epoch 108/250
Epoch 109/250


Epoch 110/250
Epoch 111/250
Epoch 112/250
Epoch 113/250
Epoch 114/250
Epoch 115/250
Epoch 116/250
Epoch 117/250
Epoch 118/250
Epoch 119/250
Epoch 120/250
Epoch 121/250
Epoch 122/250
Epoch 123/250
Epoch 124/250
Epoch 125/250
Epoch 126/250
Epoch 127/250
Epoch 128/250
Epoch 129/250
Epoch 130/250
Epoch 131/250
Epoch 132/250
Epoch 133/250
Epoch 134/250
Epoch 135/250
Epoch 136/250
Epoch 137/250
Epoch 138/250
Epoch 139/250
Epoch 140/250
Epoch 141/250
Epoch 142/250
Epoch 143/250
Epoch 144/250
Epoch 145/250
Epoch 146/250
Epoch 147/250
Epoch 148/250
Epoch 149/250
Epoch 150/250
Epoch 151/250
Epoch 152/250
Epoch 153/250
Epoch 154/250
Epoch 155/250
Epoch 156/250
Epoch 157/250
Epoch 158/250
Epoch 159/250
Epoch 160/250
Epoch 161/250
Epoch 162/250


Epoch 163/250
Epoch 164/250
Epoch 165/250
Epoch 166/250
Epoch 167/250
Epoch 168/250
Epoch 169/250
Epoch 170/250
Epoch 171/250
Epoch 172/250
Epoch 173/250
Epoch 174/250
Epoch 175/250
Epoch 176/250
Epoch 177/250
Epoch 178/250
Epoch 179/250
Epoch 180/250
Epoch 181/250
Epoch 182/250
Epoch 183/250
Epoch 184/250
Epoch 185/250
Epoch 186/250
Epoch 187/250
Epoch 188/250
Epoch 189/250
Epoch 190/250
Epoch 191/250
Epoch 192/250
Epoch 193/250
Epoch 194/250
Epoch 195/250
Epoch 196/250
Epoch 197/250
Epoch 198/250
Epoch 199/250
Epoch 200/250
Epoch 201/250
Epoch 202/250
Epoch 203/250
Epoch 204/250
Epoch 205/250
Epoch 206/250
Epoch 207/250
Epoch 208/250
Epoch 209/250
Epoch 210/250
Epoch 211/250
Epoch 212/250
Epoch 213/250
Epoch 214/250
Epoch 215/250
Epoch 216/250


Epoch 217/250
Epoch 218/250
Epoch 219/250
Epoch 220/250
Epoch 221/250
Epoch 222/250
Epoch 223/250
Epoch 224/250
Epoch 225/250
Epoch 226/250
Epoch 227/250
Epoch 228/250
Epoch 229/250
Epoch 230/250
Epoch 231/250
Epoch 232/250
Epoch 233/250
Epoch 234/250
Epoch 235/250
Epoch 236/250
Epoch 237/250
Epoch 238/250
Epoch 239/250
Epoch 240/250
Epoch 241/250
Epoch 242/250
Epoch 243/250
Epoch 244/250
Epoch 245/250
Epoch 246/250
Epoch 247/250
Epoch 248/250
Epoch 249/250
Epoch 250/250


<tensorflow.python.keras.callbacks.History at 0x7f515781bb70>

### Random forest

In [57]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import log_loss

# Fixed seed so the forest itself is reproducible.
random_state = 7
rf = RandomForestClassifier(n_estimators=500, random_state=random_state, n_jobs=-1)

# NOTE(review): unlike the network, the forest is fitted without
# sample_weight, so every row of the extended training set counts equally —
# confirm that this is intended.
rf.fit(features_train_flat, labels_train_ext)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=500,
                       n_jobs=-1, oob_score=False, random_state=7, verbose=0,
                       warm_start=False)

### Predictions

In [76]:
# Class probabilities on the validation samples: the forest takes the
# flattened features, the network the un-flattened ones.
preds_rf = rf.predict_proba(features_val_flat)
preds_dnn = model.predict(features_val)

In [77]:
# Both prediction arrays should have the same shape (samples x classes).
preds_rf.shape, preds_dnn.shape

((1037, 7), (1037, 7))

In [78]:
def one_hot(labels, num_classes):
    """ One-hot encodes integer labels for a given number of classes.

    Args:
        labels: Array-like of class indices in ``[0, num_classes)``. The values
            may be stored as floats (e.g. after averaging in pandas) as long as
            they are whole numbers. Any shape is accepted and flattened.
        num_classes: Width of the encoding.

    Returns:
        Float array of shape ``(n_labels, num_classes)`` with a single 1 per row.

    Raises:
        ValueError: If a label is not a whole number.
    """
    labels = np.asarray(labels).ravel()

    if not np.issubdtype(labels.dtype, np.integer):
        # NumPy refuses float arrays as indices, so convert explicitly, but
        # only when no information is lost.
        as_int = labels.astype(np.intp)
        if not np.array_equal(as_int, labels):
            raise ValueError('labels must be whole numbers')
        labels = as_int

    one_hot_labels = np.zeros((labels.size, num_classes))
    one_hot_labels[np.arange(labels.size), labels] = 1

    return one_hot_labels

In [79]:
import pandas as pd
def field_predictions(predictions, labels, field_ids):
    """ Groups row-wise predictions into field predictions by averaging.

    Args:
        predictions: 2-D array of class probabilities, one row per sample.
        labels: Integer class label of every row.
        field_ids: Field identifier of every row.

    Returns:
        Tuple ``(field_preds, field_labels, field_index)``: the mean prediction
        per field, the label of each field with its original dtype, and the
        index of field IDs (sorted) that the two arrays are aligned with.
    """
    df = pd.DataFrame(predictions)
    pred_cols = list(df.columns)
    df['CROP_ID'] = labels
    df['FIELD_ID'] = field_ids

    grouped = df.groupby('FIELD_ID')
    mean_preds = grouped[pred_cols].mean()

    # Averaging the label column turns it into floats on current pandas, and
    # float labels cannot be used as indices by one_hot. All rows of a field
    # are expected to share one label, so take the first one, which keeps the
    # integer dtype.
    field_labels = grouped['CROP_ID'].first().values

    return mean_preds.values, field_labels, mean_preds.index

In [80]:
# One-hot encode the validation labels (7 classes) for use with log_loss
one_hot_labels_val = one_hot(labels_val, 7)

### Pixel log loss

In [81]:
# Log loss over the individual validation samples, before any grouping by
# field.
rf_loss = log_loss(one_hot_labels_val, preds_rf)
dnn_loss = log_loss(one_hot_labels_val, preds_dnn)

print(f'Random forest: {rf_loss}')
print(f'DNN: {dnn_loss}')

Random forest: 1.230002812363958
DNN: 1.1610276711271248


### Field log loss

In [82]:
# Average the validation predictions of each model per field. Both calls use
# the same labels and field IDs, so their label/index outputs match.
preds_rf_field, lbl_rf_field, field_id_rf = field_predictions(preds_rf, labels_val, field_id_val)
preds_dnn_field, lbl_dnn_field, field_id_dnn = field_predictions(preds_dnn, labels_val, field_id_val)

In [83]:
# One-hot encode the per-field labels (7 classes) for the field-level log loss.
lbl_rf_field_oh = one_hot(lbl_rf_field, 7)
lbl_dnn_field_oh = one_hot(lbl_dnn_field, 7)

In [84]:
# Log loss computed on the per-field averaged predictions.
rf_loss_field = log_loss(lbl_rf_field_oh, preds_rf_field)
dnn_loss_field = log_loss(lbl_dnn_field_oh, preds_dnn_field)

print(f'Random forest: {rf_loss_field}')
print(f'DNN: {dnn_loss_field}')

Random forest: 1.233688965660122
DNN: 1.1546287469724874


## Prepare predictions for analysis

In [85]:
import pandas as pd

In [86]:
def prepare_predictions(preds, labels, field_ids):
    """Assemble a per-field table of class probabilities, prediction and label.

    Args:
        preds: 2-D array of class probabilities, one row per field and one
            column per class.
        labels: Zero-based class label of every row.
        field_ids: Field identifier of every row; becomes the index.

    Returns:
        DataFrame indexed by ``Field_ID`` with one ``Crop_ID_<k>`` column per
        class (``k`` starting at 1), followed by ``prediction`` (arg-max class)
        and ``label``, both shifted to be one-based.
    """
    # Take the number of classes from the input instead of assuming 7.
    n_classes = np.shape(preds)[1]

    df = pd.DataFrame(preds, columns=[f'Crop_ID_{i+1}' for i in range(n_classes)])
    df['Field_ID'] = field_ids
    df['prediction'] = np.argmax(preds, axis=1) + 1
    df['label'] = labels+1

    return df.set_index('Field_ID')

In [87]:
# Per-field tables for both models: class probabilities, arg-max prediction
# and label, indexed by field ID.
df_rf = prepare_predictions(preds_rf_field, lbl_rf_field, field_id_rf)
df_dnn = prepare_predictions(preds_dnn_field, lbl_dnn_field, field_id_dnn)
df_rf

Unnamed: 0_level_0,Crop_ID_1,Crop_ID_2,Crop_ID_3,Crop_ID_4,Crop_ID_5,Crop_ID_6,Crop_ID_7,prediction,label
Field_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,0.472000,0.318000,0.016000,0.078000,0.044000,0.064000,0.008000,1,1
27,0.481333,0.328667,0.029333,0.048000,0.031333,0.077333,0.004000,1,2
33,0.266000,0.308000,0.162000,0.111333,0.066667,0.036000,0.050000,2,2
44,0.301333,0.560667,0.009333,0.074000,0.016667,0.028667,0.009333,2,1
50,0.226000,0.512000,0.013333,0.115333,0.086000,0.034000,0.013333,2,5
...,...,...,...,...,...,...,...,...,...
4717,0.557333,0.266000,0.016000,0.060667,0.068000,0.026667,0.005333,1,1
4733,0.478200,0.081000,0.026600,0.223600,0.052400,0.105400,0.032800,1,1
4735,0.286000,0.187333,0.034000,0.192000,0.080000,0.214000,0.006667,1,4
4775,0.480000,0.216667,0.024667,0.144000,0.070667,0.058000,0.006000,1,1


In [88]:
# Field-level accuracy of the network: share of validation fields whose
# arg-max class equals the label.
np.mean(df_dnn['prediction'] == df_dnn['label'])

0.6219512195121951

In [89]:
# Store the network's per-field validation predictions under this
# experiment's name (model_name).
df_dnn.to_csv(f'/mnt/azrael/spatialdata/projects/eo_data/iclr-2020-challange/mlhub/val_predictions/{model_name}.csv')

## Prepare results for submission

In [53]:
# Train the best model on all labelled samples, i.e. train and val fields
# together (no separate validation split is held out for this run).
# TempCNN model

# Same hyper-parameters as the TempCNN configuration used for the validation
# run earlier in the notebook.
model_config = {
    'learning_rate': 0.1,
    'n_classes': 7,
    'keep_prob': 0.5,
    'nb_conv_stacks': 2
}
model_final = TempCNNModel(model_config)
model_final.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [54]:
# Fit on every labelled sample with its per-field weight.
# NOTE(review): this uses features_valid/labels_valid only; the additional
# feature sets concatenated into features_train_ext are not part of this run —
# confirm that is intended.
model_final.fit(
    x=features_valid, 
    y=labels_valid,
    batch_size=256,
    epochs=250,
    sample_weight=weights_valid,
)

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 13, 42)]          0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 13, 16)            3376      
_________________________________________________________________
activation_3 (Activation)    (None, 13, 16)            0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 13, 16)            0         
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 13, 16)            1296      
_________________________________________________________________
activation_4 (Activation)    (None, 13, 16)            0         
_________________________________________________________________
dropout_4 (Dropout)          (None, 13, 16)            0   

  ...
    to  
  ['...']


Train on 10363 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 19/250
Epoch 20/250
Epoch 21/250
Epoch 22/250
Epoch 23/250
Epoch 24/250
Epoch 25/250
Epoch 26/250
Epoch 27/250
Epoch 28/250
Epoch 29/250
Epoch 30/250
Epoch 31/250
Epoch 32/250
Epoch 33/250
Epoch 34/250
Epoch 35/250
Epoch 36/250
Epoch 37/250
Epoch 38/250
Epoch 39/250
Epoch 40/250
Epoch 41/250
Epoch 42/250
Epoch 43/250
Epoch 44/250
Epoch 45/250
Epoch 46/250
Epoch 47/250
Epoch 48/250
Epoch 49/250
Epoch 50/250
Epoch 51/250
Epoch 52/250
Epoch 53/250
Epoch 54/250
Epoch 55/250
Epoch 56/250
Epoch 57/250
Epoch 58/250
Epoch 59/250
Epoch 60/250
Epoch 61/250
Epoch 62/250
Epoch 63/250
Epoch 64/250
Epoch 65/250
Epoch 66/250
Epoch 67/250
Epoch 68/250
Epoch 69/250
Epoch 70/250
Epoch 71/250
Epoch 72/250
Epoch 73/250
Epoch 74/250
Epoch 75/250
Epoch 76/25

Epoch 150/250
Epoch 151/250
Epoch 152/250
Epoch 153/250
Epoch 154/250
Epoch 155/250
Epoch 156/250
Epoch 157/250
Epoch 158/250
Epoch 159/250
Epoch 160/250
Epoch 161/250
Epoch 162/250
Epoch 163/250
Epoch 164/250
Epoch 165/250
Epoch 166/250
Epoch 167/250
Epoch 168/250
Epoch 169/250
Epoch 170/250
Epoch 171/250
Epoch 172/250
Epoch 173/250
Epoch 174/250
Epoch 175/250
Epoch 176/250
Epoch 177/250
Epoch 178/250
Epoch 179/250
Epoch 180/250
Epoch 181/250
Epoch 182/250
Epoch 183/250
Epoch 184/250
Epoch 185/250
Epoch 186/250
Epoch 187/250
Epoch 188/250
Epoch 189/250
Epoch 190/250
Epoch 191/250
Epoch 192/250
Epoch 193/250
Epoch 194/250
Epoch 195/250
Epoch 196/250
Epoch 197/250
Epoch 198/250
Epoch 199/250
Epoch 200/250
Epoch 201/250
Epoch 202/250
Epoch 203/250
Epoch 204/250
Epoch 205/250
Epoch 206/250
Epoch 207/250
Epoch 208/250
Epoch 209/250
Epoch 210/250
Epoch 211/250
Epoch 212/250
Epoch 213/250
Epoch 214/250
Epoch 215/250
Epoch 216/250
Epoch 217/250
Epoch 218/250
Epoch 219/250
Epoch 220/250
Epoch 

<tensorflow.python.keras.callbacks.History at 0x7ff81407f6d8>

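Sanity check on val (note: the val fields were part of the data `model_final` was trained on, so this loss is optimistic and not a held-out score)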

In [55]:
# features_val is a subset of features_valid, which model_final was fitted on,
# so these are predictions on data the model has already seen.
preds_val = model_final.predict(features_val)

In [56]:
# Sample-level log loss of the final model on the validation samples. Those
# samples were included in its training data, so the value is not comparable
# to the held-out losses reported earlier.
one_hot_labels_val = one_hot(labels_val, 7)
log_loss(one_hot_labels_val, preds_val)

0.8696593886087294

Predict on test data

In [60]:
# Class probabilities for the unlabelled (crop_id == 0) samples.
preds_final = model_final.predict(features_test)

In [61]:
# Average the sample-wise test predictions of each field into one row per
# field. The 7 column names correspond to n_classes=7 in the model config.

df_pred = pd.DataFrame(preds_final, columns=[f'Crop_ID_{i+1}' for i in range(7)])
df_pred['Field_ID'] = field_id_test

df_grouped = df_pred.groupby('Field_ID').mean()
df_grouped

Unnamed: 0_level_0,Crop_ID_1,Crop_ID_2,Crop_ID_3,Crop_ID_4,Crop_ID_5,Crop_ID_6,Crop_ID_7
Field_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
3,0.093878,0.741820,0.018600,0.058965,0.010796,0.007072,0.068869
6,0.092141,0.853336,0.000131,0.013566,0.013991,0.025687,0.001148
11,0.087642,0.674603,0.010626,0.132801,0.056422,0.003938,0.033968
13,0.243931,0.547923,0.016267,0.029965,0.114173,0.034280,0.013460
14,0.048479,0.865151,0.042108,0.007048,0.008549,0.000287,0.028379
...,...,...,...,...,...,...,...
4785,0.909692,0.070041,0.006883,0.007005,0.002077,0.002031,0.002272
4788,0.922520,0.023131,0.007554,0.028200,0.004992,0.013580,0.000022
4790,0.790623,0.102938,0.008065,0.026936,0.042148,0.028407,0.000883
4793,0.537171,0.087778,0.002759,0.163861,0.110695,0.096905,0.000830


In [62]:
# Write the per-field test predictions as the submission file. The path is
# relative to the current working directory.
df_grouped.to_csv('submissions/temp_cnn_sampling_weighted.csv')

In [63]:
# Save the weights of the final model (path relative to the current working
# directory).
model_final.save_weights('models/temp_cnn_sampling_weighted.hdf5')