In [None]:
!pip install scikit-learn==1.0
!pip install xgboost==1.4.2
!pip install catboost==0.26.1
!pip install pandas==1.3.3
!pip install radiant-mlhub==0.3.0
!pip install rasterio==1.2.8
!pip install numpy==1.21.2
!pip install pathlib==1.0.1
!pip install tqdm==4.62.3
!pip install joblib==1.0.1
!pip install matplotlib==3.4.3
!pip install Pillow==8.3.2
!pip install torch==1.9.1
!pip install plotly==5.3.1

In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

Fri Oct  1 09:42:47 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.57.02    Driver Version: 470.57.02    CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  On   | 00000000:03:00.0 Off |                  Off |
| N/A   38C    P0    37W / 300W |      0MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
import pandas as pd
import numpy as np
import random
import torch
def seed_all(seed_value):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU). When CUDA is available
    the CUDA generators are seeded as well, cuDNN is switched to deterministic
    mode and cuDNN benchmarking is turned off.

    Parameters
    ----------
    seed_value : int
        Seed passed unchanged to every generator.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed_value)

    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


seed_all(13)

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
import warnings
warnings.filterwarnings("ignore")

import gc
import pandas as pd
import numpy as np
from sklearn.metrics import *
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from indices_creation import *

## Data Load Step 
1. We load the mean aggregations for both train and test. The mean aggregations contain the labels and field IDs.
2. The median aggregations contain the field IDs; the field size and cluster label are merged onto them by field ID.

In [None]:
import os
# Show the current working directory; the CSV files below are read via relative paths.
os.getcwd()

'/root/Sentinel 2'

In [None]:
# Mean aggregates. 'label' (train only) and 'field_id' are removed from these
# frames later in the notebook.
train_df_mean = pd.read_csv('train_mean.csv')
test_df_mean  = pd.read_csv('test_mean.csv')

# Side tables joined onto the median aggregates by 'field_id'.
# The size tables name their key 'Field_id', so it is renamed to match.
train_size = pd.read_csv('size_of_field_train.csv').rename(columns={'Field_id': 'field_id'})
test_size  = pd.read_csv('size_of_field_test.csv').rename(columns={'Field_id': 'field_id'})
cluster_df = pd.read_csv('seven_cluster.csv').rename(columns={'cluster_label': 'cluster_label_7'})

# Median aggregates, enriched with the field size and then the cluster label.
# 'field_id' is removed from these frames later in the notebook.
train_df_median = (
    pd.read_csv('train_median.csv')
    .merge(train_size, on=['field_id'], how='left')
    .merge(cluster_df, on=['field_id'], how='left')
)
test_df_median = (
    pd.read_csv('test_median.csv')
    .merge(test_size, on=['field_id'], how='left')
    .merge(cluster_df, on=['field_id'], how='left')
)

gc.collect()

0

In [None]:
# Load 'full_nearest_radius_0.25.csv' and display it. The table is keyed by
# 'field_id' and is joined onto the median frames in the following cell.
full_nearest=pd.read_csv('full_nearest_radius_0.25.csv')
full_nearest

Unnamed: 0,Crop_1,Crop_2,Crop_3,Crop_4,Crop_5,Crop_6,Crop_7,Crop_8,Crop_9,Crop_10,field_id,count
0,5.360295,14.010592,1.749318,20.077034,1.235757,11.667469,10.865030,2.214733,0.0,32.819772,3020,6231
1,5.301731,13.987213,1.855606,21.253703,1.169499,10.993295,10.572275,2.214252,0.0,32.652425,7478,6413
2,5.385878,13.940887,1.789819,19.819376,1.247947,11.609195,10.821018,2.315271,0.0,33.070608,15902,6090
3,5.478018,13.468248,1.919051,17.725052,1.064201,11.270063,10.554780,2.390091,0.0,36.130495,38846,5732
4,5.285736,14.064655,1.853838,21.403401,1.210357,10.969818,10.663398,2.252183,0.0,32.296614,42856,6527
...,...,...,...,...,...,...,...,...,...,...,...,...
122363,6.342636,14.314328,1.442115,12.858860,0.440646,12.671919,16.116972,2.657231,0.0,33.155294,95767,7489
122364,6.129742,14.554700,1.415613,13.152831,0.453546,12.713029,15.915338,2.680044,0.0,32.985157,101421,7276
122365,5.773801,14.427513,1.467916,13.588704,0.489305,12.596114,16.119111,2.768069,0.0,32.769467,105889,7153
122366,5.756626,14.705044,1.353662,13.607865,0.455970,12.752921,15.830721,2.778569,0.0,32.758621,115157,7018


In [None]:
# Left-join the full_nearest table onto both median frames by field id,
# then report the resulting train shape.
train_df_median, test_df_median = (
    frame.merge(full_nearest, on=['field_id'], how='left')
    for frame in (train_df_median, test_df_median)
)
print(train_df_median.shape)


(87113, 929)


## Removing Erroneous data points
We observed a small number of data points whose labels are not one of the integer classes 1–9 (for example, fractional values). We remove these rows so that the model learns only from correctly labelled data points.

In [None]:
print(f'The shape of train data before outlier removal - {train_df_mean.shape}')

# Keep only the rows whose label is one of the classes 1..9.
valid_labels    = list(range(1, 10))
has_valid_label = train_df_mean['label'].isin(valid_labels)
train_df_mean   = train_df_mean[has_valid_label]

print(f'The shape of train data after outlier removal - {train_df_mean.shape}')

The shape of train data before outlier removal - (87113, 914)
The shape of train data after outlier removal - (87073, 914)


In [None]:
# Restrict the median frame to the field ids that survived the label filter
# applied to the mean frame.
relevant_fids   = train_df_mean['field_id'].tolist()
in_mean_frame   = train_df_median['field_id'].isin(relevant_fids)
train_df_median = train_df_median[in_mean_frame]

print(f'The shape of median train data - {train_df_median.shape} and mean train data {train_df_mean.shape}' )

# The column counts differ: the mean frame carries 'label', while the median
# frame was merged with the field-size, cluster and full_nearest tables.

The shape of median train data - (87073, 929) and mean train data (87073, 914)


### Extract date list 
We extract the list of all dates where observations were seen for index generation

In [None]:
# Band prefixes used to pick the band columns out of the mean frame.
cols = ['B01_','B02_','B03_','B04_','B05_','B06_','B07_','B08_','B09_','B8A_','B11_','B12_']

columns_available = train_df_mean.columns.tolist()

# Band by band, collect every column whose name contains the band prefix.
cols2consider = [name for prefix in cols for name in columns_available if prefix in name]

# The B01 columns are used to recover the date suffixes: strip the band prefix.
bands_with_dates = [name for name in columns_available if 'B01_' in name]
dates            = [name.replace('B01_','') for name in bands_with_dates]

print(f'The sample showing the commencement dates where observations were seen is {dates[:10]}')
print(f'The sample showing the ending dates where observations were seen is {dates[-10:]}')

The sample showing the commencement dates where observations were seen is ['month_04_day_01', 'month_04_day_04', 'month_04_day_11', 'month_04_day_14', 'month_04_day_21', 'month_04_day_24', 'month_05_day_01', 'month_05_day_04', 'month_05_day_11', 'month_05_day_14']
The sample showing the ending dates where observations were seen is ['month_11_day_05', 'month_11_day_07', 'month_11_day_10', 'month_11_day_12', 'month_11_day_15', 'month_11_day_17', 'month_11_day_20', 'month_11_day_22', 'month_11_day_27', 'month_11_day_30']


### Removal of field ID column
We keep only the columns that are needed for the next step.

In [None]:
# Mean frames: band columns only (plus the label for train).
train_df_mean = train_df_mean[cols2consider + ['label']]
test_df_mean  = test_df_mean[cols2consider]

# Median frames: band columns plus the merged-in features. Every column of
# full_nearest is selected, so its 'field_id' column is still present here;
# it is deleted later from train2/test2.
median_cols     = cols2consider + ['size_of_field', 'cluster_label_7'] + full_nearest.columns.tolist()
train_df_median = train_df_median[median_cols]
test_df_median  = test_df_median[median_cols]

### Indices Creation
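The indices for the mean aggregates of train and test can be created with the functions defined in the `indices_creation.py` module. The calls in the cell below are currently commented out, so no indices are added to the mean frames.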

In [None]:
# train_df_mean = get_band_ndvi_red(train_df_mean,dates)
# train_df_mean = get_band_afri(train_df_mean,dates)
# train_df_mean = get_band_evi2(train_df_mean,dates)
# train_df_mean = get_band_ndmi(train_df_mean,dates)
# train_df_mean = get_band_ndvi(train_df_mean,dates)
# train_df_mean = get_band_evi(train_df_mean,dates)
# train_df_mean = get_band_bndvi(train_df_mean,dates)
# train_df_mean = get_band_nli(train_df_mean,dates)
# train_df_mean = get_band_lci(train_df_mean,dates)


# test_df_mean = get_band_ndvi_red(test_df_mean,dates)
# test_df_mean = get_band_afri(test_df_mean,dates)
# test_df_mean = get_band_evi2(test_df_mean,dates)
# test_df_mean = get_band_ndmi(test_df_mean,dates)
# test_df_mean = get_band_ndvi(test_df_mean,dates)
# test_df_mean = get_band_evi(test_df_mean,dates)
# test_df_mean = get_band_bndvi(test_df_mean,dates)
# test_df_mean = get_band_nli(test_df_mean,dates)
# test_df_mean = get_band_lci(test_df_mean,dates)




We will create the indices for train and test data for median aggregates using the indices coded in indices_creation.py module

In [None]:
# Index builders applied, in this order, to the median frames.
# get_band_lci is not applied.
median_index_builders = [
    get_band_ndvi_red,
    get_band_afri,
    get_band_evi2,
    get_band_ndmi,
    get_band_ndvi,
    get_band_evi,
    get_band_bndvi,
    get_band_nli,
]

for build_index in median_index_builders:
    train_df_median = build_index(train_df_median, dates)

for build_index in median_index_builders:
    test_df_median = build_index(test_df_median, dates)

# The band columns in cols2consider are not dropped from the median frames.

In [None]:
# Shapes of the mean and median frames (train, then test) after index creation.
train_df_mean.shape,train_df_median.shape,test_df_mean.shape,test_df_median.shape

((87073, 913), (87073, 1534), (35295, 912), (35295, 1534))

In [None]:
# Save the target and remove it from the mean frame.
train_y        = train_df_mean['label'].values
train_df_mean  = train_df_mean.drop(['label'],axis=1)

# Turn +/-inf entries into NaN in all four frames.
# The result is reassigned instead of using inplace=True: these frames were
# produced by column selection, and in-place mutation of such frames is the
# chained-assignment pattern that pandas warns about (the warning is silenced
# globally in this notebook, so it would go unnoticed).
train_df_mean   = train_df_mean.replace([np.inf, -np.inf], np.nan)
test_df_mean    = test_df_mean.replace([np.inf, -np.inf], np.nan)

train_df_median = train_df_median.replace([np.inf, -np.inf], np.nan)
test_df_median  = test_df_median.replace([np.inf, -np.inf], np.nan)

# Model matrices built from the median frames. NOTE: `train`/`test` are
# reassigned from train2/test2 further down before the models are fitted.
train = train_df_median.values
test  = test_df_median.values

print(f'The shape of model ready train data is {train.shape} and model ready test data is {test.shape}')
print(f'The shape of target is {train_y.shape}')

The shape of model ready train data is (87073, 1534) and model ready test data is (35295, 1534)
The shape of target is (87073,)


In [None]:
# Read 'train_with_slopes.csv' / 'test_with_slopes.csv' and turn +/-inf entries into NaN.
infinities = [np.inf, -np.inf]

train1 = pd.read_csv('train_with_slopes.csv').replace(infinities, np.nan)
test1  = pd.read_csv('test_with_slopes.csv').replace(infinities, np.nan)



In [None]:
# Columns carried over from the median frames into the final feature tables.
extra_feature_cols = ['size_of_field','cluster_label_7']+full_nearest.columns.tolist()


def _attach_extra_features(slopes_df, median_df, extra_cols):
    """Append ``median_df[extra_cols]`` column-wise to ``slopes_df``, matching rows by position.

    Both inputs get a fresh 0..n-1 index before concatenation, so row i of the
    result is row i of ``slopes_df`` followed by row i of ``median_df``.

    Raises
    ------
    ValueError
        If the two frames have a different number of rows. Without this check
        ``pd.concat(axis=1)`` would silently pad the shorter side with NaN and
        the features would no longer line up with the target.
    """
    if len(slopes_df) != len(median_df):
        raise ValueError(
            f'Row count mismatch: {len(slopes_df)} rows in the slopes table '
            f'vs {len(median_df)} rows in the median table'
        )
    left  = pd.DataFrame(slopes_df.values, columns=slopes_df.columns)
    right = median_df[extra_cols].reset_index(drop=True)
    return pd.concat([left, right], axis=1)


# NOTE(review): rows are matched purely by position; this assumes the slopes
# CSVs are in the same row order as the median frames — confirm.
train2 = _attach_extra_features(train1, train_df_median, extra_feature_cols)
test2  = _attach_extra_features(test1, test_df_median, extra_feature_cols)
train2.head()

Unnamed: 0,B01_month_04_day_01,B01_month_04_day_04,B01_month_04_day_11,B01_month_04_day_14,B01_month_04_day_21,B01_month_04_day_24,B01_month_05_day_01,B01_month_05_day_04,B01_month_05_day_11,B01_month_05_day_14,...,Crop_3,Crop_4,Crop_5,Crop_6,Crop_7,Crop_8,Crop_9,Crop_10,field_id,count
0,58.0,,7.0,,24.0,,105.0,,118.0,,...,3.196577,2.284168,4.17191,10.011326,21.583187,2.875661,0.302039,31.481248,2.0,15892.0
1,22.0,,24.0,,14.0,,12.0,,28.0,,...,2.475979,30.059128,1.005174,7.923134,9.933481,1.707317,0.0,26.873614,29.0,13530.0
2,11.0,,10.0,,8.0,,6.0,,49.0,,...,4.116456,54.844307,1.16649,0.771233,0.771233,0.048202,0.0,25.585655,78.0,10373.0
3,10.0,,9.0,,9.0,,11.0,,163.0,,...,2.736031,44.527938,1.926782,0.346821,0.17341,0.0,0.0,22.65896,92.0,5190.0
4,10.0,,7.0,,7.0,,5.0,,138.0,,...,2.529303,42.345995,3.586851,0.652155,0.237948,0.0,0.0,31.841015,104.0,11347.0


In [None]:
# Remove the 'field_id' column from both feature tables.
train2 = train2.drop(columns=['field_id'])
test2  = test2.drop(columns=['field_id'])

In [None]:
# Load 'pivottable.csv' and display it. It is merged onto the feature tables
# by 'cluster_label_7' in the following cell.
pivot=pd.read_csv('pivottable.csv')
pivot

Unnamed: 0,cluster_label_7,Prop_crop1,Prop_crop2,Prop_crop3,Prop_crop4,Prop_crop5,Prop_crop6,Prop_crop7,Prop_crop8,Prop_crop9
0,0,0.075591,0.19993,0.039018,0.548099,0.033879,0.046053,0.049047,0.008382,0.0
1,1,0.037741,0.052977,0.255328,0.074885,0.227046,0.059152,0.0237,0.001892,0.267277
2,2,0.139854,0.183067,0.087514,0.030734,0.074617,0.13215,0.227452,0.04338,0.081233
3,3,0.097868,0.203745,0.084089,0.003769,0.193381,0.167471,0.180191,0.017077,0.052408
4,4,0.081654,0.200249,0.070366,0.459356,0.069266,0.071832,0.041486,0.005791,0.0
5,5,0.065286,0.039571,0.206714,0.447714,0.216,0.024286,0.0,0.0,0.000429
6,6,0.148988,0.153701,0.025072,0.168845,0.011311,0.161807,0.293515,0.035503,0.001257


In [None]:
# Left-join the pivot table onto both feature tables by the 7-cluster label.
train2 = pd.merge(train2, pivot, on='cluster_label_7', how='left')
test2  = pd.merge(test2, pivot, on='cluster_label_7', how='left')

In [None]:
# Display the train feature table after the pivot merge.
train2

Unnamed: 0,B01_month_04_day_01,B01_month_04_day_04,B01_month_04_day_11,B01_month_04_day_14,B01_month_04_day_21,B01_month_04_day_24,B01_month_05_day_01,B01_month_05_day_04,B01_month_05_day_11,B01_month_05_day_14,...,count,Prop_crop1,Prop_crop2,Prop_crop3,Prop_crop4,Prop_crop5,Prop_crop6,Prop_crop7,Prop_crop8,Prop_crop9
0,58.0,,7.0,,24.0,,105.0,,118.0,,...,15892.0,0.148988,0.153701,0.025072,0.168845,0.011311,0.161807,0.293515,0.035503,0.001257
1,22.0,,24.0,,14.0,,12.0,,28.0,,...,13530.0,0.075591,0.199930,0.039018,0.548099,0.033879,0.046053,0.049047,0.008382,0.000000
2,11.0,,10.0,,8.0,,6.0,,49.0,,...,10373.0,0.081654,0.200249,0.070366,0.459356,0.069266,0.071832,0.041486,0.005791,0.000000
3,10.0,,9.0,,9.0,,11.0,,163.0,,...,5190.0,0.075591,0.199930,0.039018,0.548099,0.033879,0.046053,0.049047,0.008382,0.000000
4,10.0,,7.0,,7.0,,5.0,,138.0,,...,11347.0,0.075591,0.199930,0.039018,0.548099,0.033879,0.046053,0.049047,0.008382,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87068,34.0,0.0,25.0,0.0,22.0,,96.0,,39.0,,...,1478.0,0.037741,0.052977,0.255328,0.074885,0.227046,0.059152,0.023700,0.001892,0.267277
87069,14.0,228.0,12.0,7.0,10.0,6.0,7.0,6.0,187.0,4.0,...,2432.0,0.037741,0.052977,0.255328,0.074885,0.227046,0.059152,0.023700,0.001892,0.267277
87070,16.0,,16.0,,16.0,,8.0,,156.0,,...,15544.0,0.081654,0.200249,0.070366,0.459356,0.069266,0.071832,0.041486,0.005791,0.000000
87071,20.0,0.0,19.0,0.0,16.0,,118.0,,115.0,,...,1391.0,0.037741,0.052977,0.255328,0.074885,0.227046,0.059152,0.023700,0.001892,0.267277


In [None]:
# Drop the intermediate frames that are no longer needed (train2/test2 and train_y are kept).
del train_df_mean,train_df_median,train1,train_size,test_df_mean,test_df_median,test1,test_size

In [None]:
import gc
# Run a garbage-collection pass after deleting the intermediate frames.
gc.collect()

60

In [None]:
# Convert the final feature tables to NumPy arrays for the models.
train = train2.to_numpy()
test  = test2.to_numpy()

In [None]:
# Shapes of the train and test feature matrices.
train.shape,test.shape

((87073, 1638), (35295, 1638))

In [None]:
# (1616-1520)/8

In [None]:
# ---------------------------------------------------------------------------
# Cross-validated ensemble: per fold, fit every model, weight the models by
# their validation log loss, blend the validation and test predictions.
# ---------------------------------------------------------------------------
num_models             = 3
temperature            = 50     # larger values put more weight on the best model of a fold

n_splits               = 15
error                  = []     # kept for compatibility; not populated by this cell

# Classes present in the target; used to size the prediction arrays and passed
# to log_loss so the metric stays well defined even if a validation fold
# happens to miss a class.
class_labels           = np.unique(train_y)

oof_pred               = np.zeros((len(train), len(class_labels)))
y_pred_final           = np.zeros((len(test), len(class_labels)))

kf                     = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=13)


def make_fold_models():
    """Return fresh, unfitted instances of the ensemble members (one XGBoost, two CatBoost)."""
    return [
        XGBClassifier(n_estimators=2000,random_state=13,learning_rate=0.04,colsample_bytree=0.95,reg_lambda=11,
                      tree_method='gpu_hist',eval_metric='mlogloss'),
        CatBoostClassifier(task_type='GPU',verbose=False,n_estimators=5000,random_state=13,
                           auto_class_weights='SqrtBalanced',max_depth=9,learning_rate=0.06),
        CatBoostClassifier(task_type='GPU',verbose=False,n_estimators=5000,random_state=13,
                           auto_class_weights='SqrtBalanced',max_depth=10,learning_rate=0.04),
    ]


def loss_to_weights(losses, temperature):
    """Turn per-model losses into blend weights that sum to 1.

    Each loss is divided by the sum of all losses, scaled by ``-temperature``
    and exponentiated; the result is normalised. Lower loss gives a higher weight.
    """
    # Convert first: dividing a plain Python list by a scalar only works when
    # the scalar happens to be a NumPy float.
    losses = np.asarray(losses, dtype=float)
    raw    = np.exp(-temperature * losses / losses.sum())
    return raw / raw.sum()


for fold, (tr_ind, val_ind) in enumerate(kf.split(train, train_y)):
    X_train, X_val     = train[tr_ind], train[val_ind]
    y_train, y_val     = train_y[tr_ind], train_y[val_ind]

    models = make_fold_models()
    assert len(models) == num_models, 'num_models is out of sync with make_fold_models()'

    val_preds = []
    logloss   = []
    for model_no, model in enumerate(models, start=1):
        model.fit(X_train,y_train)
        val_pred_model = model.predict_proba(X_val)
        # Compute the loss once and reuse it for the weights and the log line.
        model_loss     = log_loss(y_val,val_pred_model,labels=class_labels)
        val_preds.append(val_pred_model)
        logloss.append(model_loss)
        print(f'validation logloss model{model_no} fold-',fold+1,': ',model_loss)

    wghts = loss_to_weights(logloss, temperature)
    print(wghts)

    # Weighted blend of the per-model validation predictions.
    val_pred           = sum(w*p for w, p in zip(wghts, val_preds))

    print('Validation logloss for fold- ',fold+1,': ',log_loss(y_val,val_pred,labels=class_labels))

    oof_pred[val_ind]  = val_pred

    # Same weights for the test predictions; every fold contributes 1/n_splits.
    y_pred_final      += sum(w*m.predict_proba(test) for w, m in zip(wghts, models))/(n_splits)

    # Release the fitted models before the next fold is trained.
    del models, val_preds
    gc.collect()


print('OOF LogLoss :- ',(log_loss(train_y,oof_pred,labels=class_labels)))

validation logloss model1 fold- 1 :  0.5936555034390998


Custom logger is already specified. Specify more than one logger at same time is not thread safe.

validation logloss model2 fold- 1 :  0.5707725128714417
validation logloss model3 fold- 1 :  0.5657906911864271
[0.19325568 0.37438645 0.43235787]
Validation logloss for fold-  1 :  0.5609664661027098
validation logloss model1 fold- 2 :  0.5898446269806285
validation logloss model2 fold- 2 :  0.5659532736162941
validation logloss model3 fold- 2 :  0.5617881160273489
[0.18983216 0.38055568 0.42961216]
Validation logloss for fold-  2 :  0.5570822841421211
validation logloss model1 fold- 3 :  0.5824575493149275
validation logloss model2 fold- 3 :  0.5597204293929319
validation logloss model3 fold- 3 :  0.5526988718086507
[0.18651261 0.3647671  0.44872029]
Validation logloss for fold-  3 :  0.5491273549753668
validation logloss model1 fold- 4 :  0.5829159917986482
validation logloss model2 fold- 4 :  0.5622862055362879
validation logloss model3 fold- 4 :  0.5614108536845873
[0.21240995 0.38874569 0.39884436]
Validation logloss for fold-  4 :  0.5537175440899942
validation logloss model1 fo

In [None]:
# The in-memory test mean frame was deleted earlier, so the field ids are
# read again from 'test_mean.csv'.
# NOTE(review): this assumes the rows of test_mean.csv are in the same order
# as the rows of the test feature matrix — confirm.
test_df        = pd.read_csv('test_mean.csv')
field_ids_test = test_df['field_id'].tolist()

# One row per test field: the averaged class probabilities plus the field id.
outputs   = y_pred_final.copy()
data_test = pd.DataFrame(outputs).assign(field_id=field_ids_test)

# Discard rows whose field id is 0.
data_test = data_test[data_test['field_id'] != 0]
data_test

Unnamed: 0,0,1,2,3,4,5,6,7,8,field_id
0,0.037681,0.247147,0.010033,0.000017,0.231794,0.419999,0.046625,7.477364e-04,5.955092e-03,30.0
1,0.780669,0.070147,0.002477,0.000670,0.002030,0.098018,0.040909,5.056080e-03,2.487443e-05,56.0
2,0.001181,0.004220,0.000150,0.000648,0.000278,0.980621,0.012852,4.846798e-05,1.194055e-06,60.0
3,0.135308,0.114244,0.019421,0.000194,0.022771,0.338676,0.120763,2.484114e-01,2.128342e-04,97.0
4,0.465777,0.101670,0.026968,0.000189,0.148009,0.226407,0.021749,6.631608e-03,2.598801e-03,103.0
...,...,...,...,...,...,...,...,...,...,...
35290,0.011121,0.046027,0.209643,0.000441,0.019878,0.396291,0.314521,3.151616e-04,1.762608e-03,122646.0
35291,0.004670,0.042771,0.043667,0.000064,0.725697,0.015327,0.001126,7.794882e-05,1.666003e-01,122679.0
35292,0.007762,0.004155,0.000698,0.000039,0.000117,0.984989,0.002199,3.016745e-05,1.064694e-05,122692.0
35293,0.000289,0.001159,0.001328,0.996582,0.000625,0.000008,0.000009,1.321895e-07,4.001303e-07,122696.0


In [None]:
# Crop name for each probability column 0..8, in column order.
# NOTE(review): presumably column i holds the probability of label i+1
# (classifiers order predict_proba columns by sorted class label) — verify
# against the label map of the dataset.
crop_names = [
    'Lucerne/Medics',
    'Planted pastures (perennial)',
    'Fallow',
    'Wine grapes',
    'Weeds',
    'Small grain grazing',
    'Wheat',
    'Canola',
    'Rooibos',
]

data_test = data_test.rename(columns=dict(enumerate(crop_names)))

# Build the prediction table as a new frame: field id first, then the crops.
# astype() returns a new frame, which avoids assigning into a column selection
# of data_test (chained assignment).
pred_df = (
    data_test[['field_id'] + crop_names]
    .astype({'field_id': int})
    .sort_values(by=['field_id'],ascending=True)
)
pred_df

Unnamed: 0,field_id,Lucerne/Medics,Planted pastures (perennial),Fallow,Wine grapes,Weeds,Small grain grazing,Wheat,Canola,Rooibos
31256,5,0.000088,0.000632,0.000043,0.000381,0.000005,0.010702,0.986637,0.001512,1.183450e-07
19069,10,0.180454,0.665344,0.010827,0.004253,0.007558,0.125099,0.006052,0.000384,2.798391e-05
25220,11,0.092343,0.154214,0.007324,0.000084,0.031645,0.697745,0.009930,0.006109,6.066633e-04
25221,17,0.002715,0.012518,0.352008,0.001644,0.370400,0.223994,0.003746,0.000023,3.295181e-02
25222,18,0.000711,0.038963,0.105315,0.000913,0.852112,0.001945,0.000031,0.000008,1.226926e-06
...,...,...,...,...,...,...,...,...,...,...
6202,122722,0.000177,0.035403,0.886663,0.000109,0.072472,0.004989,0.000058,0.000010,1.202216e-04
31255,122724,0.053508,0.720629,0.075951,0.030575,0.018067,0.064423,0.035687,0.000673,4.872335e-04
6203,122726,0.001349,0.000254,0.000543,0.000007,0.000035,0.005697,0.992012,0.000099,3.152930e-06
35294,122730,0.010287,0.972242,0.000156,0.001071,0.002802,0.008092,0.004933,0.000407,1.045196e-05


In [None]:
# Write the prediction table to CSV without the DataFrame index.
pred_df.to_csv('trial1_sep_salim.csv',index=False)