In [None]:
!pip install scikit-learn==1.0
!pip install xgboost==1.4.2
!pip install catboost==0.26.1
!pip install pandas==1.3.3
!pip install radiant-mlhub==0.3.0
!pip install rasterio==1.2.8
!pip install numpy==1.21.2
!pip install pathlib==1.0.1
!pip install tqdm==4.62.3
!pip install joblib==1.0.1
!pip install matplotlib==3.4.3
!pip install Pillow==8.3.2
!pip install torch==1.9.1
!pip install plotly==5.3.1


In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

Fri Oct  1 21:18:51 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.57.02    Driver Version: 470.57.02    CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  On   | 00000000:03:00.0 Off |                  Off |
| N/A   38C    P0    38W / 300W |      0MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
import pandas as pd
import numpy as np
import random
import torch
def seed_all(seed_value):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch's
    CPU generator with ``seed_value``. When CUDA is available, the CUDA
    generators are seeded as well and cuDNN is switched to deterministic mode
    with benchmarking turned off.

    Args:
        seed_value: Integer seed shared by all generators.

    Returns:
        None.
    """
    # Python, NumPy and PyTorch (CPU) generators, in that order.
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed_value)

    if not torch.cuda.is_available():
        return

    # GPU generators: current device first, then all devices.
    torch.cuda.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)
    # Force deterministic cuDNN kernels and disable the auto-tuner.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


seed_all(13)

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
import warnings
warnings.filterwarnings("ignore")

import gc
import pandas as pd
import numpy as np
from sklearn.metrics import *
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from indices_creation import *

## Data Load Step 
1. We load the mean aggregations for both train and test. The mean aggregations contain the labels and field IDs.
2. We also load the median (50% quantile) aggregations for both train and test. These contain the field IDs.

In [None]:
import os
# Show the current working directory: every CSV in this notebook is read
# through a relative path, so the files must live in this directory.
os.getcwd()

'/root'

In [None]:
# --- Mean aggregations -------------------------------------------------------
# Per the original notes, 'label' (train only) and 'field_id' are still part
# of these frames and have to be dropped later in the code.
train_df_mean = pd.read_csv('train_mean.csv')
test_df_mean  = pd.read_csv('test_mean.csv')

# --- Field sizes -------------------------------------------------------------
# The key column is spelled 'Field_id' in these files; rename it so that it
# matches the 'field_id' key used for the merges.
train_size = pd.read_csv('size_of_field_train.csv').rename(columns={'Field_id': 'field_id'})
test_size  = pd.read_csv('size_of_field_test.csv').rename(columns={'Field_id': 'field_id'})

# --- Cluster assignment ------------------------------------------------------
cluster_df = pd.read_csv('seven_cluster.csv').rename(columns={'cluster_label': 'cluster_label_7'})

# --- Median aggregations, enriched with field size and cluster label ---------
# Left joins keep every row of the median frames; 'field_id' stays in the
# frames and has to be dropped later in the code.
train_df_median = (
    pd.read_csv('train_median.csv')
    .merge(train_size, on=['field_id'], how='left')
    .merge(cluster_df, on=['field_id'], how='left')
)
test_df_median = (
    pd.read_csv('test_median.csv')
    .merge(test_size, on=['field_id'], how='left')
    .merge(cluster_df, on=['field_id'], how='left')
)

gc.collect()

0

In [None]:
# Load the two 'full_nearest_radius_*' tables (radius 0.25 and radius 0.4).
full_nearest1, full_nearest2 = (
    pd.read_csv(csv_name)
    for csv_name in ('full_nearest_radius_0.25.csv', 'full_nearest_radius_0.4.csv')
)

In [None]:
# Column names of the radius-0.4 table without the 'field_id' key column.
colsnearest40 = list(full_nearest2.columns)
colsnearest40.remove('field_id')
colsnearest40

['Crop_1_0.4',
 'Crop_2_0.4',
 'Crop_3_0.4',
 'Crop_4_0.4',
 'Crop_5_0.4',
 'Crop_6_0.4',
 'Crop_7_0.4',
 'Crop_8_0.4',
 'Crop_9_0.4',
 'Crop_10_0.4',
 'count_0.4']

In [None]:
# Attach both nearest-radius tables to the median frames. Left joins on
# 'field_id' keep every row of the median frames.
train_df_median = (
    train_df_median
    .merge(full_nearest1, on=['field_id'], how='left')
    .merge(full_nearest2, on=['field_id'], how='left')
)
print(train_df_median.shape)

test_df_median = (
    test_df_median
    .merge(full_nearest1, on=['field_id'], how='left')
    .merge(full_nearest2, on=['field_id'], how='left')
)


(87113, 940)


## Removing Erroneous data points
We observed some data points whose labels were floats. We remove them (they are few in number) to make sure our model learns only from correctly labelled data points.

In [None]:
print(f'The shape of train data before outlier removal - {train_df_mean.shape}')

# Keep only the rows whose label is one of the integer classes 1..9.
valid_labels   = list(range(1, 10))
label_is_valid = train_df_mean['label'].isin(valid_labels)
train_df_mean  = train_df_mean[label_is_valid]

print(f'The shape of train data after outlier removal - {train_df_mean.shape}')

The shape of train data before outlier removal - (87113, 914)
The shape of train data after outlier removal - (87073, 914)


In [None]:
# Restrict the median frame to the field IDs that are present in train_df_mean.
relevant_fids   = train_df_mean['field_id'].tolist()
keep_rows       = train_df_median['field_id'].isin(relevant_fids)
train_df_median = train_df_median[keep_rows]

print(f'The shape of median train data - {train_df_median.shape} and mean train data {train_df_mean.shape}' )

# NOTE(review): the original remark here attributed the column-count difference
# between the two frames to 'label' and 'size_of_field' - confirm against the
# printed shapes, since other feature tables were also merged into the median frame.

The shape of median train data - (87073, 940) and mean train data (87073, 914)


### Extract date list 
We extract the list of all dates on which observations were recorded; these dates are used for index generation.

In [None]:
# Name prefixes of the twelve band column families.
cols = ['B01_', 'B02_', 'B03_', 'B04_', 'B05_', 'B06_',
        'B07_', 'B08_', 'B09_', 'B8A_', 'B11_', 'B12_']

columns_available = train_df_mean.columns.tolist()

# Every column whose name contains a band prefix, grouped band by band in the
# order of `cols`.
cols2consider = [name for prefix in cols for name in columns_available if prefix in name]

# The B01 columns are used as the reference: removing the 'B01_' part of their
# names leaves the date identifier of each observation.
bands_with_dates = [name for name in columns_available if 'B01_' in name]
dates            = [name.replace('B01_','') for name in bands_with_dates]

print(f'The sample showing the commencement dates where observations were seen is {dates[:10]}')
print(f'The sample showing the ending dates where observations were seen is {dates[-10:]}')

The sample showing the commencement dates where observations were seen is ['month_04_day_01', 'month_04_day_04', 'month_04_day_11', 'month_04_day_14', 'month_04_day_21', 'month_04_day_24', 'month_05_day_01', 'month_05_day_04', 'month_05_day_11', 'month_05_day_14']
The sample showing the ending dates where observations were seen is ['month_11_day_05', 'month_11_day_07', 'month_11_day_10', 'month_11_day_12', 'month_11_day_15', 'month_11_day_17', 'month_11_day_20', 'month_11_day_22', 'month_11_day_27', 'month_11_day_30']


### Removal of field ID column
We keep only the columns that are relevant for the next step.

In [None]:
# Columns kept on the median frames: the band columns plus field size, cluster
# label and the columns of both nearest-radius tables.
# full_nearest1.columns still contains 'field_id', so that key stays in the
# median frames after this selection.
median_cols = (
    cols2consider
    + ['size_of_field', 'cluster_label_7']
    + full_nearest1.columns.tolist()
    + colsnearest40
)

# Mean frames: band columns only (plus the target for train).
train_df_mean   = train_df_mean[[*cols2consider, 'label']]
test_df_mean    = test_df_mean[cols2consider]

train_df_median = train_df_median[median_cols]
test_df_median  = test_df_median[median_cols]

### Indices Creation
We will create the indices for the train and test mean aggregates using the index functions defined in the `indices_creation.py` module.

In [None]:
# train_df_mean = get_band_ndvi_red(train_df_mean,dates)
# train_df_mean = get_band_afri(train_df_mean,dates)
# train_df_mean = get_band_evi2(train_df_mean,dates)
# train_df_mean = get_band_ndmi(train_df_mean,dates)
# train_df_mean = get_band_ndvi(train_df_mean,dates)
# train_df_mean = get_band_evi(train_df_mean,dates)
# train_df_mean = get_band_bndvi(train_df_mean,dates)
# train_df_mean = get_band_nli(train_df_mean,dates)
# train_df_mean = get_band_lci(train_df_mean,dates)


# test_df_mean = get_band_ndvi_red(test_df_mean,dates)
# test_df_mean = get_band_afri(test_df_mean,dates)
# test_df_mean = get_band_evi2(test_df_mean,dates)
# test_df_mean = get_band_ndmi(test_df_mean,dates)
# test_df_mean = get_band_ndvi(test_df_mean,dates)
# test_df_mean = get_band_evi(test_df_mean,dates)
# test_df_mean = get_band_bndvi(test_df_mean,dates)
# test_df_mean = get_band_nli(test_df_mean,dates)
# test_df_mean = get_band_lci(test_df_mean,dates)




We will create the indices for the train and test median aggregates using the index functions defined in the `indices_creation.py` module.

In [None]:
# Index builders, applied in this order to the median frames. They are
# presumably provided by the star import of indices_creation - verify.
# get_band_lci is intentionally not part of the list (it was disabled in the
# original cell), and the raw band columns in cols2consider are kept.
index_builders = (
    get_band_ndvi_red,
    get_band_afri,
    get_band_evi2,
    get_band_ndmi,
    get_band_ndvi,
    get_band_evi,
    get_band_bndvi,
    get_band_nli,
)

# Train frame first: each builder receives the frame and the date list, and
# its return value replaces the frame.
for build_index in index_builders:
    train_df_median = build_index(train_df_median, dates)

# Then the test frame, with the same builders in the same order.
for build_index in index_builders:
    test_df_median = build_index(test_df_median, dates)

In [None]:
# Sanity check: display the (rows, columns) shape of the four frames side by side.
train_df_mean.shape,train_df_median.shape,test_df_mean.shape,test_df_median.shape

((87073, 913), (87073, 1545), (35295, 912), (35295, 1545))

In [None]:
# Keep the target as a NumPy array and remove it from the mean features.
train_y       = train_df_mean['label'].values
train_df_mean = train_df_mean.drop(columns=['label'])

# Replace +inf / -inf by NaN in every frame.
infinities      = [np.inf, -np.inf]
train_df_mean   = train_df_mean.replace(infinities, np.nan)
test_df_mean    = test_df_mean.replace(infinities, np.nan)
train_df_median = train_df_median.replace(infinities, np.nan)
test_df_median  = test_df_median.replace(infinities, np.nan)

# The model matrices are taken from the median frames only.
train = train_df_median.values
test  = test_df_median.values

print(f'The shape of model ready train data is {train.shape} and model ready test data is {test.shape}')
print(f'The shape of target is {train_y.shape}')

The shape of model ready train data is (87073, 1545) and model ready test data is (35295, 1545)
The shape of target is (87073,)


In [None]:
# Load the feature tables that include the slope features and replace
# +inf / -inf by NaN right away.
train1 = pd.read_csv('train_with_slopes.csv').replace([np.inf, -np.inf], np.nan)
test1  = pd.read_csv('test_with_slopes.csv').replace([np.inf, -np.inf], np.nan)



In [None]:
# Extra columns taken from the median frames: field size, cluster label and
# the columns of both nearest-radius tables.
extra_cols = (
    ['size_of_field', 'cluster_label_7']
    + full_nearest1.columns.tolist()
    + colsnearest40
)

# Both parts get a fresh 0..n-1 index, so the column-wise concat pairs rows
# purely by position.
# NOTE(review): this assumes the slope tables list the fields in the same
# order as the median frames - confirm.
train2 = pd.concat(
    [
        pd.DataFrame(train1.values, columns=train1.columns),
        train_df_median[extra_cols].reset_index(drop=True),
    ],
    axis=1,
)
test2 = pd.concat(
    [
        pd.DataFrame(test1.values, columns=test1.columns),
        test_df_median[extra_cols].reset_index(drop=True),
    ],
    axis=1,
)
train2.head()

Unnamed: 0,B01_month_04_day_01,B01_month_04_day_04,B01_month_04_day_11,B01_month_04_day_14,B01_month_04_day_21,B01_month_04_day_24,B01_month_05_day_01,B01_month_05_day_04,B01_month_05_day_11,B01_month_05_day_14,...,Crop_2_0.4,Crop_3_0.4,Crop_4_0.4,Crop_5_0.4,Crop_6_0.4,Crop_7_0.4,Crop_8_0.4,Crop_9_0.4,Crop_10_0.4,count_0.4
0,58.26087,,8.826087,,23.73913,,98.565217,,120.173913,,...,14.955609,3.809641,5.581186,4.135654,11.10291,16.993705,2.370261,1.174879,29.466281,48771.0
1,22.491803,,24.770492,,13.885246,,12.344262,,26.04918,,...,13.66261,2.841607,30.91258,1.866909,6.309446,8.243691,1.131256,0.001683,28.591149,59403.0
2,10.785714,,10.142857,,8.885714,,6.385714,,47.542857,,...,14.023369,3.256135,30.126057,1.867828,6.19608,8.061814,1.109808,0.0,28.767066,47756.0
3,9.940594,,9.039604,,8.811881,,10.772277,,161.980198,,...,12.145954,2.987043,43.507366,2.048837,2.761366,2.888151,0.40571,0.0,28.676116,39437.0
4,9.922936,,7.808257,,7.249541,,5.009174,,136.221101,,...,13.53841,2.790385,41.253992,2.454194,3.092957,3.04589,0.474029,0.0,28.122374,29745.0


In [None]:
# 'field_id' is an identifier, not a feature: remove it from both frames.
train2 = train2.drop(columns='field_id')
test2  = test2.drop(columns='field_id')

In [None]:
# Load the pivot table and display it. It carries a 'cluster_label_7' column,
# which is the key used when it is merged onto the feature frames.
pivot=pd.read_csv('pivottable.csv')
pivot

Unnamed: 0,cluster_label_7,Prop_crop1,Prop_crop2,Prop_crop3,Prop_crop4,Prop_crop5,Prop_crop6,Prop_crop7,Prop_crop8,Prop_crop9
0,0,0.075591,0.19993,0.039018,0.548099,0.033879,0.046053,0.049047,0.008382,0.0
1,1,0.037741,0.052977,0.255328,0.074885,0.227046,0.059152,0.0237,0.001892,0.267277
2,2,0.139854,0.183067,0.087514,0.030734,0.074617,0.13215,0.227452,0.04338,0.081233
3,3,0.097868,0.203745,0.084089,0.003769,0.193381,0.167471,0.180191,0.017077,0.052408
4,4,0.081654,0.200249,0.070366,0.459356,0.069266,0.071832,0.041486,0.005791,0.0
5,5,0.065286,0.039571,0.206714,0.447714,0.216,0.024286,0.0,0.0,0.000429
6,6,0.148988,0.153701,0.025072,0.168845,0.011311,0.161807,0.293515,0.035503,0.001257


In [None]:
# Add the pivot-table columns to both feature frames via a left join on the
# cluster label.
train2, test2 = (
    features.merge(pivot, how='left', on='cluster_label_7')
    for features in (train2, test2)
)

In [None]:
# Inspect the merged training frame (the notebook shows a truncated view).
train2

Unnamed: 0,B01_month_04_day_01,B01_month_04_day_04,B01_month_04_day_11,B01_month_04_day_14,B01_month_04_day_21,B01_month_04_day_24,B01_month_05_day_01,B01_month_05_day_04,B01_month_05_day_11,B01_month_05_day_14,...,count_0.4,Prop_crop1,Prop_crop2,Prop_crop3,Prop_crop4,Prop_crop5,Prop_crop6,Prop_crop7,Prop_crop8,Prop_crop9
0,58.260870,,8.826087,,23.739130,,98.565217,,120.173913,,...,48771.0,0.148988,0.153701,0.025072,0.168845,0.011311,0.161807,0.293515,0.035503,0.001257
1,22.491803,,24.770492,,13.885246,,12.344262,,26.049180,,...,59403.0,0.075591,0.199930,0.039018,0.548099,0.033879,0.046053,0.049047,0.008382,0.000000
2,10.785714,,10.142857,,8.885714,,6.385714,,47.542857,,...,47756.0,0.081654,0.200249,0.070366,0.459356,0.069266,0.071832,0.041486,0.005791,0.000000
3,9.940594,,9.039604,,8.811881,,10.772277,,161.980198,,...,39437.0,0.075591,0.199930,0.039018,0.548099,0.033879,0.046053,0.049047,0.008382,0.000000
4,9.922936,,7.808257,,7.249541,,5.009174,,136.221101,,...,29745.0,0.075591,0.199930,0.039018,0.548099,0.033879,0.046053,0.049047,0.008382,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87068,33.291785,0.000000,23.810198,0.000000,22.005666,,95.643059,,40.201133,,...,7924.0,0.037741,0.052977,0.255328,0.074885,0.227046,0.059152,0.023700,0.001892,0.267277
87069,13.775216,224.293948,11.123919,6.694524,9.873199,6.420749,7.112392,5.334294,186.092219,3.893372,...,13459.0,0.037741,0.052977,0.255328,0.074885,0.227046,0.059152,0.023700,0.001892,0.267277
87070,15.895765,,16.146580,,16.876221,,8.044517,,138.827362,,...,47616.0,0.081654,0.200249,0.070366,0.459356,0.069266,0.071832,0.041486,0.005791,0.000000
87071,20.438291,0.000000,19.069620,0.000000,16.447785,,118.457278,,109.474684,,...,7492.0,0.037741,0.052977,0.255328,0.074885,0.227046,0.059152,0.023700,0.001892,0.267277


In [None]:
# Remove the names of the intermediate frames; train2 / test2 and train_y are
# what the remaining cells work with.
del train_df_mean,train_df_median,train1,train_size,test_df_mean,test_df_median,test1,test_size

In [None]:
import gc
# Run a garbage-collection pass; the value shown below the cell is the return
# value of gc.collect().
gc.collect()

40

In [None]:
# Build the model matrices from a de-duplicated list of column names.
#
# The column names are de-duplicated with dict.fromkeys, which keeps the first
# occurrence of every name in its original position. A plain set() must not be
# used here: the iteration order of a set of strings depends on the per-process
# hash seed, so the feature order - and with it the result of the seeded
# models - would change from one kernel restart to the next.
feature_cols = list(dict.fromkeys(train2.columns.tolist()))

# The same ordered list is applied to both frames so that train and test
# columns line up.
train = train2[feature_cols].values
test  = test2[feature_cols].values

In [None]:
# Sanity check: train and test matrices must have the same number of columns.
train.shape,test.shape

((87073, 1650), (35295, 1650))

In [None]:
# (1616-1520)/8

In [None]:
# train_slope = train1[[cols for cols in train1.columns if re.findall(r'\b(\w+NDVI_red_slope)\b',cols)]]
# test_slope  = test1[[cols for cols in test1.columns if re.findall(r'\b(\w+NDVI_red_slope)\b',cols)]]

In [None]:
# train         = pd.concat([train_df_median.reset_index(drop=True),train_slope],axis=1).values
# test          = pd.concat([test_df_median.reset_index(drop=True),test_slope],axis=1).values

In [None]:
# train.shape,test.shape


In [None]:
# -----------------------------------------------------------------------------
# Stratified K-fold training of a three-model blend (1 XGBoost + 2 CatBoost).
#
# For every fold:
#   1. fit each model on the training part of the fold,
#   2. score each model on the validation part (multi-class log loss),
#   3. turn the three losses into blend weights (lower loss -> larger weight),
#   4. store the blended validation prediction as out-of-fold prediction,
#   5. add the blended test prediction, divided by the number of folds, to the
#      running test prediction.
#
# All three models are configured for GPU training ('gpu_hist' / task_type='GPU').
#
# NOTE: the blend weights of a fold are derived from that fold's validation
# losses, so the blended per-fold and OOF log loss are slightly optimistic.
# -----------------------------------------------------------------------------

# Sorted distinct labels of the target. They are passed to log_loss explicitly
# so that a validation fold which happens to miss a class is still scored
# against all probability columns instead of raising.
class_labels           = np.unique(train_y)
n_classes              = len(class_labels)   # expected to be 9 (labels 1..9)

oof_pred               = np.zeros((len(train), n_classes))
y_pred_final           = np.zeros((len(test), n_classes))

# Sharpness of the loss -> weight mapping: higher values favour the best model more.
temperature            = 50

n_splits               = 15
kf                     = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=13)

for fold, (tr_ind, val_ind) in enumerate(kf.split(train, train_y)):
    X_train, X_val     = train[tr_ind], train[val_ind]
    y_train, y_val     = train_y[tr_ind], train_y[val_ind]

    # Fresh, identically seeded models for every fold.
    model1 = XGBClassifier(n_estimators=2000, random_state=13, learning_rate=0.04,
                           colsample_bytree=0.95, reg_lambda=13,
                           tree_method='gpu_hist', eval_metric='mlogloss')

    model2 = CatBoostClassifier(task_type='GPU', verbose=False, n_estimators=5000, random_state=13,
                                auto_class_weights='SqrtBalanced', max_depth=9, learning_rate=0.05)

    model3 = CatBoostClassifier(task_type='GPU', verbose=False, n_estimators=5000, random_state=13,
                                auto_class_weights='SqrtBalanced', max_depth=10, learning_rate=0.04)

    models = [model1, model2, model3]

    # Fit and score the models one after the other.
    val_preds = []
    logloss   = []
    for model_no, model in enumerate(models, start=1):
        model.fit(X_train, y_train)
        model_val_pred = model.predict_proba(X_val)
        model_loss     = log_loss(y_val, model_val_pred, labels=class_labels)

        val_preds.append(model_val_pred)
        logloss.append(model_loss)
        print(f'validation logloss model{model_no} fold-', fold+1, ': ', model_loss)

    # Blend weights: exp(-temperature * loss share), normalised to sum to 1.
    # The losses are converted to an array *before* the division; dividing the
    # Python list directly only worked through numpy's reflected operator.
    wghts = np.exp(-temperature * np.array(logloss) / sum(logloss))
    wghts = wghts/sum(wghts)
    print(wghts)

    # Weighted blend of the validation predictions.
    val_pred = sum(w * pred for w, pred in zip(wghts, val_preds))

    print('Validation logloss for fold- ',fold+1,': ',log_loss(y_val, val_pred, labels=class_labels))

    oof_pred[val_ind]  = val_pred

    # Weighted blend of the test predictions; dividing by n_splits averages
    # the contribution of all folds.
    test_pred     = sum(w * model.predict_proba(test) for w, model in zip(wghts, models))
    y_pred_final += test_pred/(n_splits)


print('OOF LogLoss :- ',(log_loss(train_y, oof_pred, labels=class_labels)))

validation logloss model1 fold- 1 :  0.5877924835419754
validation logloss model2 fold- 1 :  0.5686535436064419
validation logloss model3 fold- 1 :  0.5619457658249934
[0.20548243 0.35861369 0.43590388]
Validation logloss for fold-  1 :  0.5579740768992283
validation logloss model1 fold- 2 :  0.5871160772721382
validation logloss model2 fold- 2 :  0.5559436047752084
validation logloss model3 fold- 2 :  0.5531933200259915
[0.16065456 0.40267072 0.43667472]
Validation logloss for fold-  2 :  0.5495859745117204
validation logloss model1 fold- 3 :  0.577415838338748
validation logloss model2 fold- 3 :  0.5543731450218882
validation logloss model3 fold- 3 :  0.5524433427821064
[0.19682247 0.39008835 0.41308918]
Validation logloss for fold-  3 :  0.5468733433342736
validation logloss model1 fold- 4 :  0.5705960778655467
validation logloss model2 fold- 4 :  0.5467584949515027
validation logloss model3 fold- 4 :  0.5467080124031812
[0.19621001 0.40159019 0.4021998 ]
Validation logloss for fold

In [None]:
# Re-read the test mean file to recover the field IDs that belong to the rows
# of the prediction matrix.
# NOTE(review): this assumes the rows of y_pred_final are in the same order as
# the rows of test_mean.csv - confirm.
test_df        = pd.read_csv('test_mean.csv')
field_ids_test = test_df['field_id'].values.tolist()

# Work on a copy so that y_pred_final itself stays untouched.
outputs   = y_pred_final.copy()

# One probability column per class (named 0..8) plus the field ID.
data_test = pd.DataFrame(outputs).assign(field_id=field_ids_test)

# Discard rows whose field ID is 0.
data_test = data_test.loc[data_test['field_id'] != 0]
data_test

Unnamed: 0,0,1,2,3,4,5,6,7,8,field_id
0,0.027289,0.298823,0.008114,0.000028,0.213226,0.393688,0.053636,8.667770e-04,4.328525e-03,30.0
1,0.796483,0.071137,0.001855,0.000778,0.002329,0.088037,0.034633,4.701676e-03,4.489268e-05,56.0
2,0.001111,0.005675,0.000152,0.000511,0.000210,0.983060,0.009236,4.283732e-05,1.087809e-06,60.0
3,0.139158,0.096670,0.023242,0.000184,0.017144,0.421435,0.118316,1.836986e-01,1.528477e-04,97.0
4,0.408185,0.116360,0.020229,0.000213,0.224577,0.221452,0.005435,2.265238e-03,1.283425e-03,103.0
...,...,...,...,...,...,...,...,...,...,...
35290,0.007985,0.038707,0.156166,0.000355,0.018493,0.417150,0.358624,3.393917e-04,2.178943e-03,122646.0
35291,0.003449,0.033928,0.035072,0.000051,0.802478,0.012815,0.000983,1.159262e-04,1.111079e-01,122679.0
35292,0.009033,0.005333,0.000739,0.000056,0.000154,0.981157,0.003446,6.501536e-05,1.669552e-05,122692.0
35293,0.000323,0.001105,0.001725,0.995949,0.000879,0.000010,0.000009,1.771444e-07,4.028180e-07,122696.0


In [None]:
# Map the positional probability columns 0..8 to the crop names.
crop_names = {
    0: 'Lucerne/Medics',
    1: 'Planted pastures (perennial)',
    2: 'Fallow',
    3: 'Wine grapes',
    4: 'Weeds',
    5: 'Small grain grazing',
    6: 'Wheat',
    7: 'Canola',
    8: 'Rooibos',
}
data_test = data_test.rename(columns=crop_names)

# Final layout: field ID first, then the crops in the order of the mapping.
submission_cols = ['field_id', *crop_names.values()]

# Cast the field ID to integer and order the rows by it.
pred_df = (
    data_test[submission_cols]
    .astype({'field_id': int})
    .sort_values(by=['field_id'], ascending=True)
)
pred_df

Unnamed: 0,field_id,Lucerne/Medics,Planted pastures (perennial),Fallow,Wine grapes,Weeds,Small grain grazing,Wheat,Canola,Rooibos
31256,5,0.000109,0.000952,0.000058,0.000467,0.000009,0.010922,0.984966,0.002517,2.788245e-07
19069,10,0.178528,0.667662,0.007207,0.002339,0.006618,0.132891,0.004466,0.000264,2.416339e-05
25220,11,0.110943,0.216427,0.005735,0.000093,0.034633,0.608299,0.012253,0.010895,7.235058e-04
25221,17,0.003038,0.015980,0.367803,0.001673,0.377777,0.182504,0.003099,0.000036,4.809052e-02
25222,18,0.001152,0.021704,0.083676,0.002851,0.889102,0.001476,0.000030,0.000007,1.421812e-06
...,...,...,...,...,...,...,...,...,...,...
6202,122722,0.000200,0.036617,0.873015,0.000145,0.084125,0.005686,0.000067,0.000012,1.315617e-04
31255,122724,0.035144,0.671649,0.088709,0.032365,0.009985,0.089345,0.071948,0.000619,2.359834e-04
6203,122726,0.002086,0.000360,0.001191,0.000007,0.000047,0.005558,0.990643,0.000106,3.348195e-06
35294,122730,0.008914,0.972591,0.000196,0.001610,0.003631,0.009185,0.003605,0.000258,9.484843e-06


In [None]:
# Write the submission file: 'field_id' plus one probability column per crop,
# without the DataFrame index.
pred_df.to_csv('trial1_sep_akash.csv',index=False)