In [None]:
# Swap any preinstalled OpenCV for the pinned 3.4.2.17 builds; the notebook relies
# on cv2.xfeatures2d (SIFT/SURF) from the contrib package.
# -y answers pip's "Proceed (Y/n)?" prompt so the cell can run unattended.
!pip uninstall -y opencv-contrib-python opencv-python
!pip install opencv-python==3.4.2.17 opencv-contrib-python==3.4.2.17

In [None]:
from skimage.transform import warp, AffineTransform
from skimage.filters import gaussian
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [None]:
from keras.preprocessing.image import ImageDataGenerator

In [None]:
def extractSIFT(image, nfeatures=5):
  """Compute SIFT descriptors for ``image``.

  Args:
    image: Image array accepted by OpenCV's feature detectors.
    nfeatures: Value passed to ``SIFT_create`` (number of best keypoints to
      retain). Defaults to 5, the value that used to be hard-coded.

  Returns:
    The descriptor array produced by ``detectAndCompute``. OpenCV yields
    ``None`` when no keypoint is found; in that case an empty
    ``(0, descriptorSize)`` float32 array is returned instead so callers can
    call ``.flatten()`` unconditionally.
  """
  sift = cv2.xfeatures2d.SIFT_create(nfeatures)
  _, descriptors = sift.detectAndCompute(image, None)
  if descriptors is None:
    return np.empty((0, sift.descriptorSize()), dtype=np.float32)
  return descriptors

In [None]:
def extractSURF(image, hessianThreshold=5):
  """Compute SURF descriptors for ``image``.

  Args:
    image: Image array accepted by OpenCV's feature detectors.
    hessianThreshold: First argument of ``SURF_create``. Unlike SIFT's first
      argument this is the Hessian keypoint threshold, not a feature count.
      Defaults to 5, the value that used to be hard-coded.
      NOTE(review): if "5 keypoints" was the intent, this needs a different
      mechanism - confirm.

  Returns:
    The descriptor array produced by ``detectAndCompute``, or an empty
    ``(0, descriptorSize)`` float32 array when no keypoint is found (OpenCV
    returns ``None`` in that case).
  """
  surf = cv2.xfeatures2d.SURF_create(hessianThreshold)
  _, descriptors = surf.detectAndCompute(image, None)
  if descriptors is None:
    return np.empty((0, surf.descriptorSize()), dtype=np.float32)
  return descriptors

In [None]:
# -p: do not fail if the directory already exists, so the cell can be re-run.
!mkdir -p '/content/images'
# -q: suppress the per-file listing; -o: overwrite existing files without prompting.
!unzip -q -o '/content/drive/MyDrive/rice disease/Images.zip' -d '/content/images'

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/images/id_c8sl86v71p.jpg  
  inflating: /content/images/id_c8sl86v71p_rgn.jpg  
  inflating: /content/images/id_c8zqlaoscm.jpg  
  inflating: /content/images/id_c8zqlaoscm_rgn.jpg  
  inflating: /content/images/id_c8zzji9xf7.jpg  
  inflating: /content/images/id_c8zzji9xf7_rgn.jpg  
  inflating: /content/images/id_c996aefk2b.jpg  
  inflating: /content/images/id_c996aefk2b_rgn.jpg  
  inflating: /content/images/id_ca1ulb7phu.jpg  
  inflating: /content/images/id_ca1ulb7phu_rgn.jpg  
  inflating: /content/images/id_cai4ccm9qm.jpg  
  inflating: /content/images/id_cai4ccm9qm_rgn.jpg  
  inflating: /content/images/id_carqo16fzo.jpg  
  inflating: /content/images/id_carqo16fzo_rgn.jpg  
  inflating: /content/images/id_cawcl2dlbx.jpg  
  inflating: /content/images/id_cawcl2dlbx_rgn.jpg  
  inflating: /content/images/id_cb2ltrvv0p.jpg  
  inflating: /content/images/id_cb2ltrvv0p_rgn.jpg  
  inflating: /con

In [None]:
# Load the training labels; index_col=False keeps pandas from using the first
# column as the index.
train_df = pd.read_csv(
    '/content/drive/MyDrive/rice disease/Train.csv',
    index_col=False,
)

In [None]:
# Preview the first five rows of the label table.
train_df.head(n=5)

Unnamed: 0,Image_id,Label
0,id_004wknd7qd.jpg,blast
1,id_004wknd7qd_rgn.jpg,blast
2,id_005sitfgr2.jpg,brown
3,id_005sitfgr2_rgn.jpg,brown
4,id_00stp9t6m6.jpg,blast


In [None]:
# Row labels of every odd-numbered row. In the preview above the odd rows are the
# `_rgn` variants - presumably that alternation holds for the whole file; verify.
# Derived from the frame's length rather than a hard-coded 5340 so the cell keeps
# working if Train.csv changes size.
index = list(range(1, len(train_df), 2))

In [None]:
# Remove the selected rows and renumber the remaining ones from 0.
train_df_new = train_df.drop(index=index).reset_index(drop=True)

In [None]:
# 80/20 split, stratified on the label so both parts keep the class proportions.
train, validation = train_test_split(
    train_df_new,
    test_size=0.2,
    stratify=train_df_new['Label'],
    random_state=42,
)

In [None]:
# Augmentation policy: random rotations (range 40) plus horizontal/vertical flips.
idg = ImageDataGenerator(
    rotation_range=40,
    horizontal_flip=True,
    vertical_flip=True,
)


In [None]:
# Generator over the training split: each call yields a batch of one augmented
# 224x224 image and its integer class index (class_mode='sparse').
# `seed` pins the generator's shuffling/transformations so the augmented feature
# set can be reproduced between runs (same value as the split's random_state).
train_dfgn = idg.flow_from_dataframe(
    train,
    '/content/images',
    x_col='Image_id',
    y_col='Label',
    batch_size=1,
    target_size=(224, 224),
    class_mode='sparse',
    data_format='channels_last',
    seed=42,
)

Found 2136 validated image filenames belonging to 3 classes.


In [None]:
# Show the class-name -> integer mapping the generator assigned to the labels.
train_dfgn.class_indices

{'blast': 0, 'brown': 1, 'healthy': 2}

In [None]:
def extractFeatures(dataFrame,extractor=None,test=False):
  """Extract one flattened descriptor vector per image listed in ``dataFrame``.

  Each image is read from ``/content/images/<Image_id>``, converted to
  grayscale, resized to 224x224 and handed to ``extractor``.

  Args:
    dataFrame: Frame with an ``Image_id`` column and, unless ``test`` is true,
      a ``Label`` column.
    extractor: Callable mapping an image to a descriptor array (or ``None``
      when nothing was detected). Defaults to ``extractSIFT``.
    test: When true, no labels are read and only the feature frame is returned.

  Returns:
    ``(features, labels)`` when ``test`` is false, otherwise ``features``.
    ``features`` has one row per image in input order, a fresh 0..n-1 index and
    integer column labels; shorter descriptor vectors are padded with NaN.
    ``labels`` is the ``Label`` column with its original index.

  Raises:
    TypeError: if ``extractor`` is not callable.
    FileNotFoundError: if an image cannot be read.
  """
  if extractor is None:
    extractor = extractSIFT
  if not callable(extractor):
    raise TypeError(
        f'extractor must be callable, got {type(extractor).__name__}; '
        'pass the test flag by keyword (test=True)')

  rows = []
  for imageId in dataFrame['Image_id']:
    path = f'/content/images/{imageId}'
    image = cv2.imread(path)
    if image is None:
      # cv2.imread signals failure by returning None instead of raising.
      raise FileNotFoundError(f'could not read image: {path}')
    image = cv2.resize(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY),(224,224))
    descriptors = extractor(image)
    # No keypoints -> empty row, which becomes an all-NaN row in the frame.
    rows.append([] if descriptors is None
                else np.asarray(descriptors).flatten().tolist())

  # Build the frame once; appending row by row is quadratic and
  # DataFrame.append no longer exists in pandas 2.
  dataset_df = pd.DataFrame(rows)
  if test:
    return dataset_df
  return dataset_df, dataFrame['Label']

In [None]:
def extractFeaturesAugmented():
  """Extract SIFT features from one augmented image per training row.

  Draws ``len(train)`` batches from the module-level generator ``train_dfgn``
  (batch size 1), rescales each image to 0-255 ``uint8`` and runs
  ``extractSIFT`` on it.

  Returns:
    A frame with one row per drawn image: the flattened descriptor values
    (NaN-padded where fewer values were produced) followed by the integer
    class index as the LAST column. That label column is named ``0``, the same
    name as the first feature column, so address it by position
    (``.iloc[:, -1]``), not by label.
  """
  feature_rows = []
  label_values = []
  for _ in range(len(train)):
    x,y = next(train_dfgn)
    image = cv2.normalize(x[0], None, 0, 255, cv2.NORM_MINMAX).astype('uint8')
    descriptors = extractSIFT(image)
    # No keypoints -> empty row, which becomes an all-NaN row in the frame.
    feature_rows.append([] if descriptors is None
                        else np.asarray(descriptors).flatten().tolist())
    label_values.append(int(y[0]))
  # Build both frames once instead of growing them row by row
  # (quadratic, and DataFrame.append no longer exists in pandas 2).
  return pd.concat([pd.DataFrame(feature_rows),pd.DataFrame(label_values)],axis=1)

In [None]:
def imputeNullValues(data, ratio=50):
    '''
    Drop sparse columns and mean-impute the NaN values of the remaining ones.

    Columns are visited by their actual labels, so frames that carry
    non-integer columns (e.g. ``'Label'``) next to the integer feature columns
    are handled. A column whose percentage of NaN values is at least ``ratio``
    is dropped; every other column is converted with
    ``pd.to_numeric(..., downcast="float")`` and its NaNs are replaced by the
    column mean.

    ``data`` is modified in place and also returned. Column labels are
    assumed to be unique.
    '''
    def clean_column(series):
        series = pd.to_numeric(series, downcast="float")
        return series.fillna(series.mean())

    sparse_columns = []
    # Snapshot the labels: columns are reassigned while iterating.
    for label in list(data.columns):
        # isna().mean() is the NaN fraction of the column.
        if data[label].isna().mean() * 100 >= ratio:
            sparse_columns.append(label)
        else:
            data[label] = clean_column(data[label])

    data.drop(columns=sparse_columns, inplace=True)
    return data

In [None]:
# Features and labels for the (non-augmented) training split.
# NOTE(review): this call relies on extractFeatures supplying a default extractor
# and returning a (features, labels) pair - confirm against the definition in scope.
training_features,y = extractFeatures(train)

In [None]:
# Display the training split (pandas truncates the rendering of long frames).
train

Unnamed: 0,Image_id,Label
339,id_4q1gs6sy07.jpg,brown
57,id_0rt7042emf.jpg,blast
2485,id_xhh2e7a3id.jpg,blast
6,id_01z6i8am9b.jpg,blast
2093,id_sae50ick1t.jpg,brown
...,...,...
925,id_cfz0tbjaec.jpg,blast
2379,id_w02anaqd5p.jpg,blast
1491,id_jy0g9jz7q2.jpg,blast
2085,id_s6dqnz9oa4.jpg,blast


In [None]:
# Features of the augmented training images; the class index is the last column.
training_features_aug = extractFeaturesAugmented()

In [None]:
# Pull the last column (the augmented images' class indices) out as int32 and
# wrap it in its own single-column frame.
labelsAug = pd.DataFrame(
    np.int32(training_features_aug.iloc[:, -1])
)

In [None]:
# Encode the training labels as integers with the generator's class mapping, then
# display them. The assignment lives in this cell because `labelsNormal` is not
# defined anywhere earlier in the notebook, so a fresh top-to-bottom run would
# otherwise stop here with a NameError.
labelsNormal = y.map(train_dfgn.class_indices)
labelsNormal

339     1
57      0
2485    0
6       0
2093    1
       ..
925     0
2379    0
1491    0
2085    0
1725    2
Name: Label, Length: 2136, dtype: int64

In [None]:
# Encode the string labels as integers using the generator's class-name mapping.
labelsNormal = y.map(train_dfgn.class_indices)

In [None]:
# Stack the original labels on top of the augmented ones and renumber the rows.
labels_concat = (
    pd.concat([labelsNormal, labelsAug], axis=0)
    .reset_index(drop=True)
)

In [None]:
# Name the single label column (in place) so it is distinguishable from the
# integer-named feature columns.
labels_concat.columns=['Label']

In [None]:
# Keep every column except the last one (the class index). Selecting by position
# matters: DataFrame.drop interprets its argument as column *labels*, so passing
# the label values (0/1/2) also removed the feature columns named 0, 1 and 2.
training_features_aug = training_features_aug.iloc[:, :-1]

In [None]:
# Original features first, augmented features below them.
training_features_df = pd.concat(
    [training_features, training_features_aug],
    axis=0,
)

In [None]:
# Display the combined (original + augmented) label column.
labels_concat

Unnamed: 0,Label
0,1
1,0
2,0
3,0
4,1
...,...
4267,0
4268,1
4269,1
4270,0


In [None]:
from sklearn.utils import shuffle


In [None]:
# Put features and labels side by side; both indexes are reset first so the rows
# pair up by position.
training_features_df = pd.concat(
    [
        training_features_df.reset_index(drop=True),
        labels_concat.reset_index(drop=True),
    ],
    axis=1,
)

In [None]:
# Drop mostly-empty columns and mean-fill the remaining NaNs (default ratio=50).
training_features_df = imputeNullValues(training_features_df)

In [None]:
# Shuffle the rows so original and augmented samples are interleaved. The fixed
# random_state makes the saved CSV reproducible (same value as the split above).
training_features_df = shuffle(training_features_df, random_state=42)

In [None]:
# Persist the training features (index column omitted).
training_features_df.to_csv(
    '/content/drive/MyDrive/rice disease/train_sift_v2.csv',
    index=False,
)

In [None]:
# Display the validation split.
validation

In [None]:
# Features and labels for the validation split (no augmentation).
# NOTE(review): this call relies on extractFeatures supplying a default extractor
# and returning a (features, labels) pair - confirm against the definition in scope.
val_features,label_val = extractFeatures(validation)

In [None]:
# Encode the validation labels with the same class-name mapping as the training set.
# NOTE: not idempotent - Series.map turns values missing from the dict into NaN, so
# re-running this cell on already-encoded labels would wipe them out.
label_val = label_val.map(train_dfgn.class_indices)

In [None]:
# Put validation features and labels side by side; both indexes are reset first
# so the rows pair up by position.
val_features = pd.concat(
    [
        val_features.reset_index(drop=True),
        label_val.reset_index(drop=True),
    ],
    axis=1,
)

In [None]:
# Drop mostly-empty columns and mean-fill the remaining NaNs.
# NOTE(review): columns are dropped according to this frame's own NaN ratio, so the
# surviving columns may differ from those of the training frame - verify.
val_features = imputeNullValues(val_features)

In [None]:
# Persist the validation features (index column omitted).
val_features.to_csv(
    '/content/drive/MyDrive/rice disease/val_sift_v2.csv',
    index=False,
)

In [None]:
# Load the sample submission; its Image_id column is used below to pick the test
# images (presumably it lists every test image - verify).
test  = pd.read_csv('/content/SampleSubmission.csv')

In [None]:
# Test images carry no labels: pass the extractor explicitly and set `test` by
# keyword (given positionally, `True` would be taken as the extractor).
features_df_test = extractFeatures(test, extractSIFT, test=True)

In [None]:
# Drop mostly-empty columns and mean-fill the remaining NaNs.
# NOTE(review): columns are dropped according to this frame's own NaN ratio, so the
# surviving columns may differ from those of the training frame - verify.
features_df_test = imputeNullValues(features_df_test)

In [None]:
# Persist the test features (index column omitted).
features_df_test.to_csv(
    '/content/drive/MyDrive/rice disease/test_sift_v2.csv',
    index=False,
)