# Open image and convert to dataframe

In [1]:
import spectral.io.envi as envi

# directory holding the ENVI header/data pairs
loc = '/home/lql/Desktop/rs_mlsvm/data/'

# Open the image via its header + binary file, then slice every axis so that
# `img` holds the full (rows, cols, bands) cube rather than the file handle
# (presumably as a NumPy array -- confirm against the spectral docs).
img_hdr = loc + '2003_mod09.hdr'
img_dat = loc + '2003_mod09.img'
img = envi.open(img_hdr, img_dat)[:, :, :]

In [2]:
# Open the mask the same way as the image and read it fully by slicing all axes.
mask_hdr = loc + 'ak03_mask.hdr'
mask_dat = loc + 'ak03_mask.img'
mask = envi.open(mask_hdr, mask_dat)[:, :, :]

In [3]:
# mask first, then image: rows/cols must agree so pixels line up one-to-one
for arr in (mask, img):
    print(arr.shape)

(1871, 1921, 1)
(1871, 1921, 260)


In [4]:
import numpy as np

# pixel count per mask value (0 = no fire, 1 = fire, 2 = background),
# shown as one [value, count] row per category
unique, counts = np.unique(mask, return_counts=True)
np.column_stack((unique, counts))

array([[      0, 1846606],
       [      1,    8511],
       [      2, 1739074]])

In [5]:
import pandas as pd
# one label per pixel: flatten the mask and start the frame with that column
l = mask.flatten()
df = pd.DataFrame({'label': l})

In [6]:
# Attach every spectral band as a column b0..b{N-1}, one row per pixel.
# Reshaping to (pixels, bands) keeps the same row-major pixel order as
# img[:, :, i].flatten(), and adding all band columns in a single concat
# avoids the "DataFrame is highly fragmented" PerformanceWarning that pandas
# raises when hundreds of columns are inserted one at a time.
n_bands = img.shape[2]
band_cols = ['b' + str(i) for i in range(n_bands)]
bands = pd.DataFrame(img.reshape(-1, n_bands), columns=band_cols, index=df.index)

# Drop any band columns left from a previous run so that re-executing the
# cell replaces them instead of duplicating them.
df = pd.concat([df.drop(columns=band_cols, errors='ignore'), bands], axis=1)

In [7]:
# drop pixels whose mask label is 2 (the background category)
df = df.loc[df['label'].ne(2)]

In [8]:
# discard every row that still has a missing value in any column
df = df.dropna(axis=0, how='any')

In [9]:
# preview the first five rows (head() defaults to n=5)
df.head()

Unnamed: 0,label,b0,b1,b2,b3,b4,b5,b6,b7,b8,...,b250,b251,b252,b253,b254,b255,b256,b257,b258,b259
1637,0,0.8273,0.7732,0.8644,0.8477,0.3963,0.1205,0.0658,-0.033802,-0.532481,...,0.5754,0.5211,0.6708,0.636,0.3229,0.1206,0.084,-0.049521,2.360877,-0.051018
3556,0,0.7689,0.721,0.8172,0.7926,0.3663,0.0705,0.0284,-0.03215,-0.583009,...,0.3853,0.2658,0.6024,0.5116,0.1873,0.0782,0.0744,-0.183536,0.317684,-0.155721
3557,0,0.7689,0.721,0.8172,0.7926,0.3663,0.0705,0.0284,-0.03215,-0.583009,...,0.3853,0.2658,0.6024,0.5116,0.1873,0.0782,0.0744,-0.183536,0.317684,-0.155721
3558,0,0.9477,0.9068,0.9375,0.9528,0.4468,0.104,0.041,-0.022054,-0.182021,...,0.476,0.3543,0.6525,0.6042,0.2317,0.1021,0.0611,-0.146574,0.445168,-0.137225
5475,0,0.774,0.7204,0.8356,0.8101,0.3733,0.0834,0.0357,-0.035867,-1.375774,...,0.3461,0.2486,0.5577,0.4455,0.147,0.0886,0.0679,-0.163948,0.28424,-0.133598


# Change labels for MLSVM (-1, 1)
### Make sure the minority class (lbl==1) has fewer samples than the majority class (lbl==-1)

In [10]:
# class balance before relabelling: 0 = no wildfire, 1 = wildfire
df['label'].value_counts()

0    1801226
1       8287
Name: label, dtype: int64

In [11]:
# work on an independent copy and store the label as a signed 16-bit integer
df2 = df.copy().astype({'label': 'int16'})

In [12]:
# map label 0 to -1; every other label is left as it is
df2['label'] = df2['label'].replace({0: -1})

In [13]:
# class balance after relabelling
df2['label'].value_counts()

-1    1801226
 1       8287
Name: label, dtype: int64

In [16]:
# preview the first five relabelled rows (head() defaults to n=5)
df2.head()

Unnamed: 0,label,b0,b1,b2,b3,b4,b5,b6,b7,b8,...,b250,b251,b252,b253,b254,b255,b256,b257,b258,b259
1637,-1,0.8273,0.7732,0.8644,0.8477,0.3963,0.1205,0.0658,-0.033802,-0.532481,...,0.5754,0.5211,0.6708,0.636,0.3229,0.1206,0.084,-0.049521,2.360877,-0.051018
3556,-1,0.7689,0.721,0.8172,0.7926,0.3663,0.0705,0.0284,-0.03215,-0.583009,...,0.3853,0.2658,0.6024,0.5116,0.1873,0.0782,0.0744,-0.183536,0.317684,-0.155721
3557,-1,0.7689,0.721,0.8172,0.7926,0.3663,0.0705,0.0284,-0.03215,-0.583009,...,0.3853,0.2658,0.6024,0.5116,0.1873,0.0782,0.0744,-0.183536,0.317684,-0.155721
3558,-1,0.9477,0.9068,0.9375,0.9528,0.4468,0.104,0.041,-0.022054,-0.182021,...,0.476,0.3543,0.6525,0.6042,0.2317,0.1021,0.0611,-0.146574,0.445168,-0.137225
5475,-1,0.774,0.7204,0.8356,0.8101,0.3733,0.0834,0.0357,-0.035867,-1.375774,...,0.3461,0.2486,0.5577,0.4455,0.147,0.0886,0.0679,-0.163948,0.28424,-0.133598


In [17]:
# (number of rows, number of columns) of the relabelled frame
df2.shape

(1809513, 261)

# Create validation set (for statistics after running MLSVM)

In [32]:
# select grid of image
# 1809513 pixels

# The validation set is the leading 10% of rows (by position, not by index
# label); everything after that becomes the training set.
val_fraction = 0.10
val = len(df2) * val_fraction

start = 0
stop = start + round(val)

print(start, stop)

df_val = df2.iloc[start:stop]
df_train = df2.iloc[stop:]

0 180951


In [33]:
# Report the [label, count] table for each split: validation first, then training.
for title, subset in (('Validation Counts', df_val), ('Training Counts', df_train)):
    y = subset.label.values
    unique, counts = np.unique(y, return_counts=True)
    print(title)
    print(np.asarray((unique, counts)).T)

Validation Counts
[[    -1 179639]
 [     1   1312]]
Training Counts
[[     -1 1621587]
 [      1    6975]]


In [35]:
# Write both splits as bare comma-separated values: no header row and no
# index column, label first followed by the band columns.
for frame, csv_name in ((df_train, 'ak03_fire_train.csv'), (df_val, 'ak03_fire_val.csv')):
    frame.to_csv(loc + csv_name, sep=',', header=False, index=False)