# Imports

In [97]:
from tools import load_dataset as ldts
from tools import feature_name_gen as fng
import tools.feature_extraction as fe
import pandas as pd
import numpy as np

## Load Images

In [98]:
# Dataset directory handed to the loader; the loader is invoked with test=True.
dts_path = 'dataset_test/'
imgs, labels = ldts.load_dataset(dts_path, test=True)


### Create Dataframe

#### IMG
Img (784)

In [99]:
# Serialize every image with fe.feature_SerializedImg and stack the results
# into a single array (one row per image).
S_images = np.array([fe.feature_SerializedImg(img=img) for img in imgs])
print("Data shape: ", S_images.shape)

# The serialized pixels form the first columns of the feature table;
# the following cells append their features to `df`.
df = pd.DataFrame(S_images, columns=fng.gen_name("img", 784))
print("Dataframe shape: ", df.shape)


Data shape:  (2000, 784)
Dataframe shape:  (2000, 784)


## Extract New Features and add to Dataframe

#### Mean Color
Img (784) | Mean Color (1)

In [100]:
# One fe.feature_colorMean value per image.
cMean = np.array([fe.feature_colorMean(img=img) for img in imgs])
print("Data shape: ", cMean.shape)

# Wrap the values in a single named column and append it to the feature table.
temp_df = pd.DataFrame(cMean, columns=["MeanColor"])
df = pd.concat([df, temp_df], axis=1)
print("Dataframe shape: ", df.shape)


Data shape:  (2000,)
Dataframe shape:  (2000, 785)


#### SumImg
Img (784) | Mean Color (1) | SumImg (57)

In [101]:
# fe.feature_SumImg output for every image, stacked row-wise.
SumImg = np.array([fe.feature_SumImg(img=img) for img in imgs])
print("Data shape: ", SumImg.shape)

temp_df = pd.DataFrame(SumImg)
# Show the default (positional) column index before it is replaced by names.
print(temp_df.columns)
# Column names: 28 row entries, 28 column entries, then one overall entry.
temp_df.columns = [
    *fng.gen_name("rowSum", 28),
    *fng.gen_name("colSum", 28),
    "allSum",
]
df = pd.concat([df, temp_df], axis=1)
print("Dataframe shape: ", df.shape)


Data shape:  (2000, 57)
RangeIndex(start=0, stop=57, step=1)
Dataframe shape:  (2000, 842)


#### ColorHistogram
Img (784) | Mean Color (1) | SumImg (57) | ColorHistogram (256)

In [102]:
# fe.feature_colorHistogram output for every image; np.squeeze drops any
# length-1 axes left in the stacked result.
ColorHistogram = np.squeeze(
    np.array([fe.feature_colorHistogram(img=img) for img in imgs])
)
print("Data shape: ", ColorHistogram.shape)

# Name the 256 histogram columns and append them to the feature table.
temp_df = pd.DataFrame(ColorHistogram, columns=fng.gen_name("CHistogram", 256))
df = pd.concat([df, temp_df], axis=1)
print("Dataframe shape: ", df.shape)


Data shape:  (2000, 256)
Dataframe shape:  (2000, 1098)


#### Noise
Img (784) | Mean Color (1) | SumImg (57) | ColorHistogram (256) | noise (57)

In [103]:
# fe.feature_noise output for every image, stacked row-wise.
noise = np.array([fe.feature_noise(img=img) for img in imgs])
print("Data shape: ", noise.shape)

# Column names: 28 row entries, 28 column entries, then one overall entry.
temp_df = pd.DataFrame(
    noise,
    columns=[
        *fng.gen_name("rowNoise", 28),
        *fng.gen_name("colNoise", 28),
        "allNoise",
    ],
)
df = pd.concat([df, temp_df], axis=1)
print("Dataframe shape: ", df.shape)


Data shape:  (2000, 57)
Dataframe shape:  (2000, 1155)


#### Gaussian Blur
Img (784) | Mean Color (1) | SumImg (57) | ColorHistogram (256) | noise (57) | gBlur (784)

In [104]:
# Apply fe.feature_gBlur to each image, then serialize the blurred result.
gBlur = np.array(
    [fe.feature_SerializedImg(img=fe.feature_gBlur(img=img)) for img in imgs]
)
print("Data shape: ", gBlur.shape)

# Name the 784 blurred-pixel columns and append them to the feature table.
temp_df = pd.DataFrame(gBlur, columns=fng.gen_name("gBlur", 784))
df = pd.concat([df, temp_df], axis=1)
print("Dataframe shape: ", df.shape)


Data shape:  (2000, 784)
Dataframe shape:  (2000, 1939)


#### Gaussian Blur Mean
Img (784) | Mean Color (1) | SumImg (57) | ColorHistogram (256) | noise (57) | gBlur (784) | gBlurMean (1)

In [105]:
# fe.feature_colorMean of each image after fe.feature_gBlur.
gBlurMean = np.array(
    [fe.feature_colorMean(img=fe.feature_gBlur(img=img)) for img in imgs]
)
print("Data shape: ", gBlurMean.shape)

# Single named column appended to the feature table.
temp_df = pd.DataFrame(gBlurMean, columns=["gBlurMean"])
df = pd.concat([df, temp_df], axis=1)
print("Dataframe shape: ", df.shape)


Data shape:  (2000,)
Dataframe shape:  (2000, 1940)


#### Gaussian Blur Sum
Img (784) | Mean Color (1) | SumImg (57) | ColorHistogram (256) | noise (57) | gBlur (784) | gBlurMean (1) | gBlurSum (57)

In [106]:
# fe.feature_SumImg of each image after fe.feature_gBlur.
gBlurSum = np.array(
    [fe.feature_SumImg(img=fe.feature_gBlur(img=img)) for img in imgs]
)
print("Data shape: ", gBlurSum.shape)

# Column names: 28 row entries, 28 column entries, then one overall entry.
temp_df = pd.DataFrame(
    gBlurSum,
    columns=[
        *fng.gen_name("rowGBlur", 28),
        *fng.gen_name("colGBlur", 28),
        "allGBlur",
    ],
)
df = pd.concat([df, temp_df], axis=1)
print("Dataframe shape: ", df.shape)


Data shape:  (2000, 57)
Dataframe shape:  (2000, 1997)


#### Median Blur
Img (784) | Mean Color (1) | SumImg (57) | ColorHistogram (256) | noise (57) | gBlur (784) | gBlurMean (1) | gBlurSum (57) | mBlur (784)

In [107]:
# Apply fe.feature_medianBlur to each image and serialize the blurred result.
mBlur = []
for img in imgs:
    mb = fe.feature_medianBlur(img=img)
    mBlur += [fe.feature_SerializedImg(img=mb)]
mBlur = np.array(mBlur)
print("Data shape: ", mBlur.shape)

# Name the 784 blurred-pixel columns and append them to the feature table.
temp_df = pd.DataFrame(mBlur, columns=fng.gen_name("mBlur", 784))
df = pd.concat([df, temp_df], axis=1)
print("Dataframe shape: ", df.shape)


Data shape:  (2000, 784)
Dataframe shape:  (2000, 2781)


#### Median Blur Mean
Img (784) | Mean Color (1) | SumImg (57) | ColorHistogram (256) | noise (57) | gBlur (784) | gBlurMean (1) | gBlurSum (57) | mBlur (784) | mBlurMean (1)

In [108]:
# fe.feature_colorMean of each image after fe.feature_medianBlur.
# The blurred image is passed straight into feature_colorMean so the value is
# computed from the current image rather than from a variable left over from
# another cell.
mBlurMean = np.array(
    [fe.feature_colorMean(img=fe.feature_medianBlur(img=img)) for img in imgs]
)
print("Data shape: ", mBlurMean.shape)

# Build the column from mBlurMean itself (not gBlurMean) so the "mBlurMean"
# column really holds the median-blur values.
temp_df = pd.DataFrame(mBlurMean)
temp_df.columns = ["mBlurMean"]
df = pd.concat([df, temp_df], axis=1)
print("Dataframe shape: ", df.shape)


Data shape:  (2000,)
Dataframe shape:  (2000, 2782)


#### Median Blur Sum
Img (784) | Mean Color (1) | SumImg (57) | ColorHistogram (256) | noise (57) | gBlur (784) | gBlurMean (1) | gBlurSum (57) | mBlur (784) | mBlurMean (1) | mBlurSum (57)

In [109]:
# fe.feature_SumImg of each image after fe.feature_medianBlur.
mBlurSum = np.array(
    [fe.feature_SumImg(img=fe.feature_medianBlur(img=img)) for img in imgs]
)
print("Data shape: ", mBlurSum.shape)

# Column names: 28 row entries, 28 column entries, then one overall entry.
temp_df = pd.DataFrame(
    mBlurSum,
    columns=[
        *fng.gen_name("rowMBlur", 28),
        *fng.gen_name("colMBlur", 28),
        "allMBlur",
    ],
)
df = pd.concat([df, temp_df], axis=1)
print("Dataframe shape: ", df.shape)


Data shape:  (2000, 57)
Dataframe shape:  (2000, 2839)


#### Canny Edge Detector
Img (784) | Mean Color (1) | SumImg (57) | ColorHistogram (256) | noise (57) | gBlur (784) | gBlurMean (1) | gBlurSum (57) | mBlur (784) | mBlurMean (1) | mBlurSum (57) | Canny (784)

In [110]:
# Run fe.feature_cannyEdge on each image and serialize the resulting edge map.
canny = np.array(
    [fe.feature_SerializedImg(img=fe.feature_cannyEdge(img=img)) for img in imgs]
)
print("Data shape: ", canny.shape)

# Name the 784 edge-map columns and append them to the feature table.
temp_df = pd.DataFrame(canny, columns=fng.gen_name("Canny", 784))
df = pd.concat([df, temp_df], axis=1)
print("Dataframe shape: ", df.shape)


Data shape:  (2000, 784)
Dataframe shape:  (2000, 3623)


#### Canny Edge Detector Mean
Img (784) | Mean Color (1) | SumImg (57) | ColorHistogram (256) | noise (57) | gBlur (784) | gBlurMean (1) | gBlurSum (57) | mBlur (784) | mBlurMean (1) | mBlurSum (57) | Canny (784) | CannyMean (1)

In [111]:
# fe.feature_colorMean of each image's fe.feature_cannyEdge output.
cannyMean = np.array(
    [fe.feature_colorMean(img=fe.feature_cannyEdge(img=img)) for img in imgs]
)
print("Data shape: ", cannyMean.shape)

# Single named column appended to the feature table.
temp_df = pd.DataFrame(cannyMean, columns=["CannyMean"])
df = pd.concat([df, temp_df], axis=1)
print("Dataframe shape: ", df.shape)


Data shape:  (2000,)
Dataframe shape:  (2000, 3624)


#### Canny Edge Detector Sum
Img (784) | Mean Color (1) | SumImg (57) | ColorHistogram (256) | noise (57) | gBlur (784) | gBlurMean (1) | gBlurSum (57) | mBlur (784) | mBlurMean (1) | mBlurSum (57) | Canny (784) | CannyMean (1) | CannySum (57)

In [112]:
# fe.feature_SumImg of each image's fe.feature_cannyEdge output.
cannySum = np.array(
    [fe.feature_SumImg(img=fe.feature_cannyEdge(img=img)) for img in imgs]
)
print("Data shape: ", cannySum.shape)

# Column names: 28 row entries, 28 column entries, then one overall entry.
temp_df = pd.DataFrame(
    cannySum,
    columns=[
        *fng.gen_name("rowCanny", 28),
        *fng.gen_name("colCanny", 28),
        "allCanny",
    ],
)
df = pd.concat([df, temp_df], axis=1)
print("Dataframe shape: ", df.shape)


Data shape:  (2000, 57)
Dataframe shape:  (2000, 3681)


## Add Label

In [113]:
# The label column is not appended in this notebook: the code below is
# commented out, so the saved table contains feature columns only.
# NOTE(review): presumably labels are unavailable because the dataset is
# loaded with test=True — confirm before re-enabling.
# print("Data shape: ", labels.shape)
# temp_df = pd.DataFrame(labels)
# temp_df.columns = ["Label"]
# df = pd.concat([df, temp_df], axis=1)

# print("Dataframe shape: ", df.shape)

## Save

In [114]:
# Print a summary of the assembled feature table, then write it to CSV
# without the row index.
df.info()
df.to_csv('p_dataset_test.csv', index=False)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Columns: 3681 entries, img_0 to allCanny
dtypes: float32(256), float64(4), uint32(285), uint8(3136)
memory usage: 10.2 MB


In [115]:
# df = pd.read_csv('p_dataset_train.csv')