### Classification for Shape Image Data (Square, Triangle Circle) from 2-3 Different Frequencies 

In [1]:
from ReadData import *
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn import svm
from sklearn.model_selection import cross_val_score


Read in the square and circle images 

In [2]:
def readData(path, getTargetName = False):
    # print(getTargetName)
    data = getEMData(path)
    target = []
    field = []
    freq = []
    if getTargetName:
        targetName = []
        upper_case = [char for char in path if char.isupper()]
        target_name = path[path.index(upper_case[0]):path.index(upper_case[1])]

    for i in range(0, len(data)):
        target.append(data[i].target)
        field.append(data[i].Esct.real)
        freq.append(data[i].freq)
        if getTargetName: targetName.append(target_name)

    field = np.array(field)
    target = np.array(target)
    freq = np.array(freq)
    if getTargetName: targetName = np.array(targetName)

    result = (field, target, freq)
    if getTargetName: result = (field, target, freq, targetName)
    return result

In [3]:
fieldCirc, targetCirc, freqCirc, shapeCirc = readData(path = "./CircDifFreq/", getTargetName=True)

Read 310 samples


In [4]:
fieldTri, targetTri, freqTri, shapeTri = readData("./TriangleDifFreq/", True)

Read 350 samples


In [6]:
fieldCT = np.concatenate((fieldCirc, fieldTri))
targetCT = np.concatenate((targetCirc, targetTri))
freqCT = np.concatenate((freqCirc, freqTri))
shapeCT = np.concatenate((shapeCirc, shapeTri))

In [7]:
field_train_CT, field_test_CT, target_train_CT, target_test_CT, freq_train_CT, freq_test_CT, shape_train_CT, shape_test_CT  = train_test_split(
    fieldCT, targetCT, freqCT, shapeCT, test_size=0.20, random_state=123)

In [9]:
field_test_CT = field_test_CT.reshape(len(field_test_CT),-1)

field_train_CT = field_train_CT.reshape(len(field_train_CT),-1)

[field_test_CT.shape, field_train_CT.shape]

[(132, 576), (528, 576)]

In [10]:
rdf_clf = RandomForestClassifier(bootstrap = False, 
                                max_features='sqrt',
                                n_jobs=1, 
                                random_state=123)

In [11]:
svm_clf = svm.SVC(kernel="rbf", random_state=123)

Binary Classification by Shape

In [94]:
rdf_clf1 = rdf_clf.fit(field_train_CT, shape_train_CT)

In [95]:
preds = rdf_clf1.predict(field_test_CT)
print("Accuracy:", accuracy_score(shape_test_CT,preds))

Accuracy: 0.9924242424242424


In [96]:
svm1 = svm_clf.fit(field_train_CT, shape_train_CT)

In [97]:
preds1 = svm1.predict(field_test_CT)
print("Accuracy:", accuracy_score(shape_test_CT,preds1))

Accuracy: 0.9696969696969697


In [16]:
shape_test_CT

array(['Circ', 'Triangle', 'Circ', 'Triangle', 'Circ', 'Circ', 'Triangle',
       'Circ', 'Triangle', 'Triangle', 'Triangle', 'Circ', 'Circ', 'Circ',
       'Circ', 'Circ', 'Triangle', 'Triangle', 'Circ', 'Triangle', 'Circ',
       'Circ', 'Triangle', 'Triangle', 'Triangle', 'Circ', 'Circ', 'Circ',
       'Triangle', 'Triangle', 'Triangle', 'Triangle', 'Triangle',
       'Triangle', 'Triangle', 'Triangle', 'Triangle', 'Triangle', 'Circ',
       'Circ', 'Circ', 'Circ', 'Triangle', 'Circ', 'Triangle', 'Circ',
       'Triangle', 'Triangle', 'Circ', 'Triangle', 'Triangle', 'Circ',
       'Triangle', 'Triangle', 'Triangle', 'Triangle', 'Triangle', 'Circ',
       'Triangle', 'Triangle', 'Circ', 'Circ', 'Circ', 'Triangle',
       'Triangle', 'Circ', 'Circ', 'Triangle', 'Triangle', 'Circ',
       'Triangle', 'Triangle', 'Triangle', 'Circ', 'Triangle', 'Triangle',
       'Triangle', 'Circ', 'Circ', 'Triangle', 'Triangle', 'Circ',
       'Triangle', 'Circ', 'Circ', 'Circ', 'Circ', 'Triangle', 'T

In [17]:
fieldSqr, targetSqr, freqSqr, shapeSqr = readData("./SqrDifFreq/", True)

Read 390 samples


In [18]:
fieldSqr = fieldSqr.reshape(len(fieldSqr),-1)
shapeSqr = shapeSqr.reshape(len(shapeSqr),-1)

In [101]:
predSqr = rdf_clf1.predict(fieldSqr)
# print("Accuracy:", accuracy_score(shapeSqr,preds))

In [102]:
predSqr

array(['Circ', 'Circ', 'Circ', 'Circ', 'Triangle', 'Circ', 'Circ', 'Circ',
       'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ',
       'Circ', 'Triangle', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ',
       'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ',
       'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ',
       'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Triangle', 'Circ',
       'Triangle', 'Triangle', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ',
       'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Triangle', 'Circ', 'Circ',
       'Triangle', 'Triangle', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ',
       'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Triangle',
       'Triangle', 'Circ', 'Circ', 'Triangle', 'Circ', 'Circ', 'Circ',
       'Triangle', 'Circ', 'Circ', 'Circ', 'Triangle', 'Circ', 'Circ',
       'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ',
       'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'C

In [103]:
import collections
collections.Counter(predSqr)

Counter({'Circ': 260, 'Triangle': 130})

In [104]:
circle_indices = [index for index, shape in enumerate(predSqr) if shape == "Circ"]
triangle_indices = [index for index, shape in enumerate(predSqr) if shape == "Triangle"]

print(np.unique(freqSqr[circle_indices]))
print(np.unique(freqSqr[triangle_indices]))

[1.e+09 2.e+09 3.e+09]
[1.e+09 2.e+09 3.e+09]


In [105]:
predSqr_svm = svm1.predict(fieldSqr)
# print("Accuracy:", accuracy_score(shapeSqr,preds))

In [106]:
predSqr_svm

array(['Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ',
       'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ',
       'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ',
       'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ',
       'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ',
       'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ',
       'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ',
       'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ',
       'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ',
       'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Triangle', 'Circ', 'Circ',
       'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ',
       'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ',
       'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ',
       'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Circ', 'Triangle',
      

In [107]:
import collections
collections.Counter(predSqr_svm)

Counter({'Circ': 295, 'Triangle': 95})

In [108]:
circle_indices = [index for index, shape in enumerate(predSqr_svm) if shape == "Circ"]
triangle_indices = [index for index, shape in enumerate(predSqr_svm) if shape == "Triangle"]

print(np.unique(freqSqr[circle_indices]))
print(np.unique(freqSqr[triangle_indices]))

[1.e+09 2.e+09]
[1.e+09 3.e+09]


It seems that squares are mostly classified as circles and those squares that are classified as triangles have frequencies 1e9 and 3e9

In [24]:
[field_test_CT.shape, field_train_CT.shape]

[(132, 576), (528, 576)]

Binary Classification by Frequency

In [25]:
freq_indices = [index for index, freq in enumerate(freqCT) if freq in [1e9,2e9]]

In [24]:
len(freq_indices)

320

In [26]:
freqCT_12 = freqCT[freq_indices]
shape_12 = shapeCT[freq_indices]
fieldCT_12 = fieldCT[freq_indices]
targetCT_12 = targetCT[freq_indices]

In [27]:
field_train_CT12, field_test_CT12, target_train_CT12, target_test_CT12, freq_train_CT12, freq_test_CT12, shape_train_CT12, shape_test_CT12  = train_test_split(
    fieldCT_12, targetCT_12 , freqCT_12, shape_12, test_size=0.20, random_state=567)

In [28]:
field_train_CT12, field_valid_CT12, target_train_CT12, target_valid_CT12, freq_train_CT12, freq_valid_CT12, shape_train_CT12, shape_valid_CT12  = train_test_split(
    field_train_CT12, target_train_CT12 , freq_train_CT12, shape_train_CT12, test_size=0.20, random_state=567)

In [29]:
field_test_CT12 = field_test_CT12.reshape(len(field_test_CT12),-1)

field_train_CT12 = field_train_CT12.reshape(len(field_train_CT12),-1)

field_valid_CT12 = field_valid_CT12.reshape(len(field_valid_CT12),-1)

[field_test_CT12.shape, field_train_CT12.shape, field_valid_CT12.shape]

[(98, 576), (313, 576), (79, 576)]

In [30]:
rdf_clf.fit(field_train_CT12, freq_train_CT12)

In [31]:
preds2 = rdf_clf.predict(field_valid_CT12)
print("Accuracy:", accuracy_score(freq_valid_CT12,preds2))

Accuracy: 1.0


In [32]:
cross_val_score(rdf_clf, field_train_CT12, freq_train_CT12, cv=5, scoring="accuracy")

array([1., 1., 1., 1., 1.])

Might be overfit, try with a larger dataset

In [33]:
svm_clf.fit(field_train_CT12,freq_train_CT12)

In [34]:
preds3 = svm_clf.predict(field_test_CT12)
print("Accuracy:", accuracy_score(freq_test_CT12,preds3))

Accuracy: 0.9897959183673469


In [35]:
freq_indices_13 = [index for index, freq in enumerate(freqCT) if freq in [1e9,3e9]]

In [36]:
freqCT_13 = freqCT[freq_indices_13]
shape_13 = shapeCT[freq_indices_13]
fieldCT_13 = fieldCT[freq_indices_13]
targetCT_13 = targetCT[freq_indices_13]

In [37]:
field_train_CT13, field_test_CT13, target_train_CT13, target_test_CT13, freq_train_CT13, freq_test_CT13, shape_train_CT13, shape_test_CT13  = train_test_split(
    fieldCT_13, targetCT_13 , freqCT_13, shape_13, test_size=0.20, random_state=567)

In [38]:
field_train_CT13, field_valid_CT13, target_train_CT13, target_valid_CT13, freq_train_CT13, freq_valid_CT13, shape_train_CT13, shape_valid_CT13  = train_test_split(
    field_train_CT13, target_train_CT13 , freq_train_CT13, shape_train_CT13, test_size=0.20, random_state=567)

In [39]:
field_test_CT13 = field_test_CT13.reshape(len(field_test_CT13),-1)

field_train_CT13 = field_train_CT13.reshape(len(field_train_CT13),-1)

field_valid_CT13 = field_valid_CT13.reshape(len(field_valid_CT13),-1)

[field_test_CT13.shape, field_train_CT13.shape]

[(86, 576), (275, 576)]

In [70]:
from sklearn import utils
print(utils.multiclass.type_of_target(freq_train_CT12))
print(utils.multiclass.type_of_target(freq_train_CT13))

binary
continuous


In [40]:
from sklearn import preprocessing
lab_enc = preprocessing.LabelEncoder()
encoded_freq_train_CT13 = lab_enc.fit_transform(freq_train_CT13)

In [41]:
rdf_clf.fit(field_train_CT13, encoded_freq_train_CT13)

In [42]:
preds4 = rdf_clf.predict(field_valid_CT13)
print("Accuracy:", accuracy_score(lab_enc.fit_transform(freq_valid_CT13),preds4))

Accuracy: 1.0


In [43]:
cross_val_score(rdf_clf, field_train_CT13, encoded_freq_train_CT13, cv=5, scoring="accuracy")

array([1.        , 1.        , 0.98181818, 1.        , 1.        ])

Might be overfit, try with a larger dataset. or images with different frequencies have significantly different field images so the model can easily classify it??

In [44]:
svm_clf.fit(field_train_CT13,encoded_freq_train_CT13)

In [45]:
preds5 = svm_clf.predict(field_test_CT13)
print("Accuracy:", accuracy_score(lab_enc.fit_transform(freq_test_CT13),preds5))

Accuracy: 1.0


OneHotEncoder for 3 frequencies and include that in Shape classification

In [47]:
from sklearn.preprocessing import OneHotEncoder

encoder = OneHotEncoder(sparse=False)

In [48]:
oneHotFreq_train = encoder.fit_transform(freq_train_CT.reshape(-1, 1))
oneHotFreq_train.shape

(528, 3)

In [50]:
df = pd.DataFrame({'field_img': field_train_CT.tolist(), 'freq': oneHotFreq_train.tolist()})
df

Unnamed: 0,field_img,freq
0,"[-0.019394890183889547, -0.01728100442593361, ...","[0.0, 1.0, 0.0]"
1,"[-0.041120911450331386, 0.061638881506470775, ...","[0.0, 0.0, 1.0]"
2,"[0.009708487853565383, 0.0246319557683847, 0.0...","[0.0, 1.0, 0.0]"
3,"[-0.0320806376303894, -0.028961346801834653, -...","[0.0, 0.0, 1.0]"
4,"[0.004362090970524132, 0.008057309218101247, 0...","[1.0, 0.0, 0.0]"
...,...,...
523,"[0.015656443462797705, 0.015146241011859515, 0...","[1.0, 0.0, 0.0]"
524,"[0.010672062379937487, 0.01089342987174621, 0....","[0.0, 0.0, 1.0]"
525,"[-0.006516826266212227, -0.01149381690694106, ...","[0.0, 0.0, 1.0]"
526,"[0.0014787832931566006, 0.0038129766036034338,...","[1.0, 0.0, 0.0]"


In [75]:
encoded_freq_train_CT = lab_enc.fit_transform(freq_train_CT)
encoded_freq_train_CT

array([1, 2, 1, 2, 0, 0, 1, 2, 0, 1, 1, 2, 1, 2, 0, 0, 2, 0, 2, 2, 0, 2,
       1, 1, 1, 2, 2, 0, 2, 2, 1, 1, 1, 1, 1, 0, 1, 0, 2, 2, 0, 0, 0, 0,
       0, 1, 1, 1, 2, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2,
       2, 0, 0, 2, 0, 1, 1, 2, 2, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 2, 1,
       0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 2, 1, 0, 0, 1,
       1, 0, 2, 0, 1, 1, 2, 2, 1, 0, 1, 0, 0, 1, 2, 0, 2, 2, 1, 2, 1, 1,
       2, 2, 1, 2, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 2, 0, 2, 0, 0, 2, 0,
       0, 1, 1, 0, 0, 1, 0, 1, 2, 0, 0, 2, 2, 1, 2, 0, 2, 2, 0, 0, 1, 0,
       0, 0, 2, 2, 1, 0, 0, 2, 0, 2, 0, 2, 0, 1, 1, 1, 1, 1, 2, 2, 1, 0,
       1, 1, 1, 0, 0, 0, 1, 1, 2, 2, 2, 0, 2, 0, 0, 1, 0, 0, 0, 1, 2, 0,
       0, 1, 2, 0, 1, 2, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0,
       2, 1, 0, 0, 1, 0, 2, 1, 1, 2, 1, 0, 1, 2, 0, 0, 0, 0, 2, 0, 2, 2,
       0, 1, 0, 1, 1, 0, 2, 0, 1, 0, 1, 1, 2, 0, 0, 2, 2, 1, 0, 0, 0, 0,
       2, 2, 0, 2, 0, 2, 0, 1, 1, 1, 1, 1, 0, 0, 1,

In [74]:
svm_clf.fit(field_train_CT, encoded_freq_train_CT)

In [82]:
preds6 = svm_clf.predict(field_test_CT)
# lab_enc.fit_transform(freq_test_CT)
print("Accuracy:", accuracy_score(lab_enc.fit_transform(freq_test_CT),preds6))

Accuracy: 0.9924242424242424


In [92]:
rdf_clf.fit(field_train_CT, encoded_freq_train_CT)

In [93]:
preds7 = rdf_clf.predict(field_train_CT)

print("Accuracy:", accuracy_score(lab_enc.fit_transform(freq_train_CT),preds7))

Accuracy: 1.0


In [83]:
lab_enc.fit_transform(freq_train_CT)

array([1, 2, 1, 2, 0, 0, 1, 2, 0, 1, 1, 2, 1, 2, 0, 0, 2, 0, 2, 2, 0, 2,
       1, 1, 1, 2, 2, 0, 2, 2, 1, 1, 1, 1, 1, 0, 1, 0, 2, 2, 0, 0, 0, 0,
       0, 1, 1, 1, 2, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2,
       2, 0, 0, 2, 0, 1, 1, 2, 2, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 2, 1,
       0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 2, 1, 0, 0, 1,
       1, 0, 2, 0, 1, 1, 2, 2, 1, 0, 1, 0, 0, 1, 2, 0, 2, 2, 1, 2, 1, 1,
       2, 2, 1, 2, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 2, 0, 2, 0, 0, 2, 0,
       0, 1, 1, 0, 0, 1, 0, 1, 2, 0, 0, 2, 2, 1, 2, 0, 2, 2, 0, 0, 1, 0,
       0, 0, 2, 2, 1, 0, 0, 2, 0, 2, 0, 2, 0, 1, 1, 1, 1, 1, 2, 2, 1, 0,
       1, 1, 1, 0, 0, 0, 1, 1, 2, 2, 2, 0, 2, 0, 0, 1, 0, 0, 0, 1, 2, 0,
       0, 1, 2, 0, 1, 2, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0,
       2, 1, 0, 0, 1, 0, 2, 1, 1, 2, 1, 0, 1, 2, 0, 0, 0, 0, 2, 0, 2, 2,
       0, 1, 0, 1, 1, 0, 2, 0, 1, 0, 1, 1, 2, 0, 0, 2, 2, 1, 0, 0, 0, 0,
       2, 2, 0, 2, 0, 2, 0, 1, 1, 1, 1, 1, 0, 0, 1,

In [88]:
set(lab_enc.inverse_transform(preds7))

{1000000000.0, 2000000000.0}