___
## Initialization

In [0]:
import pandas as pd
from problog.program import PrologString
from problog.core import ProbLog
from problog import get_evaluatable
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score
from matplotlib import pyplot as plt

___
## Loading Data

In [0]:
df = pd.read_json("db/database_1.json")

In [0]:
display(df.tail())

Unnamed: 0,CLASS,SENSOR1,SENSOR2,SENSOR3,SENSOR4,SENSOR5,SENSOR6,SENSOR7,SENSOR8,SENSOR9
995,5,77,62,53,89,86,147,180,49,66
996,5,78,60,49,95,83,137,161,46,61
997,2,85,72,79,85,156,154,191,95,74
998,5,75,53,40,93,78,144,171,39,72
999,5,71,55,35,117,70,141,169,29,79


___
## Cleansing Data

In [0]:
# df = df.sample(n=10000)
df.reset_index(inplace=True)
del df['index']
display(df.tail())

Unnamed: 0,CLASS,SENSOR1,SENSOR2,SENSOR3,SENSOR4,SENSOR5,SENSOR6,SENSOR7,SENSOR8,SENSOR9
995,5,77,62,53,89,86,147,180,49,66
996,5,78,60,49,95,83,137,161,46,61
997,2,85,72,79,85,156,154,191,95,74
998,5,75,53,40,93,78,144,171,39,72
999,5,71,55,35,117,70,141,169,29,79


In [0]:
X = df[[item for item in df.columns if item != 'CLASS']]
y = df['CLASS']

In [0]:
print(X.shape,y.shape)

(1000, 9) (1000,)


In [0]:
X_train, X_test, y_train, y_test = train_test_split(X, y)

___
## Machine Learning Training

In [0]:
model = GaussianNB()
model.fit(X_train, y_train)

y_hat = model.predict(X_test)


In [0]:
print(classification_report(y_test, y_hat))

              precision    recall  f1-score   support

           1       0.55      0.39      0.46        74
           2       0.00      0.00      0.00         1
           3       0.27      0.10      0.15        30
           4       0.07      0.05      0.06        21
           5       0.67      0.88      0.76       120
           6       1.00      1.00      1.00         4

   micro avg       0.57      0.57      0.57       250
   macro avg       0.43      0.40      0.40       250
weighted avg       0.54      0.57      0.54       250



___
## Probability Calculation

In [0]:
prob_class_name = ['Prob_class_1', 'Prob_class_2', 'Prob_class_3', 'Prob_class_4', 'Prob_class_5', 'Prob_class_6']

In [0]:
df_with_proba = pd.concat([df, pd.DataFrame(model.predict_proba(X), columns=prob_class_name)], axis=1)
# del df_with_proba['index']

display(df_with_proba.tail())

Unnamed: 0,CLASS,SENSOR1,SENSOR2,SENSOR3,SENSOR4,SENSOR5,SENSOR6,SENSOR7,SENSOR8,SENSOR9,Prob_class_1,Prob_class_2,Prob_class_3,Prob_class_4,Prob_class_5,Prob_class_6
995,5,77,62,53,89,86,147,180,49,66,0.003659,0.031211,0.00019,0.024884,0.940056,8.212032000000001e-23
996,5,78,60,49,95,83,137,161,46,61,5.2e-05,0.000157,4e-06,0.002028,0.99776,1.8139e-16
997,2,85,72,79,85,156,154,191,95,74,0.372577,0.381717,0.081577,0.163097,0.001032,4.230569e-59
998,5,75,53,40,93,78,144,171,39,72,0.000397,0.001083,2.5e-05,0.005477,0.993018,9.548612e-22
999,5,71,55,35,117,70,141,169,29,79,9.2e-05,1.1e-05,1.8e-05,0.002315,0.997564,1.41246e-28


___
## Machine Learning Knowledge Base

In [0]:
def ml_problog_generator(df):
    for row_index,row in df.iterrows():
        suffix = ", ".join(["equal(s{}".format(i) + "," + str(int(row["SENSOR" + str(i)])) + ")" for i in range(1, 9 + 1)]) + "."
        for c in range(1,len(prob_class_name)+1):
            prefix = "{}::{} :- ".format(row['Prob_class_{}'.format(c)], "c{}".format(c))
            total_string = prefix + suffix
            yield total_string

In [0]:
def genarate_problog():
    data_out = ml_problog_generator(df=df_with_proba)

    with open("plp/ml_problog_string.pl", 'w', encoding='utf-8') as fout:
        for line in data_out:
            fout.write(line + "\n")

In [0]:
genarate_problog()
!ls -l | grep "plp/ml_problog_string.pl"

-rw-r--r--  1 Isada  staff    944519 Apr 29 21:33 ml_problog_string.pl


___

In [0]:
problog_string = open("plp/ml_problog_string.pl","r").read()

In [0]:
print(problog_string[:1000])

9.21402621601013e-05::c1 :- equal(s1,74), equal(s2,56), equal(s3,40), equal(s4,129), equal(s5,84), equal(s6,142), equal(s7,169), equal(s8,34), equal(s9,81).
1.716328430096083e-06::c2 :- equal(s1,74), equal(s2,56), equal(s3,40), equal(s4,129), equal(s5,84), equal(s6,142), equal(s7,169), equal(s8,34), equal(s9,81).
4.6193562958786877e-05::c3 :- equal(s1,74), equal(s2,56), equal(s3,40), equal(s4,129), equal(s5,84), equal(s6,142), equal(s7,169), equal(s8,34), equal(s9,81).
0.003267750272540012::c4 :- equal(s1,74), equal(s2,56), equal(s3,40), equal(s4,129), equal(s5,84), equal(s6,142), equal(s7,169), equal(s8,34), equal(s9,81).
0.996592199573912::c5 :- equal(s1,74), equal(s2,56), equal(s3,40), equal(s4,129), equal(s5,84), equal(s6,142), equal(s7,169), equal(s8,34), equal(s9,81).
1.6629617910054378e-35::c6 :- equal(s1,74), equal(s2,56), equal(s3,40), equal(s4,129), equal(s5,84), equal(s6,142), equal(s7,169), equal(s8,34), equal(s9,81).
2.389686351909108e-05::c1 :- equal(s1,70), equal(s2,58),

In [0]:
def add_query_problog(prolog_string, vector):
    for i in range(9):
        prolog_string = prolog_string + "equal(s"+str(i+1)+","+str(vector[i])+").\n"
    
    for i in range(6):
        prolog_string = prolog_string + "query(c"+str(i+1)+").\n"
    return prolog_string

In [0]:
vector = [71,55,35,117,70,141,169,29,79]
problog_string_with_query = add_query_problog(prolog_string=problog_string,vector=vector)

In [0]:
last = len(problog_string_with_query)
print(problog_string_with_query[last-500:-1])

637108428::c5 :- equal(s1,71), equal(s2,55), equal(s3,35), equal(s4,117), equal(s5,70), equal(s6,141), equal(s7,169), equal(s8,29), equal(s9,79).
1.4124595040998083e-28::c6 :- equal(s1,71), equal(s2,55), equal(s3,35), equal(s4,117), equal(s5,70), equal(s6,141), equal(s7,169), equal(s8,29), equal(s9,79).
equal(s1,71).
equal(s2,55).
equal(s3,35).
equal(s4,117).
equal(s5,70).
equal(s6,141).
equal(s7,169).
equal(s8,29).
equal(s9,79).
query(c1).
query(c2).
query(c3).
query(c4).
query(c5).
query(c6).


In [0]:
p = PrologString(problog_string_with_query)
result = get_evaluatable().create_from(p).evaluate()
print(result)

{c1: 9.214732138700007e-05, c2: 1.0946892666999996e-05, c3: 1.8135762821000006e-05, c4: 0.0023151063122859995, c5: 0.997563663710843, c6: 0.0}


___
## Expert Knowledge Creation

In [0]:
expert_knowledge = """t(_)::c1. %developed
t(_)::c2. %agriculture
t(_)::c3. %herbaceous
t(_)::c4. %shrubland
t(_)::c5. %forest
t(_)::c6. %water

%developed
t(_)::c1 :- bareGroundNDVI.
t(_)::c1 :- scarceVegetationNDVI.
%t(_)::c1 :- iceAndSnowNDVI.
%t(_)::c1 :- waterNDVI.

%agriculture
t(_)::c2 :- bareGroundNDVI.
%t(_)::c2 :- scarceVegetationNDVI.
t(_)::c2 :- mediumVegetationNDVI.
%t(_)::c2 :- iceAndSnowNDVI.
%t(_)::c2 :- waterNDVI.

%herbaceous
t(_)::c3 :- mediumVegetationNDVI.
%t(_)::c3 :- iceAndSnowNDVI.
t(_)::c3 :- scarceVegetationNDVI.

%shrubland
t(_)::c4 :- bareGroundNDVI.
t(_)::c4 :- scarceVegetationNDVI.

%forest
t(_)::c5 :- mediumVegetationNDVI.
t(_)::c5 :- thickVegetationNDVI.

%water
t(_)::c6 :- waterNDVI.

t(_)::cloudsNDVI.
t(_)::bareGroundNDVI :- \+cloudsNDVI.
t(_)::iceAndSnowNDVI :- \+bareGroundNDVI.
t(_)::scarceVegetationNDVI :- \+iceAndSnowNDVI.
t(_)::mediumVegetationNDVI :- \+scarceVegetationNDVI.
t(_)::thickVegetationNDVI :- \+mediumVegetationNDVI.
t(_)::waterNDVI :- \+thickVegetationNDVI.
"""
print(expert_knowledge)

t(_)::c1. %developed
t(_)::c2. %agriculture
t(_)::c3. %herbaceous
t(_)::c4. %shrubland
t(_)::c5. %forest
t(_)::c6. %water

%developed
t(_)::c1 :- bareGroundNDVI.
t(_)::c1 :- scarceVegetationNDVI.
%t(_)::c1 :- iceAndSnowNDVI.
%t(_)::c1 :- waterNDVI.

%agriculture
t(_)::c2 :- bareGroundNDVI.
%t(_)::c2 :- scarceVegetationNDVI.
t(_)::c2 :- mediumVegetationNDVI.
%t(_)::c2 :- iceAndSnowNDVI.
%t(_)::c2 :- waterNDVI.

%herbaceous
t(_)::c3 :- mediumVegetationNDVI.
%t(_)::c3 :- iceAndSnowNDVI.
t(_)::c3 :- scarceVegetationNDVI.

%shrubland
t(_)::c4 :- bareGroundNDVI.
t(_)::c4 :- scarceVegetationNDVI.

%forest
t(_)::c5 :- mediumVegetationNDVI.
t(_)::c5 :- thickVegetationNDVI.

%water
t(_)::c6 :- waterNDVI.

t(_)::cloudsNDVI.
t(_)::bareGroundNDVI :- \+cloudsNDVI.
t(_)::iceAndSnowNDVI :- \+bareGroundNDVI.
t(_)::scarceVegetationNDVI :- \+iceAndSnowNDVI.
t(_)::mediumVegetationNDVI :- \+scarceVegetationNDVI.
t(_)::thickVegetationNDVI :- \+mediumVegetationNDVI.
t(_)::waterNDVI :- \+thickVegetationNDVI.


In [0]:
with open("plp/expert_knowledge.pl", 'w', encoding='utf-8') as fout:
    fout.write(expert_knowledge)

___
## Training Expert Knowledge by Evidences

In [0]:
def peek_list(list_,n): 
    cnt = 0
    for item in list_:
        print(item)
        if (cnt==n):
            break
        cnt = cnt + 1

In [0]:
def peek_tail(string):
    last = len(string)
    print(string[last-400:-1])

In [0]:
def ndvi(sensor3,sensor4):
    if ((sensor3 == 0) and (sensor4 ==0)):
        return 0
    else:
        return (sensor4-sensor3)/(sensor4+sensor3)

In [0]:
list_ = []

In [0]:
for index,row in df.iterrows():
    array = {}
    array['sensor3'] = row['SENSOR3']
    array['sensor4'] = row['SENSOR4']
    array['class'] = row['CLASS']
    array['ndvi_value'] = ndvi(sensor3=row['SENSOR3'],sensor4=row['SENSOR4'])
    list_.append(array)
peek_list(list_=list_,n=2)

{'sensor3': 40, 'sensor4': 129, 'class': 1, 'ndvi_value': 0.5266272189349113}
{'sensor3': 37, 'sensor4': 130, 'class': 5, 'ndvi_value': 0.5568862275449101}
{'sensor3': 48, 'sensor4': 103, 'class': 1, 'ndvi_value': 0.36423841059602646}


In [0]:
for item in list_:
    
    # c1 = Developed
    # c2 = Agriculture
    # c3 = Herbaceous
    # c4 = Shrubland
    # c5 = Forest
    # c6 = Water
    # --------------------------
    # 1. bareGroundNDVI.
    # 2. cloudsNDVI.
    # 3. iceAndSnowNDVI.
    # 4. scarceVegetationNDVI.
    # 5. mediumVegetationNDVI.
    # 6. thickVegetationNDVI.
    # 7. waterNDVI
    # ---------------------------
    
    ndvi = item['ndvi_value']
    if((ndvi>=0.5) and (ndvi<=1)): # 1.Thick Vegetation
        item['ndvi_name'] = 'thickVegetationNDVI'
        
    elif ((ndvi>=0.14) and (ndvi<=0.5)):# 2.Medium Vegetation
        item['ndvi_name'] = 'mediumVegetationNDVI'
        
    elif ((ndvi>=0.09) and (ndvi<=0.14)): # 3.Scarce Vegetation
        item['ndvi_name'] = 'scarceVegetationNDVI'
        
    elif ((ndvi>=0.025) and (ndvi<=0.09)): # 4.Bare Ground
        item['ndvi_name'] = 'bareGroundNDVI'

    elif ((ndvi>=0.002) and (ndvi<=0.025)): # 5.Clouds
        item['ndvi_name'] = 'cloudsNDVI'

    elif ((ndvi>=-0.046) and (ndvi<=0.002)): # 6.Ice and Snow
        item['ndvi_name'] = 'iceAndSnowNDVI'

    elif ((ndvi>=-1) and (ndvi<=0.046)): # 7.Water
        item['ndvi_name'] = 'waterNDVI'

In [0]:
sorted_list = sorted(list_, key=lambda item: item['class'])
peek_list(list_=sorted_list,n=2)

{'sensor3': 40, 'sensor4': 129, 'class': 1, 'ndvi_value': 0.5266272189349113, 'ndvi_name': 'thickVegetationNDVI'}
{'sensor3': 48, 'sensor4': 103, 'class': 1, 'ndvi_value': 0.36423841059602646, 'ndvi_name': 'mediumVegetationNDVI'}
{'sensor3': 159, 'sensor4': 80, 'class': 1, 'ndvi_value': -0.3305439330543933, 'ndvi_name': 'waterNDVI'}


In [0]:
def evidence(class_,ndvi_name):
    evi = "evidence({}).\n".format("c"+str(class_))
    evi = evi + "evidence({}).\n".format(ndvi_name)
    evi = evi + "------\n"
    return evi

In [0]:
string = ""

for item in sorted_list:
    string = string + evidence(class_=item['class'],ndvi_name=item['ndvi_name'])
last = len(string)
string = string[:last-7]
peek_tail(string=string)

ence(waterNDVI).
------
evidence(c6).
evidence(waterNDVI).
------
evidence(c6).
evidence(waterNDVI).
------
evidence(c6).
evidence(waterNDVI).
------
evidence(c6).
evidence(waterNDVI).
------
evidence(c6).
evidence(waterNDVI).
------
evidence(c6).
evidence(waterNDVI).
------
evidence(c6).
evidence(bareGroundNDVI).
------
evidence(c6).
evidence(waterNDVI).
------
evidence(c6).
evidence(waterNDVI).


In [0]:
with open("plp/evidence.pl", 'w', encoding='utf-8') as fout:
    fout.write(string)

In [0]:
!problog lfi expert_knowledge.pl evidence.pl -O expert_knowledge_new.pl
!ls -l | grep "plp/expert_knowledge_new.pl"

-22.41661788750215 [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.41987521, 0.69622345, 0.36806548, 0.4589436, 0.93969765, 0.13161676, 0.94017965, 0.44226883, 0.73136184, 0.6598882, 0.93127563, 0.42028986, 1.0, 1.0, 0.38222158, 1.0, 1.0, 1.0] [t(_)::c1, t(_)::c2, t(_)::c3, t(_)::c4, t(_)::c5, t(_)::c6, t(_)::c1, t(_)::c1, t(_)::c2, t(_)::c2, t(_)::c3, t(_)::c3, t(_)::c4, t(_)::c4, t(_)::c5, t(_)::c5, t(_)::c6, t(_)::cloudsNDVI, t(_)::bareGroundNDVI, t(_)::iceAndSnowNDVI, t(_)::scarceVegetationNDVI, t(_)::mediumVegetationNDVI, t(_)::thickVegetationNDVI, t(_)::waterNDVI] 1232
-rw-r--r--  1 Isada  staff       885 Apr 29 22:36 expert_knowledge_new.pl


___
## Machine Learning and Expert Knowlege Combination 

In [0]:
def ndvi_and_return_name(sensor3,sensor4):
    ndvi = 0
    ndvi_name = ""
    if ((sensor3 == 0) and (sensor4 ==0)):
        ndvi = 0
    else:
        ndvi = (sensor4-sensor3)/(sensor4+sensor3)
    
    if((ndvi>=0.5) and (ndvi<=1)): # 1.Thick Vegetation
        ndvi_name = 'thickVegetationNDVI'
        
    elif ((ndvi>=0.14) and (ndvi<=0.5)):# 2.Medium Vegetation
        ndvi_name = 'mediumVegetationNDVI'
        
    elif ((ndvi>=0.09) and (ndvi<=0.14)): # 3.Scarce Vegetation
        ndvi_name = 'scarceVegetationNDVI'
        
    elif ((ndvi>=0.025) and (ndvi<=0.09)): # 4.Bare Ground
        ndvi_name = 'bareGroundNDVI'

    elif ((ndvi>=0.002) and (ndvi<=0.025)): # 5.Clouds
        ndvi_name = 'cloudsNDVI'

    elif ((ndvi>=-0.046) and (ndvi<=0.002)): # 6.Ice and Snow
        ndvi_name = 'iceAndSnowNDVI'

    elif ((ndvi>=-1) and (ndvi<=0.046)): # 7.Water
        ndvi_name = 'waterNDVI'
    return ndvi_name 
print(ndvi_and_return_name(sensor3=27,sensor4=239))

thickVegetationNDVI


In [0]:
def add_query_problog_with_ndvi(prolog_string, vector):
    for i in range(9):
        prolog_string = prolog_string + "equal(s"+str(i+1)+","+str(vector[i])+").\n"
    prolog_string = prolog_string + ndvi_and_return_name(sensor3=vector[2],sensor4=vector[3]) +".\n"
    for i in range(6):
        prolog_string = prolog_string + "query(c"+str(i+1)+").\n"
    return prolog_string

In [0]:
with open('plp/expert_knowledge_new.pl','r') as file:
    expert_knowledge_new = file.read()
rules = expert_knowledge_new.splitlines()[6:]
rstring = ""
for rule in rules:
    rstring = rstring + rule + '\n'
print(rstring)

0.419875209527565::c1 :- bareGroundNDVI.
0.696223446813728::c1 :- scarceVegetationNDVI.
0.368065482364758::c2 :- bareGroundNDVI.
0.458943604890671::c2 :- mediumVegetationNDVI.
0.939697649676343::c3 :- mediumVegetationNDVI.
0.131616762976077::c3 :- scarceVegetationNDVI.
0.940179646068921::c4 :- bareGroundNDVI.
0.442268833161366::c4 :- scarceVegetationNDVI.
0.731361841768842::c5 :- mediumVegetationNDVI.
0.659888201533512::c5 :- thickVegetationNDVI.
0.931275629158072::c6 :- waterNDVI.
0.420289855071412::cloudsNDVI.
0.999999999997892::bareGroundNDVI :- \+cloudsNDVI.
1.0::iceAndSnowNDVI :- \+bareGroundNDVI.
0.382221577726337::scarceVegetationNDVI :- \+iceAndSnowNDVI.
1.0::mediumVegetationNDVI :- \+scarceVegetationNDVI.
1.0::thickVegetationNDVI :- \+mediumVegetationNDVI.
1.0::waterNDVI :- \+thickVegetationNDVI.



In [0]:
with open('plp/ml_problog_string.pl','r') as file:
    problog_string = file.read()
    
# from expert_knowledge_new.pl
knowledge_base = problog_string + rstring

with open("plp/knowledge_base.pl", 'w', encoding='utf-8') as fout:
    fout.write(knowledge_base)

In [0]:
with open("plp/knowledge_base.pl",'r') as file:
    kb = file.read()

vector = [71,55,35,117,70,141,169,29,79]
kb_with_query = add_query_problog_with_ndvi(prolog_string=kb,vector=vector)
peek_tail(string=kb_with_query)

ceVegetationNDVI :- \+iceAndSnowNDVI.
1.0::mediumVegetationNDVI :- \+scarceVegetationNDVI.
1.0::thickVegetationNDVI :- \+mediumVegetationNDVI.
1.0::waterNDVI :- \+thickVegetationNDVI.
equal(s1,71).
equal(s2,55).
equal(s3,35).
equal(s4,117).
equal(s5,70).
equal(s6,141).
equal(s7,169).
equal(s8,29).
equal(s9,79).
thickVegetationNDVI.
query(c1).
query(c2).
query(c3).
query(c4).
query(c5).
query(c6).


___
## Evaluation

In [0]:
import json

In [0]:
def predict_plp_model(vector):
    with open("plp/knowledge_base.pl",'r') as file:
        kb = file.read()
    kb_with_query = add_query_problog_with_ndvi(prolog_string=kb,vector=vector)
    p = PrologString(kb_with_query)
    result = get_evaluatable().create_from(p).evaluate()
    return result

In [0]:
def convert_result_to_dict(result):
    str_result = str(result)
    split_result = str_result[1:-1].split(',')
    c_names = ['c'+str(i) for i in range(1,6+1)]
    result_dict = {}
    for i in range(6):
        result_dict[c_names[i]] = float(split_result[i].split(':')[1][1:])
    return result_dict

In [0]:
df_sample = df.sample(n=250)
df_sample

Unnamed: 0,CLASS,SENSOR1,SENSOR2,SENSOR3,SENSOR4,SENSOR5,SENSOR6,SENSOR7,SENSOR8,SENSOR9
897,1,82,68,64,106,92,148,180,51,70
842,5,105,93,103,68,148,147,177,139,64
257,1,94,86,105,80,130,153,190,96,60
296,1,65,50,29,127,63,148,180,23,85
964,1,69,53,41,135,87,140,165,34,66
366,1,75,54,41,98,65,149,181,33,77
53,5,70,51,33,115,64,139,165,26,67
30,1,93,76,72,81,114,142,168,78,63
333,5,80,63,57,75,75,150,185,45,64
717,5,98,85,88,73,118,160,201,88,58


In [0]:
X_sample = df_sample[[item for item in df_sample.columns if item != 'CLASS']]
y_sample = df_sample['CLASS']

### 1. GuassianNB alone

In [0]:
y_hat_sample = model.predict(X_sample)
gnb = pd.DataFrame(model.predict_proba(X_sample))

In [0]:
gnb_pred = pd.concat([gnb, pd.DataFrame(y_hat_sample, columns=['CLASS_pred'])], axis=1)

Unnamed: 0,1,2,3,4,5,6,CLASS_pred
0,7.858084e-03,8.261031e-02,0.000809,7.074954e-02,8.379730e-01,1.074675e-29,5
1,8.313216e-01,1.819681e-05,0.161704,6.956396e-03,8.512325e-08,7.463358e-62,1
2,8.527420e-01,6.429249e-03,0.083824,5.691458e-02,9.011293e-05,2.200665e-45,1
3,1.972761e-03,7.583813e-07,0.000729,7.835015e-03,9.894630e-01,7.146919e-38,5
4,1.830771e-05,2.173620e-08,0.000016,8.251537e-04,9.991408e-01,1.701890e-31,5
5,3.010772e-03,3.615553e-03,0.000181,1.478715e-02,9.784052e-01,3.384128e-26,5
6,2.952773e-05,1.613786e-06,0.000005,7.290283e-04,9.992349e-01,8.026870e-22,5
7,5.845627e-02,1.215654e-01,0.004373,3.279348e-01,4.876701e-01,4.077564e-26,5
8,1.573227e-02,1.564192e-02,0.000484,3.647898e-02,9.316632e-01,4.590291e-19,5
9,9.161986e-01,1.017019e-04,0.060928,2.273717e-02,3.427401e-05,5.992811e-44,1


In [0]:
print("model1")
gnb_class1 = gnb_pred[gnb_pred['CLASS_pred'] == 1]
for i in range(1,6+1):
    print("avg. for class{} given highest class 1 is {}".format(i,gnb_class1[i].mean()))
print("-----")
gnb_class2 = gnb_pred[gnb_pred['CLASS_pred'] == 2]
for i in range(1,6+1):
    print("avg. for class{} given highest class 2 is {}".format(i,gnb_class2[i].mean()))
print("-----")
gnb_class3 = gnb_pred[gnb_pred['CLASS_pred'] == 3]
for i in range(1,6+1):
    print("avg. for class{} given highest class 3 is {}".format(i,gnb_class3[i].mean()))
print("-----")
gnb_class4 = gnb_pred[gnb_pred['CLASS_pred'] == 4]
for i in range(1,6+1):
    print("avg. for class{} given highest class 4 is {}".format(i,gnb_class4[i].mean()))
print("-----")
gnb_class5 = gnb_pred[gnb_pred['CLASS_pred'] == 5]
for i in range(1,6+1):
    print("avg. for class{} given highest class 5 is {}".format(i,gnb_class5[i].mean()))
print("-----")
gnb_class6 = gnb_pred[gnb_pred['CLASS_pred'] == 6]
for i in range(1,6+1):
    print("avg. for class{} given highest class 6 is {}".format(i,gnb_class6[i].mean()))

model1
avg. for class1 given highest class 1 is 0.7185566104564137
avg. for class2 given highest class 1 is 0.035436300012783924
avg. for class3 given highest class 1 is 0.12114700620177175
avg. for class4 given highest class 1 is 0.10725500369570316
avg. for class5 given highest class 1 is 0.017605079633327563
avg. for class6 given highest class 1 is 1.0460460682389136e-21
-----
avg. for class1 given highest class 2 is 0.14314692663384318
avg. for class2 given highest class 2 is 0.49830025121954535
avg. for class3 given highest class 2 is 0.032252225450990574
avg. for class4 given highest class 2 is 0.23809685232045963
avg. for class5 given highest class 2 is 0.08820374437516071
avg. for class6 given highest class 2 is 5.361734786591968e-32
-----
avg. for class1 given highest class 3 is 0.13656724722215993
avg. for class2 given highest class 3 is 9.121066826310461e-07
avg. for class3 given highest class 3 is 0.8514065408678496
avg. for class4 given highest class 3 is 0.011723767626311

In [0]:
gnb.columns = [1,2,3,4,5,6]
for i in range(1,6+1):
    print("mean of class{} = {}".format(i,gnb[i].mean()))

mean of class1 = 0.21651795691079948
mean of class2 = 0.05319782801869601
mean of class3 = 0.07075098366841824
mean of class4 = 0.07565231275592703
mean of class5 = 0.5633900920055384
mean of class6 = 0.020490826640621015


In [0]:
print(classification_report(y_sample,  y_hat_sample ))

              precision    recall  f1-score   support

           1       0.61      0.51      0.55        79
           2       0.09      0.33      0.14         3
           3       0.30      0.09      0.14        32
           4       0.00      0.00      0.00        13
           5       0.65      0.82      0.73       118
           6       1.00      1.00      1.00         5

   micro avg       0.58      0.58      0.58       250
   macro avg       0.44      0.46      0.43       250
weighted avg       0.56      0.58      0.56       250



In [0]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_sample , y_hat_sample)

array([[40,  2,  3,  3, 31,  0],
       [ 0,  1,  0,  0,  2,  0],
       [12,  3,  3,  4, 10,  0],
       [ 2,  2,  0,  0,  9,  0],
       [12,  3,  4,  2, 97,  0],
       [ 0,  0,  0,  0,  0,  5]])

### 2. GuassianNB with Expert Knowledge

In [0]:
sensor_names = ["SENSOR"+str(i) for i in range(1,9+1)]
y_pred_class1 = []
y_pred_class2 = []
y_pred_class3 = []
y_pred_class4 = []
y_pred_class5 = []
y_pred_class6 = []

for index,row in df_sample.iterrows():
    vector_list = []
    for i in range(len(sensor_names)):
        vector_list.append(row[sensor_names[i]])
    result = predict_plp_model(vector_list)
    result_dict = convert_result_to_dict(result)
    y_pred_class1.append(result_dict['c1'])
    y_pred_class2.append(result_dict['c2'])
    y_pred_class3.append(result_dict['c3'])
    y_pred_class4.append(result_dict['c4'])
    y_pred_class5.append(result_dict['c5'])
    y_pred_class6.append(result_dict['c6'])

In [0]:
list_y_pred = []
list_y_pred.append(y_pred_class1)
list_y_pred.append(y_pred_class2)
list_y_pred.append(y_pred_class3)
list_y_pred.append(y_pred_class4)
list_y_pred.append(y_pred_class5)
list_y_pred.append(y_pred_class6)

df_result = pd.DataFrame(list_y_pred).T
df_result.columns = [i for i in range(1,6+1)]
df_result['CLASS_pred'] = df_result.idxmax(axis=1)
df_result.tail()

Unnamed: 0,1,2,3,4,5,6,CLASS_pred
245,0.333088,0.574414,0.941458,0.552298,0.999062,0.931276,5
246,0.33299,0.574396,0.941457,0.551406,0.999651,0.931276,5
247,0.007179,0.463738,0.93972,0.05659,0.980391,0.931276,5
248,0.3329,0.510127,1.0,0.550894,0.715525,0.931276,3
249,0.333737,0.510263,0.760696,0.556874,0.997794,0.0,5


In [0]:
print("model2")
df_result_class1 = df_result[df_result['CLASS_pred'] == 1]
df_result_class2 = df_result[df_result['CLASS_pred'] == 2]
df_result_class3 = df_result[df_result['CLASS_pred'] == 3]
df_result_class4 = df_result[df_result['CLASS_pred'] == 4]
df_result_class5 = df_result[df_result['CLASS_pred'] == 5]
df_result_class6 = df_result[df_result['CLASS_pred'] == 6]
df_result_class = []
for i in range(1,6+1):
    print("avg. for class{} given highest class 1 is {}".format(i,df_result_class1[i].mean()))
print("-----")
for i in range(1,6+1):
    print("avg. for class{} given highest class 2 is {}".format(i,df_result_class2[i].mean()))
print("-----")
for i in range(1,6+1):
    print("avg. for class{} given highest class 3 is {}".format(i,df_result_class3[i].mean()))
print("-----")
for i in range(1,6+1):
    print("avg. for class{} given highest class 4 is {}".format(i,df_result_class4[i].mean()))
print("-----")
for i in range(1,6+1):
    print("avg. for class{} given highest class 5 is {}".format(i,df_result_class5[i].mean()))
print("-----")
for i in range(1,6+1):
    print("avg. for class{} given highest class 6 is {}".format(i,df_result_class6[i].mean()))

model2
avg. for class1 given highest class 1 is 0.9276010754935583
avg. for class2 given highest class 1 is 0.44280939137421693
avg. for class3 given highest class 1 is 0.5911192376125931
avg. for class4 given highest class 1 is 0.6396489667413799
avg. for class5 given highest class 1 is 0.6983726086865635
avg. for class6 given highest class 1 is 0.5881740815735191
-----
avg. for class1 given highest class 2 is nan
avg. for class2 given highest class 2 is nan
avg. for class3 given highest class 2 is nan
avg. for class4 given highest class 2 is nan
avg. for class5 given highest class 2 is nan
avg. for class6 given highest class 2 is nan
-----
avg. for class1 given highest class 3 is 0.525092560567317
avg. for class2 given highest class 3 is 0.6038820702215473
avg. for class3 given highest class 3 is 0.9495753126226518
avg. for class4 given highest class 3 is 0.6100388413045518
avg. for class5 given highest class 3 is 0.7830477066410176
avg. for class6 given highest class 3 is 0.91301532

In [0]:
for i in range(1,6+1):
    print("mean of class{} = {}".format(i,df_result[i].mean()))

mean of class1 = 0.5026710188087433
mean of class2 = 0.5568498153800981
mean of class3 = 0.8353596607938861
mean of class4 = 0.6104527141734526
mean of class5 = 0.875332490616419
mean of class6 = 0.7328528413394987


In [0]:
print(classification_report(y_sample, df_result['CLASS_pred']))

              precision    recall  f1-score   support

           1       0.63      0.15      0.24        79
           2       0.00      0.00      0.00         3
           3       0.33      0.53      0.41        32
           4       0.15      0.23      0.18        13
           5       0.69      0.76      0.73       118
           6       0.17      1.00      0.29         5

   micro avg       0.51      0.51      0.51       250
   macro avg       0.33      0.45      0.31       250
weighted avg       0.58      0.51      0.49       250



  'precision', 'predicted', average, warn_for)


In [0]:
confusion_matrix(y_sample, df_result['CLASS_pred'])

array([[12,  0, 20,  6, 27, 14],
       [ 0,  0,  2,  1,  0,  0],
       [ 3,  0, 17,  3,  6,  3],
       [ 0,  0,  2,  3,  7,  1],
       [ 4,  0, 10,  7, 90,  7],
       [ 0,  0,  0,  0,  0,  5]])

### 3. GuassianNB with Post-Feature Extraction with Expert Knowledge

In [0]:
with open("plp/prolog_string1.txt",'r') as file:
    prolog_string_with_sig = file.read()
prolog_string_with_sig1 = prolog_string_with_sig.replace('class','c')
prolog_string_with_sig2 = prolog_string_with_sig1.replace('sensor','s')
peek_tail(prolog_string_with_sig2 )

134261927566::c1 :- equal(s2,55), equal(s6,141), equal(s7,169), equal(s9,79).
0.000232138069116::c2 :- equal(s2,55), equal(s4,117), equal(s7,169), equal(s9,79).
1.55169887992e-07::c3 :- equal(s5,70), equal(s7,169), equal(s9,79).
0.0327939805109::c4 :- equal(s2,55), equal(s4,117), equal(s9,79).
0.778532819829::c5 :- equal(s4,117), equal(s9,79).
1.32055379437e-13::c6 :- equal(s7,169), equal(s9,79).


In [0]:
with open('plp/expert_knowledge_new.pl','r') as file:
    expert_knowledge_new = file.read()
rules = expert_knowledge_new.splitlines()[6:]
rstring = ""
for rule in rules:
    rstring = rstring + rule + '\n'
print(rstring)

0.419875209527565::c1 :- bareGroundNDVI.
0.696223446813728::c1 :- scarceVegetationNDVI.
0.368065482364758::c2 :- bareGroundNDVI.
0.458943604890671::c2 :- mediumVegetationNDVI.
0.939697649676343::c3 :- mediumVegetationNDVI.
0.131616762976077::c3 :- scarceVegetationNDVI.
0.940179646068921::c4 :- bareGroundNDVI.
0.442268833161366::c4 :- scarceVegetationNDVI.
0.731361841768842::c5 :- mediumVegetationNDVI.
0.659888201533512::c5 :- thickVegetationNDVI.
0.931275629158072::c6 :- waterNDVI.
0.420289855071412::cloudsNDVI.
0.999999999997892::bareGroundNDVI :- \+cloudsNDVI.
1.0::iceAndSnowNDVI :- \+bareGroundNDVI.
0.382221577726337::scarceVegetationNDVI :- \+iceAndSnowNDVI.
1.0::mediumVegetationNDVI :- \+scarceVegetationNDVI.
1.0::thickVegetationNDVI :- \+mediumVegetationNDVI.
1.0::waterNDVI :- \+thickVegetationNDVI.



In [0]:
new_kb = prolog_string_with_sig2 + rstring 

In [0]:
def predict_plp_model_1(vector):
    kb_with_query = add_query_problog_with_ndvi(prolog_string=new_kb,vector=vector)
    p = PrologString(kb_with_query)
    result = get_evaluatable().create_from(p).evaluate()
    return result

In [0]:
sensor_names = ["SENSOR"+str(i) for i in range(1,9+1)]
y_pred_class1_m3 = []
y_pred_class2_m3 = []
y_pred_class3_m3 = []
y_pred_class4_m3 = []
y_pred_class5_m3 = []
y_pred_class6_m3 = []

for index,row in df_sample.iterrows():
    vector_list = []
    for i in range(len(sensor_names)):
        vector_list.append(row[sensor_names[i]])
    result = predict_plp_model_1(vector_list)
    result_dict = convert_result_to_dict(result)
    y_pred_class1_m3.append(result_dict['c1'])
    y_pred_class2_m3.append(result_dict['c2'])
    y_pred_class3_m3.append(result_dict['c3'])
    y_pred_class4_m3.append(result_dict['c4'])
    y_pred_class5_m3.append(result_dict['c5'])
    y_pred_class6_m3.append(result_dict['c6'])

In [0]:
list_y_pred_m3 = []
list_y_pred_m3.append(y_pred_class1_m3)
list_y_pred_m3.append(y_pred_class2_m3)
list_y_pred_m3.append(y_pred_class3_m3)
list_y_pred_m3.append(y_pred_class4_m3)
list_y_pred_m3.append(y_pred_class5_m3)
list_y_pred_m3.append(y_pred_class6_m3)

df_result_m3 = pd.DataFrame(list_y_pred_m3).T
df_result_m3.columns = [i for i in range(1,6+1)]
df_result_m3['CLASS_pred'] = df_result_m3.idxmax(axis=1)
df_result_m3.tail()

Unnamed: 0,1,2,3,4,5,6,CLASS_pred
245,0.334255,0.575419,0.941456,0.56863,0.846535,0.931276,3
246,0.333713,0.575867,0.941456,0.587094,0.803261,0.931276,3
247,0.001276,0.46004,0.939698,0.051484,0.885352,0.931276,3
248,0.333198,0.510127,0.999833,0.550894,0.715525,0.931276,3
249,0.335157,0.511015,0.760645,0.572426,0.944155,0.0,5


In [0]:
print("model3")
df_result_class1 = df_result_m3[df_result_m3['CLASS_pred'] == 1]
df_result_class2 = df_result_m3[df_result_m3['CLASS_pred'] == 2]
df_result_class3 = df_result_m3[df_result_m3['CLASS_pred'] == 3]
df_result_class4 = df_result_m3[df_result_m3['CLASS_pred'] == 4]
df_result_class5 = df_result_m3[df_result_m3['CLASS_pred'] == 5]
df_result_class6 = df_result_m3[df_result_m3['CLASS_pred'] == 6]
df_result_class = []
for i in range(1,6+1):
    print("avg. for class{} given highest class 1 is {}".format(i,df_result_class1[i].mean()))
print("-----")
for i in range(1,6+1):
    print("avg. for class{} given highest class 2 is {}".format(i,df_result_class2[i].mean()))
print("-----")
for i in range(1,6+1):
    print("avg. for class{} given highest class 3 is {}".format(i,df_result_class3[i].mean()))
print("-----")
for i in range(1,6+1):
    print("avg. for class{} given highest class 4 is {}".format(i,df_result_class4[i].mean()))
print("-----")
for i in range(1,6+1):
    print("avg. for class{} given highest class 5 is {}".format(i,df_result_class5[i].mean()))
print("-----")
for i in range(1,6+1):
    print("avg. for class{} given highest class 6 is {}".format(i,df_result_class6[i].mean()))

model3
avg. for class1 given highest class 1 is 0.7840665049754832
avg. for class2 given highest class 1 is 0.24798211920830782
avg. for class3 given highest class 1 is 0.1316210163109092
avg. for class4 given highest class 1 is 0.7811109228950528
avg. for class5 given highest class 1 is 0.739706097679348
avg. for class6 given highest class 1 is 0.0
-----
avg. for class1 given highest class 2 is nan
avg. for class2 given highest class 2 is nan
avg. for class3 given highest class 2 is nan
avg. for class4 given highest class 2 is nan
avg. for class5 given highest class 2 is nan
avg. for class6 given highest class 2 is nan
-----
avg. for class1 given highest class 3 is 0.3233769772593546
avg. for class2 given highest class 3 is 0.5774112933038102
avg. for class3 given highest class 3 is 0.9426300024711518
avg. for class4 given highest class 3 is 0.5731137102189545
avg. for class5 given highest class 3 is 0.8134908519055134
avg. for class6 given highest class 3 is 0.931275758286396
-----
a

In [0]:
for i in range(1,6+1):
    print("mean of class{} = {}".format(i,df_result_m3[i].mean()))

mean of class1 = 0.39306713940339355
mean of class2 = 0.5381418843783684
mean of class3 = 0.8193129850203582
mean of class4 = 0.6381363402831307
mean of class5 = 0.8481315082749611
mean of class6 = 0.7324078358639757


In [0]:
print(classification_report(y_sample, df_result_m3['CLASS_pred']))

              precision    recall  f1-score   support

           1       0.33      0.01      0.02        79
           2       0.00      0.00      0.00         3
           3       0.13      0.50      0.20        32
           4       0.14      0.23      0.18        13
           5       0.59      0.32      0.42       118
           6       0.14      1.00      0.25         5

   micro avg       0.25      0.25      0.25       250
   macro avg       0.22      0.34      0.18       250
weighted avg       0.41      0.25      0.24       250



In [0]:
confusion_matrix(y_sample, df_result_m3['CLASS_pred'])

array([[ 1,  0, 39,  5, 19, 15],
       [ 0,  0,  2,  1,  0,  0],
       [ 0,  0, 16,  5,  6,  5],
       [ 0,  0,  8,  3,  1,  1],
       [ 2,  0, 62,  7, 38,  9],
       [ 0,  0,  0,  0,  0,  5]])