In [75]:
import os
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Top-1 classifier labels that count as candidate breeding sites.
# select_rows() keeps only rows whose '1st_result' is one of these strings,
# so each entry must match the label text in features.csv exactly.
breeding_sites = [
    'pot, flowerpot',
    'stupa, tope',
    'water jug',
    'water bottle',
    'ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin',
    'greenhouse, nursery, glasshouse',
    'milk can',
    'barrel, cask',
    'canoe',
    'rain barrel',
    'lakeside, lakeshore',
    'Dutch oven',
]

# Location of the image set being processed, from the largest administrative
# unit (province) down to the village.
province, district, subdist, village = (
    'ชัยนาท',
    'มโนรมย์',
    'ท่าฉนวน',
    'บ้านคลองรุน',
)

# Folder for this village; features.csv is read from it and
# features_classified.csv is written back into it.
directory = os.path.join('../../GSV', province, district, subdist, village)

# Column layout of features.csv: the image name followed by the five
# (label, score) pairs produced for that image, best match first.
column_names = [
    'img_name',
    '1st_result', '1st_score',
    '2nd_result', '2nd_score',
    '3rd_result', '3rd_score',
    '4th_result', '4th_score',
    '5th_result', '5th_score',
]
features_path = os.path.join(directory, 'features.csv')

# The names are passed explicitly, so pandas treats the first line of the file
# as data rather than as a header. Rows are indexed by image name.
features = pd.read_csv(features_path, sep=';', names=column_names, index_col='img_name')
def select_rows(features, breeding_sites, threshold=0.1):
    """Keep the rows whose top-1 prediction is a confident breeding-site label.

    Parameters
    ----------
    features : pandas.DataFrame
        Must contain the columns '1st_score' and '1st_result'.
    breeding_sites : list-like
        Labels accepted in the '1st_result' column.
    threshold : float, default 0.1
        Minimum (inclusive) '1st_score' a row needs to be kept.

    Returns
    -------
    pandas.DataFrame
        The rows passing both filters, in their original order.

    The row count before filtering and after each filter is printed.
    """
    print('total:', len(features))

    # Filter 1: the top-1 score must reach the threshold.
    confident_mask = features['1st_score'] >= threshold
    confident = features[confident_mask]
    print('after thresh:',len(confident))

    # Filter 2: the top-1 label must be one of the accepted labels.
    is_breeding_site = confident['1st_result'].isin(breeding_sites)
    selected = confident.loc[is_breeding_site]
    print('features:',len(selected))
    return selected

def add_segment(features):
    """Return a copy of ``features`` with a 'segment' column parsed from the index.

    Each index entry (an image name) is scanned for its first '-'; the
    segment is everything after the second '_' that follows that '-'.
    For example, '..._2013-09_5_Tree' yields 'Tree'.

    The input frame is left untouched and a new frame is returned. The frame
    passed in is normally a filtered slice of a larger one, and writing a
    column into such a slice in place is what pandas warns about with
    SettingWithCopyWarning.

    Parameters
    ----------
    features : pandas.DataFrame
        Frame indexed by image-name strings.

    Returns
    -------
    pandas.DataFrame
        New frame with the same rows and columns plus 'segment'.

    Raises
    ------
    ValueError
        If an image name has no '-' or lacks the two following '_'.
    """
    def _segment_of(img):
        # Jump past the first '_' after the first '-', then past one more '_'.
        # NOTE(review): a '-' earlier in the name (e.g. a negative coordinate)
        # would move the anchor; confirm such names cannot occur.
        tail = img[img.index('_', img.index('-')) + 1:]
        return tail[tail.index('_') + 1:]

    segments = [_segment_of(img) for img in features.index]
    return features.assign(segment=segments)
    
# Keep confident breeding-site detections (score >= 0.15 instead of the
# default 0.1), then tag each remaining row with the segment parsed from its
# image name.
features = add_segment(
    select_rows(features, breeding_sites, threshold=0.15)
)
features.head()

total: 8568
after thresh: 3004
features: 101


Unnamed: 0_level_0,1st_result,1st_score,2nd_result,2nd_score,3rd_result,3rd_score,4th_result,4th_score,5th_result,5th_score,segment
img_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
15.38203756895384_100.1636399994293_120_2013-09_5_Tree,"lakeside, lakeshore",0.221853,"worm fence, snake fence, snake-rail fence, Vir...",0.110338,swing,0.050111,golf ball,0.021523,"golfcart, golf cart",0.017961,Tree
15.38207965704545_100.1512157151918_180_2013-07_8_Tree,"lakeside, lakeshore",0.211036,"valley, vale",0.152368,"promontory, headland, head, foreland",0.034948,"worm fence, snake fence, snake-rail fence, Vir...",0.028328,"radio telescope, radio reflector",0.024887,Tree
15.37803305245586_100.1695462175645_0_2013-09_12_Pedestrian,"lakeside, lakeshore",0.172551,"valley, vale",0.030604,"mountain bike, all-terrain bike, off-roader",0.029575,"black stork, Ciconia nigra",0.025024,"brown bear, bruin, Ursus arctos",0.024581,Pedestrian
15.37814466476857_100.162777803804_60_2013-09_6_SignSymbol,"lakeside, lakeshore",0.278344,"worm fence, snake fence, snake-rail fence, Vir...",0.06204,"mountain bike, all-terrain bike, off-roader",0.031035,"valley, vale",0.023419,"mobile home, manufactured home",0.019623,SignSymbol
15.38207965704545_100.1512157151918_300_2013-07_6_Tree,"lakeside, lakeshore",0.214666,"valley, vale",0.063108,"worm fence, snake fence, snake-rail fence, Vir...",0.044625,rapeseed,0.028637,"mobile home, manufactured home",0.019359,Tree


In [76]:
def preprocess(features):
    """Build the numeric model input from the feature table.

    The five '*_score' columns are removed. In every remaining column,
    missing values become -1 and object-typed (text) columns are replaced by
    integer codes from a freshly fitted LabelEncoder.

    NOTE(review): each encoder is fit on the frame passed in, so the integer
    codes depend on which values happen to occur in that frame. Confirm that
    this matches the encoding the downstream model was trained with.

    Parameters
    ----------
    features : pandas.DataFrame
        Must contain '1st_score' ... '5th_score'. It is not modified.

    Returns
    -------
    pandas.DataFrame
        New frame without the score columns, with text columns encoded.
    """
    score_columns = ['1st_score', '2nd_score', '3rd_score', '4th_score', '5th_score']
    encoded = features.drop(score_columns, axis=1)

    for name in encoded.columns:
        encoded[name] = encoded[name].fillna(-1)
        if encoded[name].dtype == 'object':
            # Text column: map each distinct value to an integer code.
            values = list(encoded[name].values)
            encoder = LabelEncoder()
            encoder.fit(values)
            encoded[name] = encoder.transform(values)
    return encoded

In [77]:
# Model input: the selected rows with score columns removed and the label and
# segment text replaced by integer codes.
data = preprocess(features)
data.head()

Unnamed: 0_level_0,1st_result,2nd_result,3rd_result,4th_result,5th_result,segment
img_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
15.38203756895384_100.1636399994293_120_2013-09_5_Tree,0,16,19,5,11,7
15.38207965704545_100.1512157151918_180_2013-07_8_Tree,0,15,15,27,23,7
15.37803305245586_100.1695462175645_0_2013-09_12_Pedestrian,0,15,11,2,6,4
15.37814466476857_100.162777803804_60_2013-09_6_SignSymbol,0,16,11,26,15,6
15.38207965704545_100.1512157151918_300_2013-07_6_Tree,0,15,23,17,15,7


In [81]:
# Load the saved XGBoost model from disk and score every preprocessed row.
bst = xgb.Booster() 
bst.load_model('../../dataset/xgb.model')
# NOTE(review): `data` was label-encoded with encoders fit on this frame only;
# confirm the integer codes agree with the encoding used to train the model.
y_xgb = bst.predict(xgb.DMatrix(data))

# Round each raw model output to an integer class label.
predicted = [int(round(value)) for value in y_xgb]

# NOTE(review): this adds 'cls' to `features` in place, so calling
# preprocess(features) again after this cell would include 'cls' in the model
# input.
features['cls'] = predicted

# Keep only the rows predicted as class 1 and report how many remain.
classified = features.loc[features['cls'] == 1]
print('classified:', len(classified))

classified: 82


In [82]:
# Remove the helper columns added in this notebook ('cls' and 'segment') so
# that only the result/score columns are left.
# errors='ignore' keeps this cell re-runnable: `classified` is rebound to its
# own result, so on a second run the columns are already gone and a plain
# drop would raise KeyError.
classified = classified.drop(['cls', 'segment'], axis=1, errors='ignore')
classified.head()

Unnamed: 0_level_0,1st_result,1st_score,2nd_result,2nd_score,3rd_result,3rd_score,4th_result,4th_score,5th_result,5th_score
img_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
15.38203756895384_100.1636399994293_120_2013-09_5_Tree,"lakeside, lakeshore",0.221853,"worm fence, snake fence, snake-rail fence, Vir...",0.110338,swing,0.050111,golf ball,0.021523,"golfcart, golf cart",0.017961
15.38207965704545_100.1512157151918_180_2013-07_8_Tree,"lakeside, lakeshore",0.211036,"valley, vale",0.152368,"promontory, headland, head, foreland",0.034948,"worm fence, snake fence, snake-rail fence, Vir...",0.028328,"radio telescope, radio reflector",0.024887
15.37803305245586_100.1695462175645_0_2013-09_12_Pedestrian,"lakeside, lakeshore",0.172551,"valley, vale",0.030604,"mountain bike, all-terrain bike, off-roader",0.029575,"black stork, Ciconia nigra",0.025024,"brown bear, bruin, Ursus arctos",0.024581
15.37814466476857_100.162777803804_60_2013-09_6_SignSymbol,"lakeside, lakeshore",0.278344,"worm fence, snake fence, snake-rail fence, Vir...",0.06204,"mountain bike, all-terrain bike, off-roader",0.031035,"valley, vale",0.023419,"mobile home, manufactured home",0.019623
15.38207965704545_100.1512157151918_300_2013-07_6_Tree,"lakeside, lakeshore",0.214666,"valley, vale",0.063108,"worm fence, snake fence, snake-rail fence, Vir...",0.044625,rapeseed,0.028637,"mobile home, manufactured home",0.019359


In [84]:
# Save the rows kept by the classifier into the same folder as features.csv,
# using the same layout: ';'-separated, no header row, image name first.
# header=False is the documented way to omit the header line.
classified.to_csv(
    os.path.join(directory, 'features_classified.csv'),
    sep=';',
    header=False,
)