# StreetView_ImageClassification

This program classifies images of house facades using a Convolutional Neural Network (CNN).
The output has two classes:
    SV (Pictures taken from Street View screenshots) 
    REAL (Real pictures taken in front of the house)

In [None]:
###### IMPORTANT ######
# Set this to the directory into which you cloned this repository.
# Include the trailing slash.
path2repo = '/opt/sasinside/DemoData/'

### Dependencies and connection to the CAS server.

In [None]:
import os
import time
import math
import pandas as pd
import numpy as np
from swat import *
from PIL import Image, ImageStat

# Folder of this demo inside the cloned repository.
# A '/' is inserted only when path2repo is non-empty and does not already end
# with a separator, so "/some/dir" and "/some/dir/" resolve to the same folder.
if path2repo and not path2repo.endswith(('/', '\\')):
    path = path2repo + '/' + 'StreetView_ImageClassification'
else:
    path = path2repo + 'StreetView_ImageClassification'

# CAS connection settings
cashost = 'localhost'
casport = 5570
# NOTE(review): casauth is defined but never passed to CAS() below --
# confirm whether authinfo=casauth was intended.
casauth = '~/.authinfo'
s = CAS(cashost, casport, caslib="cas")

# Register the demo folder as a caslib and make it the active one.
s.addcaslib(path=path,
            caslib='sv_cnn_demo',
            description='Street View classification CNN',
            subdirs=True,   # for access to image directories
            session=False,  # global CASLIB
            activeonadd=True)

s.setsessopt(caslib='sv_cnn_demo')

# Action sets used by the rest of the notebook, loaded in the original order.
for actionset in ('image', 'table', 'astore', 'deepLearn', 'sampling', 'freqtab'):
    s.loadactionset(actionset=actionset)

# Seed reused by the partitioning and training steps.
seed = 123

set_option(print_messages=True)
np.set_printoptions(precision=3, suppress=True)

### Load, partition, pre-process and augment images

In [None]:
# Load the images, split them into training/validation partitions,
# pre-process them and augment them; the elapsed time is printed at the end.
t0=time.time()

s.loadImages(casOut={'name':'Images','replace':True},
             path=path+'/IMAGES',
             quiet=True,
             recurse=True,   # This line and the next recurse over subdirectories
             labelLevels=-1) # and assign labels accordingly (REAL or SV)

# groupBy is passed as a list of variable names (the original used a set
# literal, which has no defined ordering and is not the conventional form).
s.stratified(table={"name":"Images","groupBy":["_label_"]},
             output={"casOut":{"name":"Images","replace":True},"copyVars":"ALL"},
             samppct=25,   #Validation partition percentage
             partind=True, #Create binary variable _PartInd_. 0=Training set, 1=Validation set
             seed=seed)

# The three image functions are listed in this order:
# grayscale conversion, Sobel edge detection, resize.
s.processImages(casOut={'name':'Images','replace':True},
                table='Images',
                copyVars='_PartInd_',  #Keep the partition indicator in the output table
                imageFunctions=[
                        {'options':{'functionType':'CONVERT_COLOR','type':'COLOR2GRAY'}},        #Grayscale
                        {'options':{'functionType':'SOBEL','dx':1,'dy':1,'kernelsize':'SIZE3'}}, #Sobel edge detection
                        {'options':{'functionType':'RESIZE','width':1120,'height':1120}}])       #Resize to match CNN input layer

s.augmentImages(casOut={'name':'Images','replace':True}, #Image augmentation (3X)
                table='Images',
                copyVars='_PartInd_',  #Keep the partition indicator in the output table
                cropList=[{'useWholeImage':True,'mutations':{'horizontalFlip':True}}, #Horizontal mirror
                          {'useWholeImage':False,'x':10,'y':10,'w':1100,'h':1100,'outW':1120,'outH':1120}]) #Crop & resample


print('\nPre-processing time:',round((time.time()-t0)/60,2),'minutes')

### Define the Convolutional Neural Network architecture

In [None]:
# The defined architecture has four groups of layers, each group consisting of:
#
#   - Convolutional layer
#   - Batch normalization layer
#   - Pooling layer
#
# The input layer takes images of size 1120x1120x1 (gray scale).
# An initial pooling layer (pool0) sits between the input and the first group.
# The output layer has two neurons, each one fully connected to the last
# pooling layer (pool4).

modelName='CNN_SV'

# Create a CNN
s.buildModel(model=dict(name=modelName,replace=True),type='CNN')

# input layer: 1 channel, 1120 x 1120
s.addLayer(model=modelName, name='data',
          layer=dict( type='input', nchannels=1, width=1120, height=1120))

# pool0 layer: 1 channel, 5x5 max pooling, stride=5; output = 224 x 224
s.addLayer(model=modelName, name='pool0',
          layer=dict(type='pooling',width=5, height=5, stride=5, pool='max'),
          srcLayers=['data'])

#------------------FIRST GROUP-------------------------------

# conv1 layer: 10 filters, 3x3 conv, stride=2; output = 112 x 112
# (noBias is not set here, unlike conv2-conv4)
s.addLayer(model=modelName, name='conv1',
          layer=dict( type='convolution', nFilters=10, width=3, height=3,
                      stride=2, act='identity'),
          srcLayers=['pool0'])

# bn_conv1 batch norm layer: 10 channels, relu activation; output = 112 x 112
s.addLayer(model=modelName, name='bn_conv1',
          layer=dict( type='batchnorm', act='relu'),
          srcLayers=['conv1'])

# pool1 layer: 10 channels, 3x3 max pooling, stride=2; output = 56 x 56
s.addLayer(model=modelName, name='pool1',
          layer=dict(type='pooling',width=3, height=3, stride=2, pool='max'),
          srcLayers=['bn_conv1'])

#------------------SECOND GROUP-------------------------------

# conv2 layer: 20 filters, 3x3 conv, stride=2; output = 28 x 28
s.addLayer(model=modelName, name='conv2',
          layer=dict(type='convolution', nFilters=20, width=3, height=3,
                     stride=2, noBias=True, act='identity'),
          srcLayers=['pool1'])

# bn_conv2 batch norm layer: 20 channels, relu activation; output = 28 x 28
s.addLayer(model=modelName, name='bn_conv2',
          layer=dict( type='batchnorm', act='relu'),
          srcLayers=['conv2'])

# pool2 layer: 20 channels, 2x2 max pooling, stride=2; output = 14 x 14
s.addLayer(model=modelName, name='pool2',
           layer=dict( type='pooling',width=2,height=2,stride=2,pool='max'),
           srcLayers=['bn_conv2'])

#------------------THIRD GROUP-------------------------------

# conv3 layer: 128 filters, 1x1 conv, stride=1; output = 14 x 14
s.addLayer(model=modelName, name='conv3',
          layer=dict( type='convolution', nFilters=128, width=1, height=1,
                      stride=1, noBias=True, act='identity'),
          srcLayers=['pool2'])

# bn_conv3 batch norm layer: 128 channels, relu activation; output = 14 x 14
s.addLayer(model=modelName, name='bn_conv3',
          layer=dict( type='batchnorm', act='relu'),
          srcLayers=['conv3'])

# pool3 layer: 128 channels, 2x2 mean pooling, stride=2; output = 7 x 7
s.addLayer(model=modelName, name='pool3',
           layer=dict( type='pooling',width=2,height=2,stride=2,pool='mean'),
           srcLayers=['bn_conv3'])

#------------------FOURTH GROUP-------------------------------

# conv4 layer: 256 filters, 1x1 conv, stride=1; output = 7 x 7
s.addLayer(model=modelName, name='conv4',
          layer=dict( type='convolution', nFilters=256, width=1, height=1,
                      stride=1, noBias=True, act='identity'),
          srcLayers=['pool3'])

# bn_conv4 batch norm layer: 256 channels, relu activation; output = 7 x 7
s.addLayer(model=modelName, name='bn_conv4',
          layer=dict( type='batchnorm', act='relu'),
          srcLayers=['conv4'])

# pool4 layer: 256 channels, 7x7 mean pooling, stride=7; output = 1 x 1
s.addLayer(model=modelName, name='pool4',
          layer=dict(type='pooling',width=7, height=7, stride=7, pool='mean'),
          srcLayers=['bn_conv4'])

#------------------OUTPUT LAYER-------------------------------

# fc2 output layer: 2 neurons, softmax activation
s.addLayer(model=modelName, name='fc2',
          layer=dict(type='output',n=2, act='softmax'),
          srcLayers=['pool4'])

### Load pre-trained weights

In [None]:
## If you do not wish to train the network, you can load the pre-trained weights instead

weights='PreTrained1' ## Weight file to use (name without the .csv extension)


# Load the weights and their attribute table, then attach the attributes
# from 'attrs' to the weights table.
s.loadTable(path='Weights/'+weights+'.csv',
            casout={'name':'CNN_SV_WEIGHTS','replace':True})
s.loadTable(path='Weights/ATTRS.sashdat',casout={'name':'attrs','replace':True})
s.table.attribute(task = "ADD",
                  name = 'CNN_SV_WEIGHTS',
                  attrtable = "attrs"
                  )


# Score only the validation partition (_PartInd_=1) so that the contents of
# 'ScoredValidation' match its name and the table written by the training cell.
s.dlscore(model='CNN_SV',
          initWeights={'name':'CNN_SV_WEIGHTS'},
          table={'name':"Images",'where':'_PartInd_=1'},
          casout={'name':'ScoredValidation','replace':True})

### CNN Training

In [None]:
## Train the CNN from scratch (weights are randomly initialized),
## then score the training and validation partitions.

t0 = time.time()

train = s.deepLearn.dlTrain(
    inputs=[dict(name='_image_')],
    modelTable=dict(name=modelName),
    modelWeights=dict(name='CNN_SV_WEIGHTS', replace=True),
    table=dict(name='Images', where='_PartInd_=0'),  # Training set
    seed=seed,
    optimizer=dict(
        algorithm=dict(
            method='ADAM',
            learningRate=0.0005,
            learningRatePolicy='POLY',  # Gradually reduces the learning rate
            power=.50,                  # Regulates the learning-rate decrease
            useLocking=True,            # Synchronous SGD
        ),
        maxEpochs=12,           # Iterations over the entire dataset
        dropout=.5,             # Dropout to reduce the chance of overfitting
        # regL1=5e-5,           # Regularization (disabled)
        # regL2=.1,
        logLevel=3,             # Verbosity (1,2,3)
        totalminiBatchSize=20,  # Batch size
        seed=seed,              # Random seed
    ),
    validTable=dict(name='Images', where='_PartInd_=1'),  # Validation set
    target='_label_',  # Target variable name
)
print(train)

# Report the training time in minutes.
tt = time.time()
print('\nTiempo de entrenamiento:', round((tt - t0) / 60, 2), 'minutos')

## Score training set
score_t = s.dlscore(
    model='CNN_SV',
    initWeights=dict(name='CNN_SV_WEIGHTS'),
    table=dict(name='Images', where='_PartInd_=0'),
    casout=dict(name='ScoredTraining', replace=True),
)

## Score validation set
score_v = s.dlscore(
    model='CNN_SV',
    initWeights=dict(name='CNN_SV_WEIGHTS'),
    table=dict(name='Images', where='_PartInd_=1'),
    casout=dict(name='ScoredValidation', replace=True),
)

print(score_t['ScoreInfo'])
print(score_v['ScoreInfo'])
# Report the scoring time in minutes.
print('\nTiempo de evaluación:', round((time.time() - tt) / 60, 2), 'minutos')

### Save Trained weights

In [None]:
name = 'myWeights'  # Change this to set the name of the saved weights file
s.save(table='CNN_SV_WEIGHTS',
       name='Weights/{}.csv'.format(name),
       replace=True)

### Export as ASTORE and score

In [None]:
# Export the model table together with its weights into the 'ASTORE' table.
s.dlExportmodel(
    modeltable='CNN_SV',
    initweights='CNN_SV_WEIGHTS',
    casout=dict(name='ASTORE', replace=True),
)

# Score the validation rows with the exported store, carrying _path_ along
# into the output table.
s.score(
    rstore='ASTORE',
    table=dict(name='Images', where='_partInd_=1'),
    copyVars='_path_',
    casout=dict(name='ASTORE_SCORE', replace=True),
)