#  Convolutional Neural Network (CNN, or ConvNet)

### Convolutional:
<img src='../../img/1_CnNorCR4Zdq7pVchdsRGyw.png'>
<img src='../../img/giadascxvfdgephy.gif'>

### add padding:
<img src='../../img/1 nYf_cUIHFEWU1JXGwnz-Ig.gif'>

### polling:
<img src='../../img/maxpool_animation.gif'>



In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.applications.resnet import ResNet152, preprocess_input, decode_predictions
from tensorflow.keras.preprocessing import image
import cv2
import matplotlib.pyplot as PLT


2022-12-06 22:31:23.721485: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-12-06 22:31:23.721526: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2022-12-06 22:31:24.687491: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2022-12-06 22:31:24.687629: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory


# Popular CNN Model

### search : cnn best architecture

* lenet 5
* alexnet
* zfnet
* vgg
* googlenet (inception) - inception v3
* resnet

In [None]:
model = ResNet152(weights='imagenet')

In [None]:
model.summary()

In [None]:
img = image.load_img('../../img/dog2.webp',target_size=(224,224))
img

In [None]:
X = image.img_to_array(img)
print(X.shape)
X = np.expand_dims(X,axis=0)
print(X.shape)
X = preprocess_input(X)

In [None]:
Y_predict = model.predict(X)
Y_predict = decode_predictions(Y_predict, top=10)
Y_predict

In [None]:
capture = cv2.VideoCapture(0)
while True:
    ret, frame = capture.read()
    frame = cv2.resize(frame,(224,224))
    image = frame[...,::-1] #bgr to rgb
    X = np.expand_dims(image,axis=0)
    X = preprocess_input(X)
    Y_predict = model.predict(X)
    name = decode_predictions(Y_predict, top=1)[0][0][1]
    cv2.putText(frame, name, (30,30), cv2.FONT_HERSHEY_SIMPLEX,1.0,(0,0,0))
    cv2.imshow('webcam', frame)
#     cv2.cvtColor(cv2.COLOR_BGR2RGB)
    if cv2.waitKey(1) == 13:
        break
capture.release()
cv2.destroyAllWindows()

In [None]:
import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))

# CNN Movie Polarity Detection

### dataset url : https://www.cs.cornell.edu/people/pabo/movie-review-data/

## search  : keras text_generation

In [2]:
import os
from nltk.corpus import stopwords
from nltk import word_tokenize
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelBinarizer

from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import Model,Sequential,load_model
from tensorflow.keras.layers import Input,Dense,Flatten,Embedding,Conv1D,MaxPool1D,concatenate,Dropout,Conv2D,MaxPool2D,Activation,BatchNormalization

In [None]:
import nltk
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')

In [None]:
from string import punctuation
def punctuation_cleaner(string):
    global punctuation
    punctuation = punctuation+'\n\\//'
    s = str.maketrans('','',punctuation)
    return string.translate(s)

stopwords_en = stopwords.words('english')

In [None]:
negative_document = []
stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()

for file_name in os.listdir('../../../datasets/PolarityDetection/neg/'):
    with open(f'../../../datasets/PolarityDetection/neg/{file_name}') as f:
        text = f.read()
        text = punctuation_cleaner(text)
        text = word_tokenize(text)
        text = [w for w in text if w not in stopwords_en]
        text = set([stemmer.stem(w) for w in text])
        negative_document.append([' '.join(text),0])
        
positive_document = []
for file_name in os.listdir('../../../datasets/PolarityDetection/pos/'):
    with open(f'../../../datasets/PolarityDetection/pos/{file_name}') as f:
        text = f.read()
        text = punctuation_cleaner(text)
        text = word_tokenize(text)
        text = [w for w in text if w not in stopwords_en]
        text = [stemmer.stem(w) for w in text]
        text = [lemmatizer.lemmatize(w) for w in text]
        positive_document.append([' '.join(text),1])

In [None]:
PDF = pd.DataFrame(positive_document,columns=['text','target'])
NDF = pd.DataFrame(negative_document,columns=['text','target'])
DF = pd.concat([PDF,NDF],axis=0)
DF = DF.reset_index(drop=True)

In [None]:
DF.head()

In [None]:
max_len = DF['text'].str.split(' ').str.len().max() 

In [None]:
X_train,X_test,Y_train,Y_test = train_test_split(DF.iloc[:,:-1],DF['target'],test_size=.2)

In [None]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X_train['text'])

In [None]:
vocab_len = len(tokenizer.word_index) + 1 

In [None]:
X_train = tokenizer.texts_to_sequences(X_train['text'])
X_train = pad_sequences(X_train,maxlen=max_len,padding='post')

In [None]:
X_test = tokenizer.texts_to_sequences(X_test['text'])
X_test = pad_sequences(X_test,maxlen=max_len,padding='post')

In [None]:
# input1 = Input(shape=(max_len,))
# embeding1 = Embedding(max_len,100)(input1)
# conv1 = Conv1D(filters=32, kernel_size=4, activation='relu')(embeding1)
# dropout1 = Dropout(.5)(conv1)
# maxpool1 = MaxPool1D(pool_size=2)(dropout1)
# flat = Flatten()(maxpool1)
# dense1 = Dense(10,activation='relu')(flat)
# output = Dense(1,activation='sigmoid')(dense1)

# model = Model(inputs=[input1],outputs=output)

# model.compile(
#     loss='binary_crossentropy',
#     optimizer = 'adam',
#     metrics=['accuracy']
# )

In [None]:
model = Sequential([
    Embedding(input_dim=vocab_len,output_dim=200,input_shape=(max_len,)),
    Conv1D(filters=32, kernel_size=4, activation='relu'),
    Dropout(.2),
    MaxPool1D(pool_size=2),
    Flatten(),
    Dense(10,activation='relu'),
    Dense(1,activation='sigmoid')
])
model.compile(
    loss='binary_crossentropy',
    optimizer = 'adam',
    metrics=['accuracy']
)

In [None]:
history = model.fit(X_train,Y_train,epochs=10,batch_size=100,validation_data=(X_test,Y_test))

In [None]:
model.summary()

In [None]:
# model.save('../../../datasets/models/movie_polarity_detection.h5')

In [None]:
# load_model('../../../datasets/models/movie_polarity_detection.h5')

## Multi Channel

In [None]:
input1 = Input(shape=(max_len,))
embeding1 = Embedding(vocab_len,500)(input1)
conv1 = Conv1D(filters=32, kernel_size=4, activation='relu')(embeding1)
# dropout1 = Dropout(.5)(conv1)
maxpool1 = MaxPool1D(pool_size=2)(conv1)
flat1 = Flatten()(maxpool1)

input2 = Input(shape=(max_len,))
embeding2 = Embedding(vocab_len,500)(input2)
conv2 = Conv1D(filters=32, kernel_size=6, activation='relu')(embeding2)
# dropout2 = Dropout(.5)(conv2)
maxpool2 = MaxPool1D(pool_size=2)(conv2)
flat2 = Flatten()(maxpool2)

input3 = Input(shape=(max_len,))
embeding3 = Embedding(vocab_len,500)(input3)
conv3 = Conv1D(filters=32, kernel_size=8, activation='relu')(embeding3)
# dropout3 = Dropout(.5)(conv3)
maxpool3 = MaxPool1D(pool_size=2)(conv3)
flat3 = Flatten()(maxpool3)

flatX = concatenate([flat1,flat2,flat3])
dense1 = Dense(100,activation='relu')(flatX)
dense2 = Dense(10,activation='relu')(dense1)
output = Dense(1,activation='sigmoid')(dense2)

In [None]:
model = Model(inputs=[input1,input2,input3],outputs=output)
model.compile(
    loss='binary_crossentropy',
    optimizer = 'adam',
    metrics=['accuracy']
)

In [None]:
plot_model(model,show_layer_names=False,show_layer_activations=True)

In [None]:
history = model.fit(
    [X_train,X_train,X_train],
    Y_train,
    epochs=20,
    batch_size=100,
    validation_data=([X_test,X_test,X_test],Y_test)
)

In [None]:
model.save('../../../datasets/models/movie_polarity_detection.h5')

In [None]:
PLT.plot(history.history['accuracy'], color='green', label='train data')
PLT.plot(history.history['val_accuracy'], color='red', label='test data')

In [None]:
PLT.plot(history.history['loss'], color='green', label='train data')
PLT.plot(history.history['val_loss'], color='red', label='test data')

# House Price Regression

In [3]:
import glob

In [4]:
DF = pd.read_csv('../../../datasets/Houses Dataset/HousesInfo.txt',header=None,sep=' ',
                names=['bedrooms','bathrooms','area','zipcode','price'])
DF.index = np.arange(1, len(DF)+1)

In [5]:
DF.head(1)

Unnamed: 0,bedrooms,bathrooms,area,zipcode,price
1,4,4.0,4053,85255,869500


In [6]:
images = []
for i in DF.index.values:
    basepath = os.path.sep.join([
        '../../../datasets/Houses Dataset',
        "{}_*".format(i)
    ])
    imgPaths = sorted(glob.glob(basepath))
    inputImages = []
    for imgPath in imgPaths:
        img = cv2.cvtColor(cv2.resize(cv2.imread(imgPath),(128,128)),cv2.COLOR_BGR2RGB)
        inputImages.append(img)
    outputImages = np.zeros((256,256,3),dtype=np.int32)
    outputImages[0:128,0:128] = inputImages[0]
    outputImages[0:128,128:256] = inputImages[1]
    outputImages[128:256,128:256] = inputImages[2]
    outputImages[128:256,0:128] = inputImages[3]
    images.append(outputImages)
images = np.array(images)

In [7]:
FlattenIMG = pd.DataFrame(images.reshape(535,-1))
FlattenIMG.index = np.arange(1, len(FlattenIMG)+1)
NEWDF = pd.concat([DF,FlattenIMG],axis=1)

MaxPrice = NEWDF['price'].max()
X = NEWDF[NEWDF.columns.difference(['price'])]
Y = NEWDF['price'] / MaxPrice

In [8]:
X.head(1)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,196602,196603,196604,196605,196606,196607,area,bathrooms,bedrooms,zipcode
1,181,156,126,185,168,128,159,124,98,154,...,121,134,134,97,114,175,4053,4.0,4,85255


In [9]:
TABLECOLs = ['area','bathrooms','bedrooms','zipcode']
IMGCOLS = X.columns.difference(TABLECOLs)

In [10]:
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,random_state=3020,test_size=.2)

In [11]:
input1 = Input(shape=(256,256,3))
conv1 = Conv2D(16,(3,3),padding='same',activation='relu')(input1)
batch_norm_1 = BatchNormalization(axis=-1)(conv1)
maxpool1 = MaxPool2D(pool_size=(2,2))(batch_norm_1)
conv2 = Conv2D(32,(3,3),padding='same',activation='relu')(input1)
batch_norm_2 = BatchNormalization(axis=-1)(conv2)
maxpool2 = MaxPool2D(pool_size=(2,2))(batch_norm_2)
conv3 = Conv2D(64,(3,3),padding='same',activation='relu')(input1)
batch_norm_3 = BatchNormalization(axis=-1)(conv3)
maxpool3 = MaxPool2D(pool_size=(2,2))(batch_norm_3)
flatten1 = Flatten()(maxpool3)
dense1 = Dense(100,activation='relu')(flatten1)
batch_norm_4 = BatchNormalization(axis=-1)(dense1)
dense2 = Dense(10,activation='relu')(batch_norm_4)
model1 = Model(input1,dense2)

2022-12-06 22:32:09.052382: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/masoud/anaconda3/lib/python3.9/site-packages/cv2/../../lib64:
2022-12-06 22:32:09.052413: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)
2022-12-06 22:32:09.052446: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: masoud-Aspire-V3-571G
2022-12-06 22:32:09.052455: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: masoud-Aspire-V3-571G
2022-12-06 22:32:09.052544: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: NOT_FOUND: was unable to find libcuda.so DSO loaded into this program
2022-12-06 22:32:09.052588: I tensorflow/

In [12]:
model2 = Sequential()
model2.add(Dense(50,activation='relu',input_dim=4))
model2.add(Dense(25,activation='relu'))
model2.add(Dense(10,activation='relu'))
model2.add(Dense(10,activation='relu'))

In [13]:
combine_input = concatenate([model1.output,model2.output])
dense_final_1 = Dense(10,activation='relu')(combine_input)
dense_final_2 = Dense(1,activation='linear')(dense_final_1) # sigmoid
modelX = Model(inputs=[model1.input,model2.input], outputs=dense_final_2)
modelX.compile(
    loss='mean_absolute_error',
    optimizer = 'adam'
)

In [14]:
histrory = modelX.fit(
    [
        X_train[IMGCOLS].to_numpy().reshape(len(X_train),256,256,3),
        X_train[TABLECOLs]
    ],Y_train,
    validation_data=(
        [
            X_test[IMGCOLS].to_numpy().reshape(len(X_test),256,256,3),
            X_test[TABLECOLs]
        ],Y_test
    ),
    epochs=20,
    batch_size=50
)

Epoch 1/20


2022-12-06 22:33:06.096665: W tensorflow/tsl/framework/cpu_allocator_impl.cc:82] Allocation of 838860800 exceeds 10% of free system memory.
2022-12-06 22:33:06.672227: W tensorflow/tsl/framework/cpu_allocator_impl.cc:82] Allocation of 838860800 exceeds 10% of free system memory.
2022-12-06 22:33:10.453768: W tensorflow/tsl/framework/cpu_allocator_impl.cc:82] Allocation of 838860800 exceeds 10% of free system memory.


1/9 [==>...........................] - ETA: 1:15 - loss: 9132.2617

2022-12-06 22:33:12.902119: W tensorflow/tsl/framework/cpu_allocator_impl.cc:82] Allocation of 838860800 exceeds 10% of free system memory.
2022-12-06 22:33:13.460174: W tensorflow/tsl/framework/cpu_allocator_impl.cc:82] Allocation of 838860800 exceeds 10% of free system memory.


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
plot_model(modelX,show_layer_names=False,show_shapes=True)

In [16]:
Y_predicted = modelX.predict([
    X_test[IMGCOLS].to_numpy().reshape(len(X_test),256,256,3),
    X_test[TABLECOLs]
]) * MaxPrice
Y_predicted = Y_predicted.flatten()



In [18]:
diff = abs(Y_predicted - ( Y_test.values * MaxPrice ))
diff.mean()

1114574.503006676

In [28]:
NEWDF['price'].mean()

589362.8112149533

In [19]:
NEWDF['price'].describe()

count    5.350000e+02
mean     5.893628e+05
std      5.090261e+05
min      2.200000e+04
25%      2.492000e+05
50%      5.290000e+05
75%      7.285000e+05
max      5.858000e+06
Name: price, dtype: float64

In [21]:
from sklearn.metrics import r2_score,mean_absolute_error

In [23]:
r2_score(y_pred=Y_predicted,y_true=( Y_test.values * MaxPrice ) )

-13.75167280848191

In [27]:
mean_absolute_error(y_pred=Y_predicted,y_true=( Y_test.values * MaxPrice ) )

1114574.503006676

# Simple one hot encode

In [None]:
zipbin = LabelBinarizer() #simple one hot encode
zipbin.fit(NEWDF['zipcode'])
zipbin.transform(X_train['zipcode'])