In [1]:
import cv2
import time
import tensorflow as tf
import pandas as pd
import numpy as np
from tensorflow import keras
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from keras.utils import np_utils
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from keras.datasets import cifar10
from keras.models import Model
from tensorflow.keras import datasets, layers, models, losses
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.utils import to_categorical , plot_model
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, precision_score, recall_score, f1_score
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Input

In [9]:
# Load the CIFAR-10 training and test splits (image arrays plus integer class labels) via Keras.
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

Convert both X_train and X_test to grayscale

In [10]:
# Keras serves CIFAR-10 in RGB channel order, so the RGB->gray conversion code is the right one.
# COLOR_BGR2GRAY would apply the red and blue luminance weights to the wrong channels.
X_train = np.array([cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) for image in X_train])
X_test = np.array([cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) for image in X_test])

Flatten each 32x32 grayscale image into a 1024-element vector

In [11]:
# Flatten every image into one row vector. The row count is taken from the array itself and
# the width is inferred (-1), so the cell also works on a subset of the data instead of
# requiring exactly 50000 / 10000 samples.
X_train = X_train.reshape(len(X_train), -1)
X_test = X_test.reshape(len(X_test), -1)

Value normalization: scale pixel values to the [0, 1] range

In [12]:
# Divide by 255 (the largest 8-bit intensity) so that pixel values fall in [0, 1].
X_train, X_test = X_train / 255, X_test / 255

PCA

In [None]:
def PCA(X, num_components):
    """Project ``X`` onto its leading principal components.

    Parameters
    ----------
    X : array-like, one sample per row and one feature per column.
    num_components : int
        Number of principal components (columns) to keep.

    Returns
    -------
    numpy.ndarray
        The mean-centred data expressed in the basis of the ``num_components``
        eigenvectors of the feature covariance matrix that have the largest
        eigenvalues; one row per sample, one column per kept component.
    """
    # Centre every feature so the covariance is taken around the column means.
    X_meaned = X - np.mean(X, axis=0)

    # rowvar=False: columns are the variables, rows are the observations.
    cov_mat = np.cov(X_meaned, rowvar=False)

    # The covariance matrix is symmetric, so eigh applies; its eigenvalues come
    # back in ascending order and therefore have to be reversed below.
    eigen_values, eigen_vectors = np.linalg.eigh(cov_mat)

    # Column indices of the eigenvectors, largest eigenvalue first.
    sorted_index = np.argsort(eigen_values)[::-1]
    eigenvector_subset = eigen_vectors[:, sorted_index[:num_components]]

    # (samples x features) @ (features x components) -> (samples x components)
    return X_meaned @ eigenvector_subset

In [None]:
# Remember where the training rows end *before* stacking, instead of assuming 50000.
# A fixed index would silently move test rows into X_train if this cell is re-run
# after the validation split has shrunk X_train.
n_train = len(X_train)

# The projection is computed on train and test rows stacked together, so both splits
# are expressed in the same 50 principal axes.
# NOTE(review): this lets test samples influence the projection; fit on the training
# rows only if a strictly held-out test set is required.
merged_df = np.concatenate((X_train, X_test), axis=0)
merged_df = PCA(merged_df, 50)

X_train, X_test = merged_df[:n_train], merged_df[n_train:]

In [13]:
# Hold out 10% of the training rows as a validation set; random_state fixes the shuffle so the split is repeatable.
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state = 20)

Label preprocessing

In [14]:
# One-hot encode the class labels of every split into 10 columns.
y_train, y_test, y_val = (
    to_categorical(labels, 10) for labels in (y_train, y_test, y_val)
)

Constructing MLP model

In [None]:
def make_network(input_dim=50, num_classes=10, learning_rate=0.001):
    """Build and compile the fully connected classifier.

    Parameters
    ----------
    input_dim : int
        Number of input features per sample (default 50, the number of PCA
        components kept earlier in the notebook).
    num_classes : int
        Size of the softmax output layer (default 10).
    learning_rate : float
        Learning rate of the SGD optimizer (default 0.001).

    Returns
    -------
    A compiled ``Sequential`` model with hidden layers of 256, 128 and 64 ReLU
    units, trained with categorical cross-entropy and reporting accuracy.
    """
    network = Sequential([
        Dense(256, activation='relu', input_shape=(input_dim,)),
        Dense(128, activation='relu'),
        Dense(64, activation='relu'),
        Dense(num_classes, activation='softmax'),
    ])
    network.compile(
        loss='categorical_crossentropy',
        optimizer=keras.optimizers.SGD(learning_rate=learning_rate),
        metrics=['accuracy'],
    )
    return network


# Size the input layer from the data so the model also fits when the PCA step is
# skipped or the number of kept components changes.
network = make_network(input_dim=X_train.shape[1])

In [None]:
# Stop on the *validation* loss: validation data is passed to fit(), and the training loss
# keeps decreasing while the model overfits, so monitoring it rarely triggers a useful stop.
# restore_best_weights rolls the model back to the epoch with the lowest validation loss
# instead of keeping the weights from `patience` epochs later.
callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode="min",
    patience=5,
    restore_best_weights=True,
)
history = network.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=200,
    validation_data=(X_val, y_val),
    callbacks=[callback],
)

Model evaluation: training vs. validation accuracy per epoch

In [None]:
# Accuracy per epoch for the training and validation sets, drawn on one square figure.
loss_plt = plt.figure(figsize=(10, 10))
accuracy_axes = loss_plt.gca()
accuracy_axes.plot(history.history['accuracy'])
accuracy_axes.plot(history.history["val_accuracy"])
accuracy_axes.set_title('model accuracy')
accuracy_axes.set_ylabel('accuracy')
accuracy_axes.set_xlabel('epoch')
accuracy_axes.legend(["Train accuracy", "Validation accuracy"])
plt.show()

Model evaluation: training vs. validation loss per epoch

In [None]:
# Loss per epoch for the training and validation sets, drawn on one square figure.
loss_plt = plt.figure(figsize=(10, 10))
loss_axes = loss_plt.gca()
loss_axes.plot(history.history['loss'])
loss_axes.plot(history.history["val_loss"])
loss_axes.set_title('model loss')
loss_axes.set_ylabel('loss')
loss_axes.set_xlabel('epoch')
loss_axes.legend(["Train loss", "Validation loss"])
plt.show()

Recall, precision and F1 score on the test set

In [None]:
# Turn the one-hot targets and the softmax outputs back into class indices.
real_label = np.argmax(y_test, axis=1)
predicted_label = np.argmax(network.predict(X_test), axis=1)

# Use the same averaging for all three scores so they are comparable. Micro-averaged
# recall on a single-label multiclass problem is just the overall accuracy, which is
# not what the macro-averaged precision and F1 next to it describe.
recall = recall_score(real_label, predicted_label, average='macro')
precision = precision_score(real_label, predicted_label, average='macro')
f1 = f1_score(real_label, predicted_label, average='macro')
print("recall is :" , recall )
print("precision is :" , precision )
print("f1 is :",f1)

Loss and Accuracy in test data, Training time

In [None]:
# evaluate() yields the loss followed by the compiled metric (accuracy) for the test split.
test_metrics = network.evaluate(X_test, y_test)
test_loss, test_acc = test_metrics
print("loss in test data is: ", test_loss)
print("accuracy in test data is : ", test_acc)

Creating confusion matrix

In [None]:
# Recomputed here so this cell does not depend on the metrics cell having been run.
real_label = np.argmax(y_test, axis=1)
predicted_label = np.argmax(network.predict(X_test), axis=1)

# Raw counts, and the same matrix normalised over each true-label row.
cm = confusion_matrix(real_label, predicted_label)
normalized_cm = confusion_matrix(real_label, predicted_label, normalize='true')

# Size the figure directly. subplots_adjust(right=3, top=1.3) positions the axes outside
# the figure canvas, which only renders fully when the backend crops to a tight bounding box.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(18, 7))
ax[0].title.set_text("confusion Matrix - without Normalization")
ConfusionMatrixDisplay(cm).plot(cmap=plt.cm.Reds, values_format='d', xticks_rotation=45, ax=ax[0])
ax[1].title.set_text("Confusion Matrix - Normalized")
# '.2f' without the stray leading space, which otherwise pads every cell label.
ConfusionMatrixDisplay(normalized_cm).plot(cmap=plt.cm.Reds, values_format='.2f', xticks_rotation=45, ax=ax[1])
fig.tight_layout()
plt.show()

cumulative explained variance

In [None]:
# The notebook defines its own PCA(X, num_components) function, which shadows the
# scikit-learn class imported at the top; a bare PCA() call would raise a TypeError.
# Re-import the estimator under an unambiguous alias for this cell.
from sklearn.decomposition import PCA as SklearnPCA

# Load the raw RGB images under separate names so the prepared X_train / y_train /
# X_test / y_test used by the models are not overwritten.
(rgb_train, _rgb_train_labels), (rgb_test, _rgb_test_labels) = cifar10.load_data()

# One flattened row per image (32 * 32 * 3 = 3072 values), train rows followed by test rows.
result = np.concatenate(
    (rgb_train.reshape(len(rgb_train), -1), rgb_test.reshape(len(rgb_test), -1)),
    axis=0,
)

pca = SklearnPCA().fit(result)
plt.plot(np.cumsum(pca.explained_variance_ratio_))
plt.xlabel('number of components')
plt.ylabel('cumulative explained variance')
plt.show()

Autoencoder

In [None]:
# `In` is IPython's input-history list, so the input tensor gets its own name.
# The input width is read from the data rather than fixed at 1024, so the cell works
# whether or not the PCA reduction was applied to X_train.
inputs = Input(shape=(X_train.shape[1],))

# 50-unit bottleneck; AEncoder below exposes this layer's activations.
Encoder = Dense(50, activation='relu')(inputs)

# NOTE(review): this head predicts the 10 class labels (it is fitted against y_train),
# so the model is a bottleneck classifier rather than a network that reconstructs its input.
# Softmax matches the categorical cross-entropy loss and the MLP defined earlier.
Decoder = Dense(10, activation='softmax')(Encoder)

Autoencoder = Model(inputs=inputs, outputs=Decoder)
AEncoder = Model(inputs, Encoder)
Autoencoder.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
history = Autoencoder.fit(X_train, y_train, epochs=200, batch_size=32, verbose=1)

In [None]:
# Training loss per epoch of the most recent fit.
loss_fig, loss_ax = plt.subplots()
loss_ax.plot(history.history['loss'])
loss_ax.set_title('model loss')
loss_ax.set_ylabel('loss')
loss_ax.set_xlabel('epoch')
plt.show()

In [None]:
# Training accuracy per epoch of the most recent fit.
acc_fig, acc_ax = plt.subplots()
acc_ax.plot(history.history['accuracy'])
acc_ax.set_title('model accuracy')
acc_ax.set_ylabel('accuracy')
acc_ax.set_xlabel('epoch')
plt.show()

correlation matrix

In [None]:
from google.colab import drive
import seaborn as sn

# Mount Google Drive (Colab only) and read the data set from it.
drive.mount('/content/gdrive')
data = pd.read_csv('/content/gdrive/MyDrive/data.csv')

# Correlate the numeric columns only. Several columns (date, street, city, ...) are
# factorized in a later cell, so they are presumably still text here, and pandas >= 2.0
# raises on DataFrame.corr() when non-numeric columns are present. Older pandas dropped
# such columns silently, so the result is unchanged there.
corrMatrix = data.select_dtypes(include='number').corr()
sn.heatmap(corrMatrix, annot=True)
plt.show()

Feature importance: LinearRegression coefficients

In [None]:
from sklearn.linear_model import LinearRegression

# Replace each of these columns by float codes (0.0, 1.0, ...) assigned in order of
# first appearance, so the regressor receives numeric input.
for column in ('date', 'street', 'city', 'statezip', 'country'):
    data[column] = pd.factorize(data[column])[0] + 0.0

# Target is the price; every other column is a feature.
y = data.price
x = data.drop('price', axis=1)

model = LinearRegression()
model.fit(x, y)
importance = model.coef_

# Take the tick labels from the fitted frame so each bar is labelled with the column its
# coefficient belongs to; a hand-typed list can drift from the real column names or order.
# NOTE(review): the coefficients are on the features' raw scales, so their sizes are not
# directly comparable unless the features are standardised first.
bars = tuple(x.columns)
y_pos = np.arange(len(bars))
plt.figure(figsize=(30, 10))
plt.bar(y_pos, importance)
plt.xticks(y_pos, bars)
plt.show()

Feature importance: DecisionTreeRegressor

In [None]:
from sklearn.tree import DecisionTreeRegressor

# Seed the estimator so the fitted tree, and with it the importances, are repeatable.
model = DecisionTreeRegressor(random_state=20)
model.fit(x, y)
importance = model.feature_importances_

# Take the tick labels from the fitted frame so each bar is labelled with the column its
# importance belongs to; a hand-typed list can drift from the real column names or order.
bars = tuple(x.columns)
y_pos = np.arange(len(bars))
plt.figure(figsize=(30, 10))
plt.bar(y_pos, importance)
plt.xticks(y_pos, bars)

plt.show()
