In [3]:
! pip install tensorflow
from numpy import concatenate
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.semi_supervised import LabelPropagation
import tensorflow as tf
import ssl
# NOTE(review): SECURITY - this swaps the default HTTPS context factory for the
# unverified one, so TLS certificates are no longer checked for any stdlib
# HTTPS download made from this kernel. Presumably added to get the CIFAR-10
# download below past a local certificate error - confirm, and prefer fixing
# the machine's CA bundle instead of disabling verification globally.
ssl._create_default_https_context = ssl._create_unverified_context
from sklearn.metrics import accuracy_score,confusion_matrix , precision_score, recall_score
import numpy as np
import matplotlib.pyplot as plt

## Load the CIFAR-10 train/test images (and their original labels) via tensorflow
(x_train, yoriginal), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
print("Shape of training input images",x_train.shape)

# Load the given noisy labels; these replace the original training labels
# (yoriginal is not referenced again in the visible cells).
noiselabels = pd.read_json("cifar10_noisy_labels_task1.json")
y_train = noiselabels.to_numpy()

# Report the shapes of everything that was loaded, in a fixed order.
for caption, arr in (
    ("Shape of training noise labels", y_train),
    ("Shape of test dataset images", x_test),
    ("Shape of test dataset labels", y_test),
):
    print(caption, arr.shape)

# Rescale pixel values by 1/255 for both image sets.
x_train = x_train / 255.0
x_test = x_test / 255.0

## Split training dataset into labeled and unlabeled dataset
# A stratified *random* 20% of the noisy-labelled training set keeps its labels;
# the remaining 80% is treated as unlabelled and handed to label propagation.
# (This is not "the first 10000 samples": train_test_split shuffles by default.)
# The split is seeded so that Restart & Run All reproduces the same partition.
SPLIT_SEED = 42
x_train_lab, x_test_unlab, y_train_lab, y_test_unlab = train_test_split(
    x_train, y_train, test_size=0.80, stratify=y_train, random_state=SPLIT_SEED
)

print("Length of labelled data",len(y_train_lab))
print("Length of unlabelled data",len(y_test_unlab))

## Creating the training input image dataset
# Labelled images first, unlabelled images after - the label vector below is
# built in the same order. NOTE: this order differs from x_train's order.
X_train_mixed = concatenate((x_train_lab, x_test_unlab))

## creating "no label" for the unlabeled data
# -1 is the marker used for samples whose label must be inferred.
nolabel = np.full(len(y_test_unlab), -1)

## Creating combined training dataset labels
# y_train_lab is a column vector; [:, 0] flattens it to 1-D before joining.
y_train_mixed = concatenate((y_train_lab[:,0], nolabel))
assert len(y_train_mixed) == len(X_train_mixed), "images and labels are misaligned"
print("Length of combined labelled and unlabelled data", len(y_train_mixed))

## Propagate the known labels to the samples marked as unlabelled
# Flatten every image into a single feature row before handing it to the
# estimator: (samples, height, width, channels) -> (samples, features).
nsamples, nx, ny, nz = X_train_mixed.shape
d2_train_dataset = X_train_mixed.reshape((nsamples, -1))

## Define the label propagation model and fit it on the mixed dataset
model_propa = LabelPropagation(gamma=.25)
model_propa.fit(d2_train_dataset, y_train_mixed)

# NOTE(review): LEN is not referenced by any visible cell - confirm it is still needed.
LEN=4980
print("Done")

## Define labels for entire training dataset data
# transduction_ holds the label the fitted model assigned to each input row,
# in the same row order as the matrix passed to fit (i.e. X_train_mixed order).
# pandas and numpy are already imported at the top of this cell.
NUM_CLASSES = 10
tran_labels = model_propa.transduction_

# One-hot encode against a fixed category list so the frame always has exactly
# NUM_CLASSES columns, even if propagation never predicts some class. float32
# is requested explicitly because recent pandas versions default to bool
# columns, which is not a natural target dtype for a cross-entropy loss.
dummy = pd.get_dummies(
    pd.Categorical(tran_labels, categories=list(range(NUM_CLASSES))),
    dtype=np.float32,
)
assert dummy.shape == (len(tran_labels), NUM_CLASSES), "unexpected one-hot shape"
print("New labels obtained for training set after labelpropagation\n",dummy)

Shape of training input images (50000, 32, 32, 3)
Shape of training noise labels (50000, 1)
Shape of test dataset images (10000, 32, 32, 3)
Shape of test dataset labels (10000, 1)
Length of labelled data 10000
Length of unlabelled data 40000
Length of combined labelled and unlabelled data 50000
Done
New labels obtained for training set after labelpropagation
        0  1  2  3  4  5  6  7  8  9
0      0  0  0  0  1  0  0  0  0  0
1      0  0  1  0  0  0  0  0  0  0
2      0  0  0  0  0  0  1  0  0  0
3      1  0  0  0  0  0  0  0  0  0
4      0  1  0  0  0  0  0  0  0  0
...   .. .. .. .. .. .. .. .. .. ..
49995  0  0  0  0  0  0  1  0  0  0
49996  0  0  0  0  0  0  0  0  0  1
49997  1  0  0  0  0  0  0  0  0  0
49998  1  0  0  0  0  0  0  0  0  0
49999  0  0  0  0  0  0  0  0  1  0

[50000 rows x 10 columns]




In [None]:
## Create CNN Model
from tensorflow.keras import  layers, models

# Three conv stages followed by a small dense classifier head.
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), dtype='float32', activation='relu', input_shape=(32, 32, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.Flatten())
# Hidden layer uses ReLU. A softmax here would force the 64 features to sum
# to 1 and starve the output layer of signal.
model.add(layers.Dense(64, activation='relu'))
# Output layer emits class probabilities: 'categorical_crossentropy' is used
# with its default from_logits=False, so it must not be fed raw logits.
model.add(layers.Dense(10, activation='softmax'))

## Compile The model
model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

## fit the model
# `dummy` was built from the label-propagation output, whose rows follow
# X_train_mixed (labelled split first, then unlabelled split, both shuffled by
# train_test_split). Training on x_train would pair every image with another
# image's label, so the inputs must be X_train_mixed.
history=model.fit(X_train_mixed, dummy, batch_size=32, epochs=50)

In [None]:
## Evaluate on the test set, plot the training curves, and predict test classes
model.summary()

# One-hot encode the test labels so they match the categorical loss used at compile time.
true_testlabels = pd.get_dummies(y_test[:, 0])
test_loss, test_acc = model.evaluate(x_test, true_testlabels)
print('Test Data loss:', test_loss)
print('Test Data accuracy:', test_acc)

# Plot every metric recorded during training on one set of axes.
history_dict = history.history
ax = pd.DataFrame(history_dict).plot(figsize=(10, 6))
ax.grid(True)
ax.set_ylim(0, 2)
ax.set_title('Accuracy and loss for training', fontsize=16)
ax.set_xlabel('Epochs', fontsize=16)
ax.set_ylabel('Accuracy and loss percentage', rotation=90, fontsize=16)
plt.show()

# Predicted class index per test image = position of the largest model output.
ypred = model.predict(x_test).argmax(axis=1)
class_names = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

In [None]:
from  matplotlib import pyplot 

# Show the first two test images, each titled with its predicted class.
for i in range(2):
    # define subplot: slot i+1 of a 3x3 grid
    pyplot.subplot(330+1+i)
    # Title with the human-readable class name instead of the bare class index;
    # class_names is the index -> name list defined in the previous cell.
    pyplot.title(class_names[ypred[i]])
    # plot test data
    pyplot.imshow(x_test[i])
# show the figure
pyplot.show()
