K-Nearest Neighbors Classifier (KNN)

In [None]:
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
import os
from sklearn.metrics import accuracy_score
from skimage import io,transform,filters,color
import random

In [None]:
#preprocessing the images
def preprocess_image(image_path):
    """Load an image file and return it as a flat grayscale feature vector.

    Pipeline: read the file, convert to grayscale, Gaussian-blur (sigma=1),
    resize to 100x100, mirror left/right, rotate by a random angle drawn
    from [-10, 10] degrees, then flatten.

    NOTE: the flip and the random rotation run on every call, so the same
    file produces a different vector each time and test / unseen images are
    perturbed exactly like training images.

    Args:
        image_path: path of the image file to read.

    Returns:
        1-D float array with 100*100 entries.
    """
    from skimage.util import img_as_float

    image=io.imread(image_path)
    if image.ndim==3:
        # rgb2gray expects exactly three channels, so strip an alpha channel first
        if image.shape[-1]==4:
            image=color.rgba2rgb(image)
        image=color.rgb2gray(image)
    else:
        # already single-channel: only bring it to the same float scale rgb2gray produces
        image=img_as_float(image)
    blured_image=filters.gaussian(image,sigma=1)
    resized_image=transform.resize(blured_image,(100,100))
    
    resized_image = np.fliplr(resized_image)
    rotation_angle =random.uniform(-10, 10)
    resized_image = transform.rotate(resized_image, rotation_angle)
    
    return resized_image.flatten()
    
    

In [None]:
#load the images from the respective folder
trainfolder="C:\\Users\\User\\Documents\\digital image processing\\New folder\\data\\training_set"
class_names=["cats","dogs"]

# One sub-folder per class; every file in it (except the '_DS_Store' entries)
# becomes one preprocessed row in `data` with the folder name as its label.
data=[]
label=[]
for class_name in class_names:
    class_path=os.path.join(trainfolder,class_name)
    image_names=[entry for entry in os.listdir(class_path) if not entry.startswith('_DS_Store')]
    for image_name in image_names:
        image_path=os.path.join(class_path,image_name)
        pre_image=preprocess_image(image_path)
        data.append(pre_image)
        label.append(class_name)

# Convert the accumulated Python lists to numpy arrays for scikit-learn.
data=np.array(data)
label=np.array(label)
        


In [None]:
# preprocess_image already returns flattened vectors, so the loaded array is
# used unchanged under the name the later cells expect.
data_2d=data

In [None]:
# The string literal below is a disabled reshape step (3-D image stack -> 2-D
# matrix); it is not executed as code.
'''
#reshape the data into 3D to 2D
print(data.shape)
num_samples, height, width = data.shape
data_2d = data.reshape(num_samples, height * width)
'''
# Show the shape of the loaded training array.
print(data.shape)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the training folder (fixed seed) and score a 5-neighbour KNN on it.
x_train,x_test,y_train,y_test=train_test_split(data_2d,label,random_state=42,test_size=0.2)
knn=KNeighborsClassifier(n_neighbors=5).fit(x_train,y_train)
knn_prediction=knn.predict(x_test)
score=accuracy_score(knn_prediction,y_test)
print(score)

In [None]:
# Sweep k = 1..20 on the existing train/test split and remember the first k
# that reaches the highest hold-out accuracy.
best_accuracy=0
best_k=None

for k in range(1,21):
    knn=KNeighborsClassifier(n_neighbors=k)
    knn.fit(x_train,y_train)
    k_pred=knn.predict(x_test)
    accuracy=accuracy_score(y_test,k_pred)
    
    # strict '>' keeps the smallest k when several k tie
    if accuracy>best_accuracy:
        best_accuracy=accuracy
        best_k=k
# report the winning k, not the loop variable (which is always 20 here)
print("best_k",best_k)
print("best_accuracy",best_accuracy)
    



In [None]:
# Sweep k = 1..25 on the hold-out split, recording every accuracy for plotting
# and keeping the predictions of the best-scoring k.
k_values=list(range(1,26))
accuracy_values=[]
best_accuracy=0
best_k=None
best_prediction=None
for k in k_values:
    knn=KNeighborsClassifier(n_neighbors=k)
    knn.fit(x_train,y_train)
    k_pred=knn.predict(x_test)
    accuracy=accuracy_score(k_pred,y_test)
    accuracy_values.append(accuracy)

    print(f"K:{k},Accuracy:{accuracy}")

    # only a strictly better score replaces the current best
    if not accuracy>best_accuracy:
        continue
    best_accuracy,best_k,best_prediction=accuracy,k,k_pred
print()
print("best_k",best_k)
print("best_accuracy",best_accuracy)

In [None]:
# Raw accuracy list from the k sweep above (one entry per k).
print(accuracy_values)

In [None]:
import matplotlib.pyplot as plt

# Line chart of hold-out accuracy against k, one tick per evaluated k.
plt.figure(figsize=(10,6))
plt.plot(k_values,accuracy_values,marker='o')
plt.xticks(k_values)
plt.xlabel('Number of Neighbors (k)')
plt.ylabel('Accuracy')
plt.title("Accuracy Vs Number of Neighbors(k)")
plt.grid(True)
plt.show()

In [None]:
#testing for unseen data

# Refit with the best k from the sweep, then classify each listed image file.
best_model=KNeighborsClassifier(n_neighbors=best_k).fit(x_train,y_train)
new_image_path=["C:\\Users\\User\\Documents\\digital image processing\\New folder\\data\\dog.4132.jpg"]
new_data=np.array([preprocess_image(image_path) for image_path in new_image_path])

# preprocess_image already flattens, so no reshape is needed before predict
print(new_data.shape)
new_pred=best_model.predict(new_data)
print(new_pred)


Evaluation on the separate test_set folder (without using train_test_split)


In [None]:
testfolder="C:\\Users\\User\\Documents\\digital image processing\\New folder\\data\\test_set"
class_names=["cats","dogs"]

# Same folder walk as for the training data, but over the test_set directory.
data_test=[]
label_test=[]
for class_name in class_names:
    class_path=os.path.join(testfolder,class_name)
    image_names=[entry for entry in os.listdir(class_path) if not entry.startswith('_DS_Store')]
    for image_name in image_names:
        image_path=os.path.join(class_path,image_name)
        pre_image=preprocess_image(image_path)
        data_test.append(pre_image)
        label_test.append(class_name)

data_test=np.array(data_test)
label_test=np.array(label_test)

In [None]:
# Shape of the loaded test array.
print(data_test.shape)


In [None]:
# Train on the whole training folder and score on the separate test folder.
knn=KNeighborsClassifier(n_neighbors=5).fit(data_2d,label)
knn_prediction=knn.predict(data_test)
score=accuracy_score(knn_prediction,label_test)
print(score)


In [None]:
# Number of training labels (one per loaded training image).
print(len(label))

In [None]:
# Sweep k = 1..25, fitting on the full training folder and scoring on the
# test folder; keep every accuracy for plotting and the best k's predictions.
k_values=list(range(1,26))
accuracy_values=[]
best_accuracy=0
best_k=None
best_prediction=None
for k in k_values:
    knn=KNeighborsClassifier(n_neighbors=k)
    knn.fit(data_2d,label)
    k_pred=knn.predict(data_test)
    accuracy=accuracy_score(k_pred,label_test)
    accuracy_values.append(accuracy)

    print(f"K:{k},Accuracy:{accuracy}")

    # only a strictly better score replaces the current best
    if not accuracy>best_accuracy:
        continue
    best_accuracy,best_k,best_prediction=accuracy,k,k_pred
print()
print("best_k",best_k)
print("best_accuracy",best_accuracy)

In [None]:
import matplotlib.pyplot as plt

# Line chart of test-folder accuracy against k, one tick per evaluated k.
plt.figure(figsize=(10,6))
plt.plot(k_values,accuracy_values,marker='o')
plt.xticks(k_values)
plt.xlabel('Number of Neighbors (k)')
plt.ylabel('Accuracy')
plt.title("Accuracy Vs Number of Neighbors(k)")
plt.grid(True)
plt.show()

In [None]:
# Refit KNN with the best k on the full training folder, classify one unseen
# image and display it with the predicted class as the title.
best_model=KNeighborsClassifier(n_neighbors=best_k)
best_model.fit(data_2d,label)
new_image_path=["C:\\Users\\User\\Documents\\digital image processing\\New folder\\data\\cat3.jpg"]
new_data=[]
for image_path in new_image_path:
    preprocessed_image=preprocess_image(image_path)
    new_data.append(preprocessed_image)
new_data=np.array(new_data)

# rows are already flattened by preprocess_image, so no reshape is needed
print(new_data.shape)
new_pred=best_model.predict(new_data)
print(new_pred)

predicted_class = new_pred[0]
print(predicted_class)

# The model returns the same label type it was trained on: a class-name string
# when `label` holds folder names, or an integer index when `label` has been
# label-encoded. Indexing class_names with a string would raise TypeError.
if isinstance(predicted_class,str):
    predicted_name=predicted_class
else:
    predicted_name=class_names[int(predicted_class)]

from PIL import Image
# open the file that was classified explicitly instead of relying on the
# loop variable left over from the loop above
test_image=Image.open(new_image_path[0])
plt.imshow(test_image)
plt.title(f"Predicted_class:{predicted_name}")
plt.axis("off")
plt.show()




------------------------------------ LINEAR CLASSIFIER--------------------------------------------------


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score

# scikit-learn logistic regression with default settings: fit on the training
# folder, score on the test folder.
lr=LogisticRegression().fit(data_2d,label)
lr_predict=lr.predict(data_test)
lr_accuracyscore=accuracy_score(lr_predict,label_test)
print(lr_accuracyscore)


MINI BATCH GRADIENT DESCENT

In [None]:
# Replace the class-name labels with integer codes so they can be used
# arithmetically in the cost function and gradient below.
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()
label=le.fit_transform(label)

def sigmoid(z):
    """Return the logistic function 1/(1+exp(-z)) element-wise.

    Evaluated in a numerically stable form: only exp(-|z|) is computed, so
    large-magnitude inputs cannot overflow.

    Args:
        z: scalar or array-like of real numbers.

    Returns:
        Float scalar for scalar input, otherwise an array shaped like z.
    """
    z=np.asarray(z,dtype=float)
    decay=np.exp(-np.abs(z))  # always in (0, 1]
    # z >= 0: 1/(1+e^-z);  z < 0: e^z/(1+e^z) -- the same value, no overflow
    result=np.where(z>=0,1/(1+decay),decay/(1+decay))
    return result[()]  # unwraps a 0-d result to a scalar

def cost_estimation(x,y,theta):
    """Mean binary cross-entropy of the logistic model `theta` on (x, y).

    A small constant is added inside both logarithms so that a prediction of
    exactly 0 or 1 does not produce log(0).

    Args:
        x: feature matrix, one row per sample.
        y: 0/1 targets, one per row of x.
        theta: weight vector applied as x @ theta.

    Returns:
        The averaged cost as a float.
    """
    epsilon=1e-5
    predictions=sigmoid(x @ theta)
    positive_term=y*np.log(predictions+epsilon)
    negative_term=(1-y) * np.log((1-predictions)+ epsilon)
    return (-1/len(y))*np.sum(positive_term+negative_term)

# Length of theta: one weight per feature column, plus one if a bias is wanted.
bias_term=False
num_features=data_2d.shape[1]+1 if bias_term else data_2d.shape[1]
    
def mini_batch_gradient_descent(x,y,theta,learning_rate,batch_size,num_epochs):
    """Fit logistic-regression weights with mini-batch gradient descent.

    Every epoch reshuffles the samples, takes one gradient step per batch and
    then records the cost over the whole data set.

    Args:
        x: feature matrix, one row per sample.
        y: 0/1 targets aligned with the rows of x.
        theta: initial weight vector.
        learning_rate: step size applied to each batch gradient.
        batch_size: number of samples per batch.
        num_epochs: number of passes over the data.

    Returns:
        (theta, cost_history): the weights after the last epoch and the list
        of per-epoch costs.
    """
    m=len(y)
    cost_history=[]
    for _ in range(num_epochs):
        # fancy indexing builds new arrays, so the caller's x / y stay untouched
        shuffle_indices=np.random.permutation(m)
        x=x[shuffle_indices]
        y=y[shuffle_indices]
        for i in range(0,m,batch_size): # find the best theta
            x_batch=x[i:i+batch_size]
            y_batch=y[i:i+batch_size]
            h=sigmoid(x_batch @ theta )
            # average over the rows actually in this batch: the final batch is
            # shorter whenever m is not a multiple of batch_size
            gradient=(x_batch.T @ (h-y_batch))/len(y_batch)
            theta=theta-(learning_rate * gradient)
        cost_history.append(cost_estimation(x,y,theta))
    return theta,cost_history

                 
        

# Starting weights are drawn uniformly from [0, 1); then train for 100 epochs
# with batches of 32 samples and report the final weights and last-epoch cost.
initial_theta=np.random.rand(num_features)
learning_rate=0.01
num_epoch=100
batch_size=32
best_theta,cost_history=mini_batch_gradient_descent(data_2d,label,initial_theta,learning_rate,batch_size,num_epoch)
print("best_theta:",best_theta)
print()
print("optimised cost=",cost_history[-1])





In [None]:
# Per-epoch cost values recorded during training.
print(cost_history)

In [None]:
# Training cost per epoch for the mini-batch run.
plt.figure(figsize=(10,6))
plt.plot(range(num_epoch),cost_history)
plt.xlabel("epochs")
plt.ylabel("cost")
plt.title("mini batch gradient descent")
plt.grid(True)


EARLY STOPPING METHOD

In [None]:
# Encode the labels as integer codes for the cost function and gradient below.
# NOTE(review): `label` may already be integer-encoded by an earlier cell; in
# that case this re-encoding leaves the 0/1 codes as they are.
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()
label=le.fit_transform(label)

def sigmoid(z):
    """Return the logistic function 1/(1+exp(-z)) element-wise.

    Evaluated in a numerically stable form: only exp(-|z|) is computed, so
    large-magnitude inputs cannot overflow.

    Args:
        z: scalar or array-like of real numbers.

    Returns:
        Float scalar for scalar input, otherwise an array shaped like z.
    """
    z=np.asarray(z,dtype=float)
    decay=np.exp(-np.abs(z))  # always in (0, 1]
    # z >= 0: 1/(1+e^-z);  z < 0: e^z/(1+e^z) -- the same value, no overflow
    result=np.where(z>=0,1/(1+decay),decay/(1+decay))
    return result[()]  # unwraps a 0-d result to a scalar

def cost_estimation(x,y,theta):
    """Mean binary cross-entropy of the logistic model `theta` on (x, y).

    A small constant is added inside both logarithms so that a prediction of
    exactly 0 or 1 does not produce log(0).

    Args:
        x: feature matrix, one row per sample.
        y: 0/1 targets, one per row of x.
        theta: weight vector applied as x @ theta.

    Returns:
        The averaged cost as a float.
    """
    epsilon=1e-5
    predictions=sigmoid(x @ theta)
    positive_term=y*np.log(predictions+epsilon)
    negative_term=(1-y) * np.log((1-predictions)+ epsilon)
    return (-1/len(y))*np.sum(positive_term+negative_term)

# Length of theta: one weight per feature column, plus one if a bias is wanted.
bias_term=False
num_features=data_2d.shape[1]+1 if bias_term else data_2d.shape[1]
    
def mini_batch_gradient_descent(x,y,theta,learning_rate,batch_size,num_epochs,patience=80):
    """Mini-batch gradient descent for logistic regression with early stopping.

    Every epoch reshuffles the samples, takes one gradient step per batch and
    records the full-data cost. Training stops early once `patience`
    consecutive epochs have failed to improve on the best cost seen so far.

    Args:
        x: feature matrix, one row per sample.
        y: 0/1 targets aligned with the rows of x.
        theta: initial weight vector.
        learning_rate: step size applied to each batch gradient.
        batch_size: number of samples per batch.
        num_epochs: maximum number of passes over the data.
        patience: consecutive non-improving epochs tolerated before stopping.

    Returns:
        (theta, cost_history, best_cost, epoch): the weights that achieved
        the lowest cost, the cost of every epoch that ran, that lowest cost,
        and the zero-based epoch where it occurred (-1 if no epoch improved
        on the initial infinite cost, in which case the initial theta is
        returned).
    """
    m=len(y)
    cost_history=[]
    best_cost=float("inf")
    best_theta=theta
    epoch=-1
    count=0
    for num in range(num_epochs):
        # fancy indexing builds new arrays, so the caller's x / y stay untouched
        shuffle_indices=np.random.permutation(m)
        x=x[shuffle_indices]
        y=y[shuffle_indices]
        for i in range(0,m,batch_size): # find the best theta
            x_batch=x[i:i+batch_size]
            y_batch=y[i:i+batch_size]
            h=sigmoid(x_batch @ theta )
            # average over the rows actually in this batch: the final batch is
            # shorter whenever m is not a multiple of batch_size
            gradient=(x_batch.T @ (h-y_batch))/len(y_batch)
            theta=theta-(learning_rate * gradient)
        cost=cost_estimation(x,y,theta)
        cost_history.append(cost)
        if cost<best_cost:
            best_cost=cost
            # theta is rebound (never modified in place) on every step, so
            # keeping this reference preserves the best weights
            best_theta=theta
            epoch=num
            count=0
        else:
            count+=1
            if count>=patience:
                print("Early stopping")
                break
    return best_theta,cost_history,best_cost,epoch

                 
        

# Starting weights are drawn uniformly from [0, 1); train for at most 100
# epochs with batches of 32 samples, then report the returned weights, the
# best cost and the epoch at which it was reached.
initial_theta=np.random.rand(num_features)
learning_rate=0.01
num_epoch=100
batch_size=32
best_theta,cost_history,best_cost,epoch=mini_batch_gradient_descent(data_2d,label,initial_theta,learning_rate,batch_size,num_epoch)
print("best_theta:",best_theta)
print()
print("optimised cost=",best_cost)
print()
print("epoch=",epoch)





In [None]:
# Training cost per epoch. The x-axis follows the number of epochs that
# actually ran: after an early stop cost_history is shorter than num_epoch and
# plotting against range(num_epoch) would fail on the length mismatch.
plt.figure(figsize=(10,6))
plt.plot(range(len(cost_history)),cost_history)
plt.title("mini batch gradient descent")
plt.xlabel("epochs")
plt.ylabel("cost")
plt.grid(True)

In [None]:
from sklearn.linear_model import LogisticRegression
lr=LogisticRegression()
new_data_path=["C:\\Users\\User\\Documents\\digital image processing\\New folder\\data\\cat2.jpg"]
# one flattened feature row per listed image file
new_data=np.array([preprocess_image(image_path) for image_path in new_data_path])
#print(new_data)

#print(new_data.shape)

def softmax(logits):
    """Turn logits into values that sum to 1 using a max-shifted exponential.

    The maximum and the sum are both taken over every element of `logits`
    (no axis is given), so the whole array is normalised as one distribution.

    Args:
        logits: numpy array of scores.

    Returns:
        Array shaped like `logits` whose entries sum to 1.
    """
    # subtracting the maximum keeps exp() from overflowing
    shifted=logits-np.max(logits,keepdims=True)
    weights=np.exp(shifted)
    return weights/np.sum(weights,keepdims=True)



# best_theta is a single weight vector, so each image yields exactly one logit.
# A softmax over one value is always 1.0 (its argmax is always 0), so the
# probability has to come from the sigmoid the model was trained with.
logits=new_data @ best_theta
probabilities=sigmoid(logits)
print(len(probabilities))
# threshold at 0.5: index 0 / 1 selects the first / second entry of class_names
predicted_class=int(probabilities[0]>=0.5)
print(predicted_class)


print(f" predicted_class {class_names[predicted_class]}")
    

SUPPORT VECTOR MACHINE(SVM)


In [None]:
from sklearn.svm import SVC
import os
from skimage import io,transform,filters,color
import numpy as np
def preprocess_image(image_path):
    """Load an image file and return it as a flat grayscale feature vector.

    Pipeline: read the file, convert to grayscale, Gaussian-blur (sigma=1),
    resize to 100x100 and flatten. No flip or rotation is applied.

    Args:
        image_path: path of the image file to read.

    Returns:
        1-D float array with 100*100 entries.
    """
    from skimage.util import img_as_float

    image=io.imread(image_path)
    if image.ndim==3:
        # rgb2gray expects exactly three channels, so strip an alpha channel first
        if image.shape[-1]==4:
            image=color.rgba2rgb(image)
        image=color.rgb2gray(image)
    else:
        # already single-channel: only bring it to the same float scale rgb2gray produces
        image=img_as_float(image)
    image=filters.gaussian(image,sigma=1)
    image=transform.resize(image,(100,100))
    return image.flatten()
train_folder="C:\\Users\\User\\Documents\\digital image processing\\New folder\\data\\training_set"
class_names=['cats','dogs']

# One sub-folder per class; every file in it (except the "_DS_Store" entries)
# becomes one preprocessed row in `data` with the folder name as its label.
data=[]
label=[]
for class_name in class_names:
    class_path=os.path.join(train_folder,class_name)
    image_names=[entry for entry in os.listdir(class_path) if not entry.startswith("_DS_Store")]
    for image_name in image_names:
        image_path=os.path.join(class_path,image_name)
        preprocessed_image=preprocess_image(image_path)
        data.append(preprocessed_image)
        label.append(class_name)

data=np.array(data)
label=np.array(label)



In [None]:
test_folder="C:\\Users\\User\\Documents\\digital image processing\\New folder\\data\\test_set"
class_names=['cats','dogs']

# Same folder walk as for the training data, but over the test_set directory.
data_test=[]
label_test=[]
for class_name in class_names:
    class_path=os.path.join(test_folder,class_name)
    image_names=[entry for entry in os.listdir(class_path) if not entry.startswith("_DS_Store")]
    for image_name in image_names:
        image_path=os.path.join(class_path,image_name)
        preprocessed_image=preprocess_image(image_path)
        data_test.append(preprocessed_image)
        label_test.append(class_name)

data_test=np.array(data_test)
label_test=np.array(label_test)

In [None]:
# Number of loaded test images.
print(len(data_test))

In [None]:
from sklearn.preprocessing import StandardScaler,LabelEncoder
# Learn the class-name -> integer mapping from the training labels only ...
le=LabelEncoder()
label=le.fit_transform(label)
# ... and apply that same mapping to the test labels. Refitting on the test
# labels would rebuild the mapping from whatever classes the test set happens
# to contain, which is not guaranteed to match the training mapping.
label_test=le.transform(label_test)
# The string literal below is a disabled split / standardisation step; it is
# not executed as code.
'''

from sklearn.model_selection import train_test_split
data_train,data_test,label_train,label_test=train_test_split(data,label,test_size=0.2)
st=StandardScaler()
data=st.fit_transform(data)
data_test=st.transform(data_test)
'''

In [None]:
from sklearn.metrics import accuracy_score
# RBF-kernel SVM with fixed C and gamma, trained on the flattened pixel vectors.
svm_classifier=SVC(kernel='rbf',C=0.1,gamma=0.1)
svm_classifier.fit(data,label)


In [None]:
# Predict the test folder and print the fraction of matching labels.
svm_predict=svm_classifier.predict(data_test)
print("accuracy_score=",accuracy_score(svm_predict,label_test))

Histogram of an image 

In [None]:
import matplotlib.pyplot as plt

# Grayscale intensity histogram (256 bins over [0, 1]) for each listed image.
image_paths=["C:\\Users\\User\\Documents\\digital image processing\\New folder\\data\\dog2.jpg"]
for image_path in image_paths:
    image=io.imread(image_path)
    gray_image=color.rgb2gray(image)
    intensities=gray_image.ravel()

    plt.hist(intensities,bins=256,range=(0.0,1.0),color="gray")
    plt.xlabel("color intensity")
    plt.ylabel("frequency")
    plt.title("histogram of an image")
    plt.show()


    

In [None]:
import matplotlib.pyplot as plt
import numpy as np

image_paths=["C:\\Users\\User\\Documents\\digital image processing\\New folder\\data\\dog2.jpg"]
# (line colour, legend text) for channel index 0, 1, 2
channel_styles=[('red','RED channel'),('green','GREEN channel'),('blue','BLUE channel')]

for image_path in image_paths:
    image=io.imread(image_path)
    for channel_index,(line_color,legend_text) in enumerate(channel_styles):
        counts,_=np.histogram(image[:,:,channel_index],bins=256,range=(0,256))
        # divide by the total so each curve sums to 1
        plt.plot(counts/counts.sum(),color=line_color,label=legend_text,alpha=1)

    plt.title("histogram of an  image")
    plt.xlabel("color intensity")
    plt.ylabel("frequency")
    plt.legend()
    plt.show()


In [None]:
# histogram of sub images: the image left in `image` is cut into its top,
# bottom, left and right halves and each half gets its own RGB histogram
rows,colmn=image.shape[:2]
mid=rows//2
middle=colmn//2
top_image,bottom_image=image[:mid,:],image[mid:,:]
left_image,right_image=image[:,:middle],image[:,middle:]

subimages=[top_image,bottom_image,left_image,right_image]
# (line colour, legend text) for channel index 0, 1, 2
channel_styles=[('red','RED channel'),('green','GREEN channel'),('blue','BLUE channel')]

for i ,subimage in enumerate (subimages):
    for channel_index,(line_color,legend_text) in enumerate(channel_styles):
        counts,_=np.histogram(subimage[:,:,channel_index],bins=256,range=(0,256))
        # divide by the total so each curve sums to 1
        plt.plot(counts/counts.sum(),color=line_color,label=legend_text,alpha=1)

    plt.title(f"histogram of an  subimage {i+1}")
    plt.xlabel("color intensity")
    plt.ylabel("frequency")
    plt.legend()
    plt.show()

HOG Feature Extraction

In [None]:
import os
from skimage import io,color
from skimage.filters import sobel
from skimage.transform import resize
import numpy as np
from skimage.feature import hog
# Root folder whose per-class sub-folders are read for HOG feature extraction.
image_folder="C:\\Users\\User\\Documents\\digital image processing\\New folder\\data\\training_set"

def preprocess(image_path):
    """Load an image file and return its HOG descriptor as a 1-D vector.

    Pipeline: read the file, convert to grayscale, resize to 100x100, take
    the Sobel gradient magnitude, then compute HOG on that magnitude image
    (16x16-pixel cells, one cell per block, L2-Hys block normalisation).

    Args:
        image_path: path of the image file to read.

    Returns:
        1-D float array holding the HOG descriptor.
    """
    from skimage.util import img_as_float

    image=io.imread(image_path)
    if image.ndim==3:
        # rgb2gray expects exactly three channels, so strip an alpha channel first
        if image.shape[-1]==4:
            image=color.rgba2rgb(image)
        gray_image=color.rgb2gray(image)
    else:
        # already single-channel: only bring it to the same float scale rgb2gray produces
        gray_image=img_as_float(image)
    gray_image=resize(gray_image,(100,100))
    # HOG is deliberately fed the edge-magnitude image, not the raw grayscale
    gradient_magnitude=sobel(gray_image)
    hog_feature=hog(gradient_magnitude,block_norm='L2-Hys',pixels_per_cell=(16,16),cells_per_block=(1,1))
    return hog_feature.flatten()

# Compute one HOG descriptor per training image, walking the class folders in
# class_names order.
# NOTE(review): no labels are collected here; the split below pairs these
# features with the existing `label` array, so both must come from the same
# folder walk in the same order -- confirm.
features=[]
class_names=['cats','dogs']
for class_name in class_names:
    class_path=os.path.join(image_folder,class_name)
    image_names=[entry for entry in os.listdir(class_path) if not entry.startswith('_DS_Store')]
    for image_name in image_names:
        image_path=os.path.join(class_path,image_name)
        feature=preprocess(image_path)
        features.append(feature)

            

In [None]:
# Stack the per-image HOG descriptors into one array for scikit-learn.
features_data=np.array(features)

In [None]:
from sklearn.preprocessing import StandardScaler,LabelEncoder
# Encode the labels as integer codes, then hold out 20% of the HOG features.
# NOTE(review): no random_state is passed, so the split differs on every run.
le=LabelEncoder()
label=le.fit_transform(label)
from sklearn.model_selection import train_test_split
feature_train,feature_test,l_train,l_test=train_test_split(features_data,label,test_size=0.2)

In [None]:
from sklearn.model_selection import GridSearchCV
# Exhaustive 5-fold cross-validated search over a 5x5 grid of C and gamma for
# an RBF-kernel SVM, using the HOG training split.
grid_param={'C':[0.001,0.01,0.1,1,10],
            'gamma':[0.001,0.01,0.1,1,10]}
svm_classifier=SVC(kernel='rbf')
gridsearch=GridSearchCV(svm_classifier,grid_param,cv=5)
gridsearch.fit(feature_train,l_train)

In [None]:
# Best parameter combination found by the grid search and its mean CV score.
print("best parameter:",gridsearch.best_params_)
print("best cross validation score=",gridsearch.best_score_)

In [None]:
from sklearn.model_selection import RandomizedSearchCV
# Randomised 5-fold cross-validated search over the same C / gamma candidates.
# NOTE(review): no random_state is passed, so the sampled combinations can
# differ between runs.
param={'C':[0.001,0.01,0.1,1,10],
            'gamma':[0.001,0.01,0.1,1,10]}
svm_classifier=SVC(kernel='rbf')
randomsearch=RandomizedSearchCV(svm_classifier,param,cv=5)
randomsearch.fit(feature_train,l_train)

In [None]:
# Best parameter combination found by the randomised search and its mean CV score.
print("best parameter:",randomsearch.best_params_)
print("best cross validation score=",randomsearch.best_score_)

In [None]:
from sklearn.metrics import accuracy_score
# RBF-kernel SVM with hand-set C=10 and gamma=0.1, trained on the HOG training
# split and scored on the HOG hold-out split.
svm_classifier=SVC(kernel='rbf',C=10,gamma=0.1)
svm_classifier.fit(feature_train,l_train)
svm_predict=svm_classifier.predict(feature_test)
print("accuracy_score=",accuracy_score(svm_predict,l_test))

Without manually specifying the "C" and "gamma" values

In [None]:
# Same RBF-kernel SVM but with scikit-learn's default C and gamma.
svm_classifier_w=SVC(kernel='rbf')
svm_classifier_w.fit(feature_train,l_train)
svm_predict=svm_classifier_w.predict(feature_test)
print("accuracy_score=",accuracy_score(svm_predict,l_test))

In [None]:
# Number of samples in the HOG hold-out split.
print(len(l_test))

In [None]:
import matplotlib.pyplot as plt

# Walk the hold-out predictions once, keeping a running count of correct (m)
# and wrong (n) predictions together with the sample index at which each
# count was reached.
m = 0
n = 0
misclassified_points = []
correct_classified_points = []

for i, (predicted, actual) in enumerate(zip(svm_predict, l_test)):
    if predicted == actual:
        m += 1
        correct_classified_points.append((i, m))
    else:
        n += 1
        misclassified_points.append((i, n))

print("Misclassified amount:", n)
print("Correct classified amount:", m)


# zip(*[]) yields nothing to unpack, so an empty group (e.g. a perfect
# classifier with no misclassified samples) needs explicit empty coordinates.
misclassified_x, misclassified_y = zip(*misclassified_points) if misclassified_points else ((), ())
correct_classified_x, correct_classified_y = zip(*correct_classified_points) if correct_classified_points else ((), ())
print(misclassified_x)

plt.figure(figsize=(10, 6))
plt.scatter(misclassified_x, misclassified_y, color='red', marker='x', label='Misclassified')
plt.scatter(correct_classified_x, correct_classified_y, color='green', marker='o', label='Correct Classified')
plt.xlabel("Sample Index")
plt.ylabel("Cumulative Count")
plt.title("Misclassified and Correct Classified Samples")
plt.legend()
plt.show()


In [None]:
new_data_path=["C:\\Users\\User\\Documents\\digital image processing\\New folder\\data\\dog2.jpg"]
# one HOG descriptor row per listed image file
new_data=np.array([preprocess(image_path) for image_path in new_data_path])

# the default-parameter SVM returns an integer code, used here as an index
# into class_names
svm_predict=svm_classifier_w.predict(new_data)
predicted_class=class_names[svm_predict[0]]

from PIL import Image
new_image=Image.open(new_data_path[0])
plt.imshow(new_image)
plt.title(f"predicted class:{predicted_class}")
plt.axis("off")
plt.show()

Gradient Direction of an image

In [None]:
from skimage.filters import sobel
from skimage import color
import numpy as np
import matplotlib.pyplot as plt

image_path="C:\\Users\\User\\Documents\\digital image processing\\New folder\\data\\dog2.jpg"

image=io.imread(image_path)
gray_image=color.rgb2gray(image)
# per-pixel gradient angle from the two axis-wise Sobel responses
gradient_axis0=sobel(gray_image,axis=0)
gradient_axis1=sobel(gray_image,axis=1)
gradient_direction=np.arctan2(gradient_axis0,gradient_axis1)

# cyclic colour map spanning the full [-pi, pi] range of arctan2
plt.imshow(gradient_direction,cmap='hsv',vmin=-np.pi,vmax=np.pi)
plt.colorbar(label='gradient direction(radians)')
plt.title("Gradient direction")
plt.axis("off")
plt.show()

VOTING CLASSIFIER

In [None]:
from sklearn.ensemble import RandomForestClassifier,VotingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Four base models plus a hard-voting (majority) ensemble built from them.
rf=RandomForestClassifier()
knn=KNeighborsClassifier(n_neighbors=2)
svm=SVC(kernel='rbf')
lr=LogisticRegression()
vlf=VotingClassifier(estimators=[('rf',rf),('knn',knn),('svm',svm),('lr',lr)],voting='hard')

# Fit and score every model on the HOG split. The ensemble is part of this
# loop, so it is trained exactly once here; a separate fit beforehand would
# only repeat the same training.
for clf in (rf,knn,svm,lr,vlf):
    clf.fit(feature_train,l_train)
    clf_predict=clf.predict(feature_test)
    print(clf.__class__.__name__, accuracy_score(l_test,clf_predict))
    

