<a href="https://colab.research.google.com/github/thedataninja1786/Machine-Learning/blob/main/feature_extraction_SVM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import os 
import numpy as np 
from sklearn import svm 
from sklearn.model_selection import train_test_split
from sklearn import metrics 
from torch import optim, nn
from torchvision import models, transforms
# Load VGG-16 with pretrained weights (pretrained=True). The same call is
# repeated in the next cell, right before the feature extractor is built.
model = models.vgg16(pretrained=True)

In [None]:
class FeatureExtractor(nn.Module):
  """Expose the output of a VGG-16-style network's first classifier layer.

  The wrapped ``model`` must provide ``features`` (an iterable of layers),
  ``avgpool`` and an indexable ``classifier``. The extractor reuses those
  layer objects (no copies are made), so weights are shared with ``model``.
  """

  def __init__(self, model):
    """Collect the layers of ``model`` needed to produce the feature vector."""
    super().__init__()
    # Convolutional stack, re-wrapped in a fresh Sequential container
    self.features = nn.Sequential(*model.features)
    # Average-pooling layer that follows the convolutional stack
    self.pooling = model.avgpool
    # Collapses every non-batch dimension into a single vector per sample
    self.flatten = nn.Flatten()
    # First layer of the fully-connected classifier head
    self.fc = model.classifier[0]

  def forward(self, x):
    """Return the feature vector for input batch ``x``.

    Applies, in order: convolutional features, pooling, flatten and the
    first fully-connected layer.
    """
    pooled = self.pooling(self.features(x))
    return self.fc(self.flatten(pooled))

# Pick the compute device: the first CUDA GPU when one is available, else the CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")

# Load pretrained VGG-16, wrap it in the feature extractor and move it to the device
model = models.vgg16(pretrained=True)
new_model = FeatureExtractor(model).to(device)

# Bare expression: as the last statement of a notebook cell it displays the module
new_model

In [None]:
# Preprocessing pipeline used while extracting features; steps run top to bottom.
transformer = transforms.Compose([
    # Convert the incoming image array to a PIL image for the steps below
    transforms.ToPILImage(),
    # (224,224) for resnet and vgg16 and (227,227) for alexnet
    transforms.Resize((224,224)),
    # Random augmentations: each call may produce a different result for the same image
    transforms.RandomHorizontalFlip(),
    transforms.RandomAutocontrast(),
    transforms.RandomGrayscale(),
    # Back to a tensor, then normalise each channel with the given mean / std
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

In [None]:
import cv2 
from tqdm import tqdm
train_path = '/content/drive/MyDrive/Chest X-ray (Covid-19 & Pneumonia)/Data/train' 
test_path = '/content/drive/MyDrive/Chest X-ray (Covid-19 & Pneumonia)/Data/test' 

paths = [train_path,test_path]
# Each entry is [feature_vector (1-D NumPy array), integer class label]
features = []
# Sub-directory name -> integer class label
classes = {'COVID19':0,'NORMAL':1,'PNEUMONIA':2}

for path in paths:
  # sorted() makes the capped subset of files reproducible; os.listdir order is arbitrary
  for class_dir in sorted(os.listdir(path)):
    # Ignore anything that is not one of the known class folders
    # (previously such an entry raised KeyError on the label lookup)
    if class_dir not in classes:
      continue
    label = classes[class_dir]
    # Number of images successfully processed for this class folder
    count = 0
    for img_name in sorted(os.listdir(os.path.join(path,class_dir))):
      img_path = os.path.join(path,class_dir,img_name)
      # NOTE(review): cv2.imread yields channels in BGR order while the
      # Normalize statistics are per-channel; this makes no difference for
      # grayscale images (all channels equal) -- confirm before using colour data.
      img = cv2.imread(img_path)
      # cv2.imread returns None instead of raising when a file cannot be decoded
      if img is None:
        print(f'Skipping unreadable file: {img_path}')
        continue
      count += 1
      img = transformer(img)
      # Add the batch dimension. PyTorch models read 4-dimensional tensors
      # [batch_size, channels, height, width]
      img = img.unsqueeze(0).to(device)
      # We only extract features, so we don't need gradients
      with torch.no_grad():
        feature = new_model(img)
      # Move to CPU, convert to a flat NumPy vector and store it with its label
      features.append([feature.cpu().numpy().reshape(-1),label])
      # Per-class caps: 620 images from the training folder, 161 from the test folder
      if count >= 620 and path == paths[0]:
        print('Breaked training')
        break
      elif count > 160 and path == paths[1]:
        print('Breaked testing')
        break

# Rows mix an array with an int, so the list is ragged; recent NumPy versions
# refuse to convert that implicitly. Build the (N, 2) object array explicitly.
feature_table = np.empty((len(features), 2), dtype=object)
for row_idx, (vector, vector_label) in enumerate(features):
  feature_table[row_idx, 0] = vector
  feature_table[row_idx, 1] = vector_label
np.save('/content/drive/MyDrive/Chest X-ray (Covid-19 & Pneumonia)/extracted_features.npy',feature_table)

In [None]:
# Tally how many extracted samples carry each class label
class_count = {}
for sample in features:
  sample_label = sample[1]
  class_count[sample_label] = class_count.get(sample_label, 0) + 1
print(class_count)

In [None]:
# Reload the saved feature table. allow_pickle=True lets NumPy load object
# arrays; unpickling can run arbitrary code, so only load files you trust.
features = np.load('/content/drive/MyDrive/Chest X-ray (Covid-19 & Pneumonia)/extracted_features.npy', allow_pickle=True)

# Seed NumPy's global RNG, then shuffle the rows in place
np.random.seed(0)
np.random.shuffle(features)

# Column 0 of each row is the feature vector, column 1 the class label
X = [sample[0] for sample in features]
Y = [sample[1] for sample in features]

# Hold out 15% of the samples for evaluation
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.15)

# Fit a support-vector classifier and score it on the held-out samples
SVM = svm.SVC(C=10, max_iter=10000)
SVM.fit(X_train, y_train)
predictions = SVM.predict(X_test)

print(metrics.classification_report(y_test, predictions))

In [None]:
# Directory holding the images to classify
path = '/content/drive/MyDrive/Chest X-ray (Covid-19 & Pneumonia)/Test_images/Data'

# Inference-time preprocessing: resize, tensor conversion and per-channel
# normalisation only. This rebinds the name `transformer` to a pipeline
# without any random augmentation steps.
transformer = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
def process_img(path):
  """Extract one feature vector per readable image found in ``path``.

  Every directory entry's name is printed, read with OpenCV, passed through
  the module-level ``transformer`` and fed to ``new_model`` on ``device``.
  Entries OpenCV cannot decode (non-image files, sub-directories, corrupt
  images) are reported and skipped instead of crashing the whole run.

  Args:
    path: Directory containing the images to process.

  Returns:
    A list of flat NumPy feature vectors, in ``os.listdir`` order, one per
    successfully read image. The list is empty when nothing is readable.
  """
  test_features = []
  for img_name in os.listdir(path):
    print(img_name)
    img = cv2.imread(os.path.join(path, img_name))
    # cv2.imread returns None instead of raising when a file cannot be decoded
    if img is None:
      print(f'Skipping unreadable file: {img_name}')
      continue
    img = transformer(img)
    # Add the batch dimension expected by the model: [batch, channels, height, width]
    img = img.unsqueeze(0).to(device)
    # Inference only, so gradients are not tracked
    with torch.no_grad():
      feature = new_model(img)
    test_features.append(feature.cpu().numpy().reshape(-1))
  return test_features
  
# Extract features for every image under `path`, then print the SVM's predicted labels
test_features = process_img(path)
predicted_labels = SVM.predict(test_features)
print(predicted_labels)