In [42]:
import torch
import torch.nn as nn
from glob import glob
from tqdm import tqdm
import numpy as np
import cv2
from torch.utils.data import Dataset , DataLoader
from torchvision import transforms
import torchvision

In [5]:
from random import shuffle

In [2]:
hate_images = glob('Hate Speech/**.jpg')
nohate_images = glob('No Hate Speech/**.jpg')

In [13]:
shuffle(hate_images)
shuffle(nohate_images)

In [14]:
train_hate = hate_images[:round(len(hate_images)*.80)]
train_nohate = nohate_images[:round(len(nohate_images)*.80)]


test_hate = hate_images[round(len(hate_images)*.80):]
test_nohate = nohate_images[round(len(nohate_images)*.80):]


In [15]:
train_images = train_hate + train_nohate
test_images = test_hate + test_nohate

In [18]:
shuffle(train_images)
shuffle(test_images)

In [39]:
transform = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

In [61]:
import albumentations as A
from albumentations.pytorch import ToTensorV2

img_size = 256

aug_t= A.Compose([
            A.Resize(img_size,img_size),
            A.RandomCrop(img_size,img_size),
            A.HorizontalFlip(0.5),
            A.VerticalFlip(0.5),

    
            A.ShiftScaleRotate(rotate_limit=3),
            A.Blur(p=0.25),


            A.Normalize(mean=(0), std=(1)),
            ToTensorV2(p=1.0),
        ], p=1.0)

aug_v= A.Compose([
            A.Resize(img_size,img_size),

            A.Normalize(mean=(0), std=(1)),
            ToTensorV2(p=1.0),
        ], p=1.0)

In [62]:
class hateSpeechDataset(Dataset):
    

    def __init__(self, imageDirectory, transform=None):
       
        self.imagePath = imageDirectory
        self.transform = transform

    def __len__(self):
        return len(self.imagePath)

    def __getitem__(self, idx):
        path = self.imagePath[idx]
        rd = path.split('/')[0]
        label = 0
        if rd == 'Hate Speech':
            label = 1
        
        image = cv2.imread(path)
        # image = cv2.resize(image,[256,256])
        
        if self.transform:
            image = self.transform(image=image)['image']
            
        
        return image , label
            
        
        
        


In [63]:
trainset = hateSpeechDataset(train_images, transform=aug_v)
testset = hateSpeechDataset(test_images, transform=aug_v)

In [64]:
trainLoader = DataLoader(trainset,batch_size=16)
testLoader = DataLoader(testset,batch_size=16)

In [65]:
for batch , (image , label ) in enumerate(trainLoader):
    print(image.shape)
    break

torch.Size([16, 3, 256, 256])


In [66]:
## Model 

device = 'cpu'
weights = torchvision.models.resnet18 # .DEFAULT = best available weights 
model = torchvision.models.resnet18(weights=weights).to(device)



In [67]:
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [68]:
for param in model.parameters():
    param.requires_grad = False

In [69]:
model.fc = nn.Linear(in_features=512,out_features=2)

In [70]:
from torch.optim import Adam
 
# Define the loss function with Classification Cross-Entropy loss and an optimizer with Adam optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=0.001, weight_decay=0.0001)

In [71]:
def testAccuracy():
    
    model.eval()
    accuracy = 0.0
    total = 0.0
    
    with torch.no_grad():
        for data in testLoader:
            images, labels = data
            # run the model on the test set to predict labels
            outputs = model(images)
            # the label with the highest energy will be our prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            accuracy += (predicted == labels).sum().item()
    
    # compute the accuracy over all test images
    accuracy = (100 * accuracy / total)
    return(accuracy)

In [72]:
def saveModel():
    path = "./bestModel.pth"
    torch.save(model.state_dict(), path)

In [75]:
from torch.autograd import Variable
def train(num_epochs):
    
    best_accuracy = 0.0

    # Define your execution device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("The model will be running on", device, "device")
    # Convert model parameters and buffers to CPU or Cuda
    model.to(device)

    for epoch in range(num_epochs):  # loop over the dataset multiple times
        running_loss = 0.0
        running_acc = 0.0
        
        loop = tqdm(trainLoader)

        for i, (images, labels) in enumerate(loop):
            
            # get the inputs
            images = Variable(images.to(device))
            labels = Variable(labels.to(device))

            # zero the parameter gradients
            optimizer.zero_grad()
            # predict classes using images from the training set
            outputs = model(images)
            # compute the loss based on model output and real labels
            loss = loss_fn(outputs, labels)
            # backpropagate the loss
            loss.backward()
            # adjust parameters based on the calculated gradients
            optimizer.step()

            # Let's print statistics for every 1,000 images
            running_loss += loss.item()     # extract the loss value
            if i % 1000 == 999:    
                # print every 1000 (twice per epoch) 
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 1000))
                # zero the loss
                running_loss = 0.0

        # Compute and print the average accuracy fo this epoch when tested over all 10000 test images
        accuracy = testAccuracy()
        print('For epoch', epoch+1,'the test accuracy over the whole test set is %d %%' % (accuracy))
        
        # we want to save the model if the accuracy is the best
        if accuracy > best_accuracy:
            saveModel()
            best_accuracy = accuracy

In [76]:
train(2)

The model will be running on cpu device


100%|██████████| 180/180 [02:21<00:00,  1.27it/s]


For epoch 1 the test accuracy over the whole test set is 63 %


100%|██████████| 180/180 [02:18<00:00,  1.30it/s]


For epoch 2 the test accuracy over the whole test set is 63 %
