<a href="https://colab.research.google.com/github/sftSalman/pytorch/blob/main/pytorch_customDataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Imports 
import os
from typing import Union

import torch.nn.functional as F  # All functions that don't have any parameters
import pandas as pd
import torch
import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.
import torchvision
import torchvision.transforms as transforms  # Transformations we can perform on our dataset
from pandas import io

# from skimage import io
from torch.utils.data import (
    Dataset,
    DataLoader,
)  # Gives easier dataset managment and creates mini batches
import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions


# Create Fully Connected Network
class NN(nn.Module):
    def __init__(self, input_size, num_classes):
        super(NN, self).__init__()
        self.fc1 = nn.Linear(input_size, 50)
        self.fc2 = nn.Linear(50, num_classes)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x


class SoloDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        self.annotations = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, index):
        x_data = self.annotations.iloc[index, 0:11]
        x_data = torch.tensor(x_data)
        y_label = torch.tensor(int(self.annotations.iloc[index, 11]))

        return (x_data.float(), y_label)


# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyperparameters
num_classes = 26
learning_rate = 1e-3
batch_size = 5
num_epochs = 30
input_size = 11

# Load Data
dataset = SoloDataset(
    csv_file="/content/drive/MyDrive/Deep Learning/pytorch/Test_data/power.csv", root_dir="test123", transform=transforms.ToTensor()
)
train_set, test_set = torch.utils.data.random_split(dataset, [2900, 57])
train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=True)

# Model
model = NN(input_size=input_size, num_classes=num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

print(len(train_set))
print(len(test_set))
# Train Network
for epoch in range(num_epochs):
    losses = []

    for batch_idx, (data, targets) in enumerate(train_loader):
        # Get data to cuda if possible
        data = data.to(device=device)
        targets = targets.to(device=device)

        # forward
        scores = model(data)
        loss = criterion(scores, targets)

        losses.append(loss.item())

        # backward
        optimizer.zero_grad()
        loss.backward()

        # gradient descent or adam step
        optimizer.step()

    print(f"Cost at epoch {epoch} is {sum(losses) / len(losses)}")


# Check accuracy on training to see how good our model is
def check_accuracy(loader, model):
    num_correct = 0
    num_samples = 0
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)
            y = y.to(device=device)

            scores = model(x)
            _, predictions = scores.max(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)

        print(
            f"Got {num_correct} / {num_samples} with accuracy {float(num_correct) / float(num_samples) * 100:.2f}"
        )

    model.train()


print("Checking accuracy on Training Set")
check_accuracy(train_loader, model)

print("Checking accuracy on Test Set")
check_accuracy(test_loader, model)

2900
57
Cost at epoch 0 is 3107.2300162479796
Cost at epoch 1 is 1130.4823875164163
Cost at epoch 2 is 951.1848523896316
Cost at epoch 3 is 805.6064470356908
Cost at epoch 4 is 761.6681481591586
Cost at epoch 5 is 616.726986181325
Cost at epoch 6 is 581.7278023555361
Cost at epoch 7 is 500.8897549070161
Cost at epoch 8 is 418.8894119525778
Cost at epoch 9 is 370.93029543120286
Cost at epoch 10 is 300.26734644133467
Cost at epoch 11 is 257.1585857917523
Cost at epoch 12 is 204.35449925784408
Cost at epoch 13 is 129.86832304165281
Cost at epoch 14 is 82.10462071422873
Cost at epoch 15 is 49.257336060959716
Cost at epoch 16 is 23.694419720255095
Cost at epoch 17 is 10.078009284775833
Cost at epoch 18 is 4.696851597983262
Cost at epoch 19 is 3.547657306440945
Cost at epoch 20 is 3.3840158877701594
Cost at epoch 21 is 3.298790069078577
Cost at epoch 22 is 3.271124686865971
Cost at epoch 23 is 3.253519662084251
Cost at epoch 24 is 3.253551259123046
Cost at epoch 25 is 3.257407721568798
Cost 

In [1]:
"""
Example of how to create custom dataset in Pytorch. In this case
we have images of cats and dogs in a separate folder and a csv
file containing the name to the jpg file as well as the target
label (0 for cat, 1 for dog).
Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
*    2020-04-03 Initial coding
"""

# Imports
import torch
import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.
import torchvision.transforms as transforms  # Transformations we can perform on our dataset
import torchvision
import os
import pandas as pd
from skimage import io
from torch.utils.data import (
    Dataset,
    DataLoader,
)  # Gives easier dataset managment and creates mini batches


class CatsAndDogsDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        self.annotations = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, index):
        img_path = os.path.join(self.root_dir, self.annotations.iloc[index, 0])
        image = io.imread(img_path)
        y_label = torch.tensor(int(self.annotations.iloc[index, 1]))

        if self.transform:
            image = self.transform(image)

        return (image, y_label)


# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyperparameters
in_channel = 3
num_classes = 2
learning_rate = 1e-3
batch_size = 32
num_epochs = 10

# Load Data
dataset = CatsAndDogsDataset(
    csv_file="/content/drive/MyDrive/Deep Learning/pytorch/Test_data/cats_dogs.csv",
    root_dir="/content/drive/MyDrive/Deep Learning/pytorch/Test_data/cats_dogs_resized",
    transform=transforms.ToTensor(),
)

# Dataset is actually a lot larger ~25k images, just took out 10 pictures
# to upload to Github. It's enough to understand the structure and scale
# if you got more images.
train_set, test_set = torch.utils.data.random_split(dataset, [5, 5])
train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=True)

# Model
model = torchvision.models.googlenet(pretrained=True)
model.to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train Network
for epoch in range(num_epochs):
    losses = []

    for batch_idx, (data, targets) in enumerate(train_loader):
        # Get data to cuda if possible
        data = data.to(device=device)
        targets = targets.to(device=device)

        # forward
        scores = model(data)
        loss = criterion(scores, targets)

        losses.append(loss.item())

        # backward
        optimizer.zero_grad()
        loss.backward()

        # gradient descent or adam step
        optimizer.step()

    print(f"Cost at epoch {epoch} is {sum(losses)/len(losses)}")

# Check accuracy on training to see how good our model is
def check_accuracy(loader, model):
    num_correct = 0
    num_samples = 0
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)
            y = y.to(device=device)

            scores = model(x)
            _, predictions = scores.max(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)

        print(
            f"Got {num_correct} / {num_samples} with accuracy {float(num_correct)/float(num_samples)*100:.2f}"
        )

    model.train()


print("Checking accuracy on Training Set")
check_accuracy(train_loader, model)

print("Checking accuracy on Test Set")
check_accuracy(test_loader, model)

Downloading: "https://download.pytorch.org/models/googlenet-1378be20.pth" to /root/.cache/torch/hub/checkpoints/googlenet-1378be20.pth


  0%|          | 0.00/49.7M [00:00<?, ?B/s]



Cost at epoch 0 is 7.070487022399902
Cost at epoch 1 is 4.7750139236450195
Cost at epoch 2 is 2.546203136444092
Cost at epoch 3 is 0.948806881904602
Cost at epoch 4 is 0.5999878644943237
Cost at epoch 5 is 0.12205065786838531
Cost at epoch 6 is 0.07505355775356293
Cost at epoch 7 is 0.041181229054927826
Cost at epoch 8 is 0.029677793383598328
Cost at epoch 9 is 0.042147137224674225
Checking accuracy on Training Set
Got 5 / 5 with accuracy 100.00
Checking accuracy on Test Set
Got 2 / 5 with accuracy 40.00
