In [85]:
import os,json
import pandas as pd
import seaborn as sn
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torchvision as tv
from torch.utils.data import DataLoader,Dataset,Subset
from importlib import import_module
from IPython.display import display
from pytorch_lightning import LightningModule, Trainer, seed_everything,LightningDataModule,loggers,seed_everything
from pytorch_lightning.callbacks import LearningRateMonitor
from pytorch_lightning.callbacks.progress import TQDMProgressBar
from pytorch_lightning.loggers import CSVLogger
from pytorch_lightning.callbacks import EarlyStopping,ModelCheckpoint
from torch.optim.lr_scheduler import OneCycleLR
from torch.optim.swa_utils import AveragedModel, update_bn
from torchmetrics.functional import accuracy
from dataclasses import dataclass,asdict
from tqdm import tqdm
import torchvision.transforms as transforms
from tqdm.contrib import tzip,tenumerate
from omegaconf import OmegaConf
from collections import *
# Seed the run up front (the cell output reports "Global seed set to 42").
seed_everything(42)

# Root folder handed to the torchvision dataset constructors below.
# The historical absolute path remains the default so existing runs are
# unchanged; export DATASET_DIR to run the notebook on another machine.
dataset_dir=os.environ.get("DATASET_DIR", "/home/lvhang/autodl-tmp/datasets")

Global seed set to 42


In [93]:
# Per-channel mean/std forwarded to Normalize below
# (presumably the usual CIFAR-10 training-set statistics — TODO confirm).
channel_stats = dict(mean = [0.4914, 0.4822, 0.4465],
                            std = [0.2023, 0.1994, 0.2010])
# Evaluation-time preprocessing: tensor conversion followed by normalisation only.
eval_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(**channel_stats)
        ])
# CIFAR-10 training split, read from (or downloaded into) dataset_dir.
data=tv.datasets.CIFAR10(root=dataset_dir, train=True, download=True,transform=eval_transform)
# Scratch check: zip() an empty DataLoader with range(20). zip stops at the
# shortest iterable, so the loop body never runs and nothing is printed.
a,b= DataLoader([],100),range(20)
for q,w in zip(a,b):
    print(w)

Files already downloaded and verified


In [82]:
def uniform_split_subset(dataset,indice,length,shuffle=False):
    """Split a subset of ``dataset`` into label-balanced ``Subset`` pieces.

    Args:
        dataset: object exposing a ``targets`` sequence (one label per sample).
        indice: positions into ``dataset`` that form the subset being split.
        length: iterable of ratios; each ratio yields one output piece that
            receives ``int(ratio * n_label)`` samples of every label, where
            ``n_label`` is that label's count inside the subset.
        shuffle: when true, each piece's index list is shuffled in place with
            ``np.random.shuffle`` before it is wrapped.

    Returns:
        A list with one ``Subset(dataset, indices)`` per entry of ``length``.
        Pieces consume each label's samples consecutively, so they never overlap.
    """
    all_labels = np.array(dataset.targets)
    pool = np.array(indice)
    pool_labels = all_labels[pool]
    classes = np.unique(pool_labels)

    # Dataset indices of the pool grouped per label, kept in pool order.
    grouped = [pool[np.where(pool_labels == label)[0]] for label in classes]
    # How many samples of each label earlier pieces have already taken.
    offsets = [0] * len(grouped)

    pieces = []
    for ratio in length:
        chosen = []
        for k, members in enumerate(grouped):
            take = int(ratio * len(members))
            start = offsets[k]
            chosen.extend(members[start:start + take])
            offsets[k] = start + take
        if shuffle:
            np.random.shuffle(chosen)
        pieces.append(Subset(dataset, chosen))
    return pieces

def uniform_split_dataset(dataset,length,shuffle=True):
    """Split a whole dataset into label-balanced ``Subset`` pieces.

    Args:
        dataset: object exposing a ``targets`` sequence (one label per sample).
        length: iterable of ratios; each ratio yields one output piece that
            receives ``int(ratio * n_label)`` samples of every label.
        shuffle: when true (the default), each piece's index list is shuffled
            in place with ``np.random.shuffle`` before it is wrapped.

    Returns:
        A list with one ``Subset(dataset, indices)`` per entry of ``length``.
        Pieces consume each label's samples consecutively, so they never overlap.
    """
    labels = np.array(dataset.targets)
    # Sample positions grouped per label, in ascending label order.
    grouped = [np.where(labels == label)[0] for label in np.unique(labels)]
    # How many samples of each label earlier pieces have already taken.
    offsets = [0] * len(grouped)

    pieces = []
    for ratio in length:
        chosen = []
        for k, members in enumerate(grouped):
            take = int(ratio * len(members))
            chosen.extend(members[offsets[k]:offsets[k] + take])
            offsets[k] += take
        if shuffle:
            np.random.shuffle(chosen)
        pieces.append(Subset(dataset, chosen))
    return pieces

def uniform_split(dataset,length,shuffle=False):
    """Split ``dataset`` into label-balanced pieces, one per ratio in ``length``.

    Args:
        dataset: a dataset exposing ``targets``, or a (possibly nested)
            ``Subset`` whose base dataset exposes ``targets``.
        length: iterable of ratios, one per requested piece.
        shuffle: forwarded to the underlying splitter.

    Returns:
        A list with one entry per ratio. For an empty ``dataset`` every entry
        is its own empty list; otherwise every entry is a ``Subset`` of the
        base (fully unwrapped) dataset.
    """
    if len(dataset)==0:
        # One distinct list per piece: ``[[]]*n`` would hand back the same list
        # object n times, so appending to one piece would change all of them.
        return [[] for _ in length]
    if isinstance(dataset,Subset):
        # Peel off every Subset layer. Each layer's indices address the layer
        # below it, so they are translated on the way down and always end up
        # addressing the base dataset.
        indice=np.array(dataset.indices)
        dataset=dataset.dataset
        while isinstance(dataset,Subset):
            indice=np.array(dataset.indices)[indice]
            dataset=dataset.dataset
        return uniform_split_subset(dataset,indice,length,shuffle)
    return uniform_split_dataset(dataset,length,shuffle)

In [87]:
# Sanity check: with ratios [1, 0] the first piece should hold every training
# sample and the second none (the recorded output is (50000, 0)).
a,b=uniform_split(tv.datasets.CIFAR10(root=dataset_dir, train=True, download=True,transform=eval_transform),[1,0])
len(a),len(b)

Files already downloaded and verified


(50000, 0)

In [123]:
# Demonstration: sampling without replacement cannot draw more items than the
# population holds. The error is caught and shown instead of being left to
# propagate, so a full "Run All" does not stop at this cell.
try:
    np.random.choice([1,2,3],4,replace=False)
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: Cannot take a larger sample than population when 'replace=False'

In [136]:
# Three random batches with different row counts (100, 50, 60) and 10 columns,
# concatenated by torch.cat into a single (210, 10) tensor.
a,b,c=torch.rand((100,10)),torch.rand((50,10)),torch.rand((60,10))
d=torch.cat((a,b,c))
d.shape

torch.Size([210, 10])

In [140]:
# Cut the 210-row tensor into three chunks (presumably 70 rows each).
# NOTE(review): this does not line up with the original 100/50/60 pieces; if
# the goal is to recover a, b and c, d.split([100, 50, 60]) would be needed — confirm.
q,w,e=d.chunk(3)

In [148]:
import numpy as np
from collections import Counter
from tqdm import tqdm
from matplotlib import pyplot as plt
from sklearn.metrics import classification_report 

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
    
class MyConv1dPadSame(nn.Module):
    """
    nn.Conv1d wrapper with SAME-style padding.

    The input is zero-padded on the fly (the extra sample, if any, goes to the
    right) so that the output length equals ceil(input_length / stride).
    """
    def __init__(self, in_channels, out_channels, kernel_size, stride, groups=1):
        super().__init__()
        self.in_channels, self.out_channels = in_channels, out_channels
        self.kernel_size, self.stride, self.groups = kernel_size, stride, groups
        # The wrapped conv does no padding of its own; forward() pads explicitly.
        self.conv = torch.nn.Conv1d(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            groups=groups)

    def forward(self, x):
        length = x.shape[-1]
        # Output length wanted from the convolution: ceil(length / stride).
        target = (length + self.stride - 1) // self.stride
        # Total padding that makes the strided conv produce exactly `target` samples.
        total_pad = max(0, (target - 1) * self.stride + self.kernel_size - length)
        left = total_pad // 2
        padded = F.pad(x, (left, total_pad - left), "constant", 0)
        return self.conv(padded)
        
class MyMaxPool1dPadSame(nn.Module):
    """
    nn.MaxPool1d wrapper that zero-pads its input before pooling.

    The padding amount is computed with ``self.stride`` fixed at 1, i.e.
    ``kernel_size - 1`` zeros in total (the extra one goes to the right), while
    the pooling layer itself is built from ``kernel_size`` alone.
    Padding uses the constant 0, so a window that touches the padding never
    returns a negative value.
    """
    def __init__(self, kernel_size):
        super().__init__()
        self.kernel_size = kernel_size
        # Only used for the padding arithmetic in forward().
        self.stride = 1
        self.max_pool = torch.nn.MaxPool1d(kernel_size=kernel_size)

    def forward(self, x):
        length = x.shape[-1]
        target = (length + self.stride - 1) // self.stride
        total_pad = max(0, (target - 1) * self.stride + self.kernel_size - length)
        left = total_pad // 2
        padded = F.pad(x, (left, total_pad - left), "constant", 0)
        return self.max_pool(padded)
    
class BasicBlock(nn.Module):
    """
    Pre-activation residual block for 1-D feature maps.

    Two SAME-padded convolutions, each preceded by optional BatchNorm, ReLU and
    optional Dropout, are summed with a shortcut of the block input. The
    shortcut is max-pooled when the block downsamples and zero-padded along the
    channel axis when the block changes the channel count.

    Args:
        in_channels / out_channels: channel counts before and after the block.
        kernel_size: kernel width of both convolutions.
        stride: stride of the first convolution, used only when ``downsample``
            is true (otherwise the block runs with stride 1).
        groups: ``groups`` argument of both convolutions.
        downsample: whether this block reduces the temporal length.
        use_bn / use_do: enable the BatchNorm / Dropout layers in forward().
        is_first_block: when true, the BN/ReLU/Dropout in front of the first
            convolution is skipped.
    """
    def __init__(self, in_channels, out_channels, kernel_size, stride, groups, downsample, use_bn, use_do, is_first_block=False):
        super().__init__()

        self.in_channels, self.out_channels = in_channels, out_channels
        self.kernel_size = kernel_size
        self.groups = groups
        self.downsample = downsample
        # A block that does not downsample always runs with stride 1.
        self.stride = stride if downsample else 1
        self.is_first_block = is_first_block
        self.use_bn, self.use_do = use_bn, use_do

        # stage 1: (BN -> ReLU -> Dropout ->) conv, carrying the block's stride
        self.bn1 = nn.BatchNorm1d(in_channels)
        self.relu1 = nn.ReLU()
        self.do1 = nn.Dropout(p=0.5)
        self.conv1 = MyConv1dPadSame(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=self.stride,
            groups=groups)

        # stage 2: BN -> ReLU -> Dropout -> conv, always stride 1
        self.bn2 = nn.BatchNorm1d(out_channels)
        self.relu2 = nn.ReLU()
        self.do2 = nn.Dropout(p=0.5)
        self.conv2 = MyConv1dPadSame(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=1,
            groups=groups)

        # Pooling applied to the shortcut; its kernel equals the block stride.
        self.max_pool = MyMaxPool1dPadSame(kernel_size=self.stride)

    def forward(self, x):
        shortcut = x
        hidden = x

        # stage 1 (the pre-activation part is skipped for the first block)
        if not self.is_first_block:
            if self.use_bn:
                hidden = self.bn1(hidden)
            hidden = self.relu1(hidden)
            if self.use_do:
                hidden = self.do1(hidden)
        hidden = self.conv1(hidden)

        # stage 2
        if self.use_bn:
            hidden = self.bn2(hidden)
        hidden = self.relu2(hidden)
        if self.use_do:
            hidden = self.do2(hidden)
        hidden = self.conv2(hidden)

        # Shorten the shortcut whenever the main path was downsampled.
        if self.downsample:
            shortcut = self.max_pool(shortcut)

        # Channel counts differ: zero-pad the shortcut's channel axis.
        # F.pad works on the last axis, so channels are moved there and back.
        if self.out_channels != self.in_channels:
            extra = self.out_channels - self.in_channels
            front = extra // 2
            back = extra - front
            shortcut = F.pad(shortcut.transpose(-1, -2), (front, back), "constant", 0)
            shortcut = shortcut.transpose(-1, -2)

        # residual sum (in place on the conv output)
        hidden += shortcut

        return hidden
    
class ResNet1D(nn.Module):
    """
    1-D ResNet classifier: a stem convolution, ``n_block`` BasicBlocks, then
    BatchNorm/ReLU, a mean over the length axis and a linear layer.

    Input:
        X: (n_samples, n_channel, n_length), or any tensor that reshapes to
           (n_samples, in_channels, -1), e.g. (n_samples, n_length) when
           in_channels is 1
        Y: (n_samples)

    Output:
        out: (n_samples, n_classes) raw scores; no softmax is applied

    Parameters:
        in_channels: dim of input, the same as n_channel
        base_filters: number of filters in the stem and the first blocks; it
            doubles every ``increasefilter_gap`` blocks
        kernel_size: width of kernel
        stride: stride of kernel moving (used by the blocks that downsample)
        groups: conv groups of the residual blocks; set larger than 1 as in ResNeXt
        n_block: number of blocks
        n_classes: number of classes
        downsample_gap: block ``i`` downsamples when ``i % downsample_gap == 1``
        increasefilter_gap: block ``i`` (i > 0) doubles the channel count when
            ``i % increasefilter_gap == 0``
        use_bn: apply the BatchNorm layers
        use_do: apply the Dropout layers inside the blocks
        verbose: print tensor shapes while running forward()

    """

    def __init__(self, in_channels, base_filters, kernel_size, stride, groups, n_block, n_classes, downsample_gap=2, increasefilter_gap=4, use_bn=True, use_do=True, verbose=False):
        super(ResNet1D, self).__init__()

        self.verbose = verbose
        self.n_block = n_block
        self.kernel_size = kernel_size
        self.stride = stride
        self.groups = groups
        self.use_bn = use_bn
        self.use_do = use_do

        self.downsample_gap = downsample_gap # 2 for base model
        self.increasefilter_gap = increasefilter_gap # 4 for base model

        # first block (stem); forward() reads the input channel count from this conv
        self.first_block_conv = MyConv1dPadSame(in_channels=in_channels, out_channels=base_filters, kernel_size=self.kernel_size, stride=1)
        self.first_block_bn = nn.BatchNorm1d(base_filters)
        self.first_block_relu = nn.ReLU()
        out_channels = base_filters

        # residual blocks
        self.basicblock_list = nn.ModuleList()
        for i_block in range(self.n_block):
            is_first_block = i_block == 0
            # downsample at every self.downsample_gap blocks
            downsample = i_block % self.downsample_gap == 1
            # Channel plan. A separate local name is used so the constructor's
            # in_channels argument is not overwritten inside the loop.
            if is_first_block:
                block_in = base_filters
                out_channels = block_in
            else:
                # increase filters at every self.increasefilter_gap blocks
                block_in = int(base_filters*2**((i_block-1)//self.increasefilter_gap))
                if i_block % self.increasefilter_gap == 0:
                    out_channels = block_in * 2
                else:
                    out_channels = block_in

            tmp_block = BasicBlock(
                in_channels=block_in,
                out_channels=out_channels,
                kernel_size=self.kernel_size,
                stride = self.stride,
                groups = self.groups,
                downsample=downsample,
                use_bn = self.use_bn,
                use_do = self.use_do,
                is_first_block=is_first_block)
            self.basicblock_list.append(tmp_block)

        # final prediction; out_channels is the width of the last block
        # (or base_filters when n_block is 0)
        self.final_bn = nn.BatchNorm1d(out_channels)
        self.final_relu = nn.ReLU(inplace=True)
        # self.do = nn.Dropout(p=0.5)
        self.dense = nn.Linear(out_channels, n_classes)
        # self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        # Arrange the batch as (n_samples, in_channels, n_length). The channel
        # count comes from the stem conv, so multi-channel inputs keep their
        # channels instead of being flattened into one; with in_channels == 1
        # this is the same (n_samples, 1, -1) layout as before.
        out = x.reshape(x.shape[0], self.first_block_conv.in_channels, -1).float()

        # first conv
        if self.verbose:
            print('input shape', out.shape)
        out = self.first_block_conv(out)
        if self.verbose:
            print('after first conv', out.shape)
        if self.use_bn:
            out = self.first_block_bn(out)
        out = self.first_block_relu(out)

        # residual blocks, every block has two conv
        for i_block in range(self.n_block):
            net = self.basicblock_list[i_block]
            if self.verbose:
                print('i_block: {0}, in_channels: {1}, out_channels: {2}, downsample: {3}'.format(i_block, net.in_channels, net.out_channels, net.downsample))
            out = net(out)
            if self.verbose:
                print(out.shape)

        # final prediction
        if self.use_bn:
            out = self.final_bn(out)
        out = self.final_relu(out)
        # global average pooling over the length axis
        out = out.mean(-1)
        if self.verbose:
            print('final pooling', out.shape)
        # out = self.do(out)
        out = self.dense(out)
        if self.verbose:
            print('dense', out.shape)
        # out = self.softmax(out)
        # The softmax is disabled, so this reports the shape of the raw scores.
        if self.verbose:
            print('softmax', out.shape)

        return out

In [161]:
# Build a 48-block, single-input-channel network with grouped convolutions
# (groups=32) and 10 output classes; downsampling and channel doubling are
# spaced out to every 6 and every 12 blocks respectively.
model = ResNet1D(
    in_channels=1, 
    base_filters=128, # 64 for ResNet1D, 352 for ResNeXt1D
    kernel_size=16, 
    stride=2, 
    groups=32, 
    n_block=48, 
    n_classes=10, 
    downsample_gap=6, 
    increasefilter_gap=12, 
    use_do=True)

# Dummy batch: 10 random signals of length 228. Note that this rebinds `data`,
# which an earlier cell used for the CIFAR-10 dataset.
data=torch.rand((10,228))

In [164]:
# Smoke test: forward the dummy batch; the result has one row of 10 scores per sample.
# NOTE(review): no model.eval() call appears before this cell, so presumably
# dropout is active and the printed values change from run to run — confirm.
model(data)

tensor([[-2.0932e-01,  1.1567e-01,  6.6576e-01,  3.0436e-01, -3.4896e-01,
          1.9701e-01, -5.3079e-02,  1.4760e-01,  2.3587e-02, -2.3489e-01],
        [ 6.5987e-01,  3.4905e-01,  4.3179e-01, -3.5097e-01,  5.2153e-01,
         -2.0827e-01, -4.6656e-01, -4.2674e-01, -6.1366e-01,  3.0319e-01],
        [ 2.7515e-01, -1.9486e-01,  3.1357e-01,  5.3429e-01, -3.2544e-01,
         -5.2247e-01, -2.0388e-01, -7.0947e-01, -2.3968e-01,  2.7082e-01],
        [ 1.1587e+00,  2.4648e-01,  9.3658e-01,  3.1973e-01,  1.2939e-01,
          8.6242e-01, -8.4129e-02, -1.7655e-01, -1.0802e+00, -3.0260e-02],
        [-7.2247e-04,  1.6144e-01,  4.1531e-01, -1.5344e-01, -3.9538e-01,
         -7.1296e-01, -4.2701e-01, -1.8378e-01,  3.7335e-02,  2.9495e-01],
        [ 7.7061e-01, -3.6196e-01,  4.5273e-02,  2.9359e-02, -5.1546e-02,
         -2.3553e-01, -1.2109e-01, -2.0411e-01,  9.2551e-02,  6.0740e-01],
        [ 6.2648e-02,  2.4502e-01,  5.2747e-03,  2.1646e-01,  2.3483e-01,
          1.1304e-01, -6.6548e-0

In [None]:
import torchvision.