<a href="https://colab.research.google.com/github/sebasmos/vector-borne-satellite-predictor/blob/main/VIT_Deep_learning_for_Vector_Borne_Diseases.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Feature extraction with Vision Transformers (ViT) and Resnet50 for satellite imagery  
## Deep learning for Vector-Borne Diseases

![Status](https://img.shields.io/static/v1.svg?label=Status&message=Finished&color=green)

**Filled notebook:** 
[![View on Github](https://img.shields.io/static/v1.svg?logo=github&label=Repo&message=View%20On%20Github&color=lightgrey)](https://github.com/sebasmos/vector-borne-satellite-predictor)
  
**Pre-trained models:** 
[![View files on Github](https://img.shields.io/static/v1.svg?logo=github&label=Repo&message=View%20On%20Github&color=lightgrey)](https://github.com/sebasmos/vector-borne-satellite-predictor/tree/main/src/models)
[![GoogleDrive](https://img.shields.io/static/v1.svg?logo=google-drive&logoColor=yellow&label=GDrive&message=Download&color=yellow)](https://drive.google.com/drive/folders/1v9yBoygh_axFAm_YO-iHxX6uv9Ecg9ga?usp=sharing)   
**Author:** Sebastian A. Cajas Ordoñez

Complementary open-source collaboration for [MIT Critical Data](https://github.com/MITCriticalData-Colombia/Dengue-Prediction-with-Satellite-Images) and [Mimi utility functions](https://github.com/MITCriticalData-Colombia/Dengue-Prediction-with-Satellite-Images).


1. [Utility functions](#0)
1. [Transfer-Learning with Resnet50](#1)
1. [Transfer-Learning with ViT](#2)


In [18]:
# Define desired number of features to extract.
# Num_feat is used below as the output width of the linear heads attached to
# the ResNet50 and ViT backbones, i.e. the length of each image's feature vector.
Num_feat = 100

<a name="#0"></a>

# 1. Utility functions 

In [2]:
!pip install epiweeks

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting epiweeks
  Downloading epiweeks-2.1.4-py3-none-any.whl (5.9 kB)
Installing collected packages: epiweeks
Successfully installed epiweeks-2.1.4


In [3]:
# authenticate
# Colab-only: opens the interactive Google sign-in so the gcloud/gsutil
# commands below run with the user's credentials.
from google.colab import auth
auth.authenticate_user()

# set your gcp project
!gcloud config set project mit-hst-dengue

# Copy the satellite-image dataset from the bucket into the working directory
# (-q quiet, -m parallel transfer, -r recursive).
!gsutil -q -m cp -r gs://colombia_sebasmos/DATASET_5_best_cities .

# List what was downloaded (one sub-folder per city, see the cell output).
!ls DATASET_5_best_cities/

Updated property [core/project].
Cali  Cúcuta  Ibagué  Medellín	Villavicencio


In [4]:
!gdown --id 1RGrXHgvn60L4pHA40M0R0scszHLno5fD
!unzip "dengue.zip" -d .
!rm -f dengue.zip

Downloading...
From: https://drive.google.com/uc?id=1RGrXHgvn60L4pHA40M0R0scszHLno5fD
To: /content/dengue.zip
100% 28.7k/28.7k [00:00<00:00, 41.6MB/s]
Archive:  dengue.zip
   creating: ./dengue/
  inflating: ./dengue/merge_cases_temperature_WeeklyPrecipitation_timeseries.csv  


In [5]:
import numpy as np 
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import math
import os
import time
import sys
sys.path.insert(0,'..')
from torch.utils.data import Dataset, DataLoader
import numpy as np
import matplotlib.pyplot as plt
import sklearn.metrics as metrics

#-------ViT model---------------
!pip install transformers
!pip install datasets

from transformers import ViTFeatureExtractor, ViTForImageClassification
import torch
from datasets import load_dataset
import torch.nn as nn
from transformers import ViTModel
from transformers import ViTConfig

#-------Resnet model---------------
import torch
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
from torchvision.datasets.utils import download_file_from_google_drive
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader,Dataset
from PIL import Image
from PIL import Image
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import  mean_absolute_error
from matplotlib import pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import r2_score
import seaborn as sns
from scipy import signal
import pickle
from torchvision.datasets.folder import pil_loader
from sklearn.decomposition import PCA

from epiweeks import Week, Year
from datetime import date

from random import randint, randrange
from skimage import io
from skimage.transform import rescale, resize, downscale_local_mean

import skimage
import cv2
import os
import pandas as pd
import numpy as np
import pickle
#import plotly.express as px
import matplotlib.pyplot as plt
#torch.cuda.empty_cash()
import torch
torch.cuda.empty_cache()
import torch.nn as nn
import torch.optim as optim

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def get_MAE_score(y_test, y_pred):
  """Return the Mean Absolute Error (MAE), rounded to 4 decimals.

  Both arguments are handed unchanged to ``mean_absolute_error``
  (true values first, predictions second).
  """
  mae = mean_absolute_error(y_test, y_pred)
  return round(mae, 4)

def get_MAPE_score(y_true, y_pred):
  """Return the Mean Absolute Percentage Error (MAPE), rounded to 4 decimals.

  The value is whatever ``mean_absolute_percentage_error`` yields for
  ``(y_true, y_pred)``; the lower the MAPE score, the better the predictions.
  """
  mape = mean_absolute_percentage_error(y_true, y_pred)
  return round(mape, 4)
def readImg(img_path, resize_ratio=None):
  """Read an image from disk and optionally rescale it.

  Args:
    img_path: Path of the image file; read with ``io.imread``.
    resize_ratio: Scale factor(s) forwarded to ``rescale`` (with
      anti-aliasing). When ``None`` (or otherwise falsy) no rescaling is
      done and the image is returned exactly as read.

  Returns:
    The rescaled image, or the original image when no ratio is given.
  """
  img = io.imread(img_path)

  # Without a ratio fall back to the untouched image; previously this path
  # reached the print/return below with `img_rescale` never assigned.
  if resize_ratio:
    img_rescale = rescale(img, resize_ratio, anti_aliasing=True)
  else:
    img_rescale = img

  print(os.path.basename(img_path), '(origin shape:', img.shape, '-> rescale:', str(img_rescale.shape) + ')')
  return img_rescale


# Load data from one of the source
def loadData(csv_folder, img_folder, option=None, resize_ratio=None):
  """Build the image/case dictionary, or reload one saved earlier.

  Args:
    csv_folder: CSV file or folder of CSV files, passed to ``loadStructuredData``.
    img_folder: Folder holding the images, passed to ``combineData``.
    option: ``None`` to build the dictionary from the CSV and the images;
      otherwise the path of a pickle file holding a previously built
      dictionary, which is loaded and returned as is.
    resize_ratio: Forwarded to ``combineData`` (ignored when ``option`` is set).

  Returns:
    The dictionary produced by ``combineData`` or read from the pickle file.
  """
  if option is None:
    # Get data by combining from csv and images
    df = loadStructuredData(csv_folder)
    info_dict = combineData(img_folder, df, resize_ratio)

    print(len(info_dict['LastDayWeek']), len(info_dict['Image']), len(info_dict['cases_medellin']))

  else:
    # Load data from previous pickle file (this branch used to return the
    # placeholder value 1).
    # NOTE: unpickling can execute arbitrary code; only load files you created.
    with open(option, 'rb') as pkl_file:
      info_dict = pickle.load(pkl_file)
  return info_dict
  

def loadStructuredData(csv_path):
  """Load one CSV file, or every file of a folder, into a single DataFrame.

  Args:
    csv_path: Path of a CSV file, or of a folder whose files are all read
      with ``pd.read_csv`` and stacked row-wise.

  Returns:
    The loaded DataFrame. An empty DataFrame is returned when the folder is
    empty or when ``csv_path`` is neither a file nor a folder (an error line
    is printed in the latter case).
  """
  if os.path.isdir(csv_path):
    # Sorted so the row order does not depend on the file system's listing
    # order. Frames are collected first and concatenated once:
    # DataFrame.append no longer exists in pandas 2 and copied the whole
    # frame on every call.
    frames = [pd.read_csv(os.path.join(csv_path, filename))
              for filename in sorted(os.listdir(csv_path))]
    return pd.concat(frames) if frames else pd.DataFrame()

  if os.path.isfile(csv_path):
    return pd.read_csv(csv_path)

  print('Error: Not folder or file')
  return pd.DataFrame()
  
def getEpiWeek(origin_str):
  """Convert a dash-separated ``year-month-day`` string into a ``Week``.

  Only the first three dash-separated fields are used; each is parsed as an
  integer and the resulting date is passed to ``Week.fromdate``.
  """
  pieces = origin_str.split('-')
  year, month, day = int(pieces[0]), int(pieces[1]), int(pieces[2])
  return Week.fromdate(date(year, month, day))
  
def combineData(img_folder, df, resize_ratio=None):
  """Pair every row of the case table with the image of the same epi week.

  Args:
    img_folder: Folder with the images. The date of an image is taken from
      the digits and dashes of its file name
      (``image_2017-12-24.tiff`` -> ``2017-12-24``).
    df: DataFrame with at least the columns ``LastDayWeek`` (date string)
      and ``cases_medellin``.
    resize_ratio: Forwarded to ``readImg``.

  Returns:
    A dict of four parallel lists: ``LastDayWeek``, ``cases_medellin``,
    ``Image`` and ``epi_week``. Rows without an image in the same epi week
    are left out.
  """
  info_dict = {'LastDayWeek':[], 'cases_medellin':[], 'Image':[], 'epi_week':[]}

  # Index the images by epi week once, instead of re-parsing every file name
  # for every row of the table. Sorted so that, if several images fall in
  # the same week, the one that wins does not depend on listing order.
  week_to_img = {}
  for img_name in sorted(os.listdir(img_folder)):
    date_str = ''.join(ch for ch in img_name if ch.isdigit() or ch == '-')
    try:
      week_img = str(getEpiWeek(date_str))
    except (ValueError, IndexError):
      # Not a dated image (e.g. a hidden or checkpoint file): skip it rather
      # than abort the whole load.
      continue
    week_to_img.setdefault(week_img, img_name)

  for index, row in df.iterrows():
    name = row['LastDayWeek']
    week_df = str(getEpiWeek(name))
    case = row['cases_medellin']

    img_name = week_to_img.get(week_df)
    if img_name is None:
      continue

    img = readImg(os.path.join(img_folder, img_name), resize_ratio)

    # Keep only channels 1..3 of the last axis.
    # NOTE(review): presumably the visible bands of the multi-band image; confirm.
    info_dict['Image'].append(img[:,:,1:4])
    info_dict['LastDayWeek'].append(name)
    info_dict['cases_medellin'].append(case)
    info_dict['epi_week'].append(week_df)

  return info_dict

def splitTrainTestSet(ratio, data=None):
  """Split images and case counts into leading (train) and trailing (test) parts.

  The split is positional, not random: the first ``ratio`` share of the
  samples is the training set and the remainder the test set.

  Args:
    ratio: Share of samples (0..1) assigned to the training set.
    data: Dict with the keys ``'Image'`` and ``'cases_medellin'``. Defaults
      to the notebook-level ``info_dict``, which keeps the original
      one-argument call working.

  Returns:
    ``(origin_X_train, y_train, origin_X_test, y_test)`` as numpy arrays.
  """
  if data is None:
    # Backward-compatible fallback to the global built by loadData.
    data = info_dict

  # Change list to array
  origin_dimension_X = np.array(data['Image'])
  labels = np.array(data['cases_medellin'])

  # Split the data into training (ratio) and testing (1 - ratio)
  train_num = int(len(origin_dimension_X) * ratio)

  print(''.center(60,'-'))

  origin_X_train = origin_dimension_X[:train_num]
  y_train = labels[:train_num]
  origin_X_test = origin_dimension_X[train_num:]
  y_test = labels[train_num:]

  return origin_X_train, y_train, origin_X_test, y_test

# Polynomial Regression
def calc_r_2(x, y, degree):
    """Return the R-squared of a least-squares polynomial fit of ``y`` on ``x``.

    A polynomial of the given ``degree`` is fitted with ``np.polyfit``; the
    result is the ratio between the regression sum of squares (fitted values
    around the mean of ``y``) and the total sum of squares (``y`` around its
    mean).
    """
    poly = np.poly1d(np.polyfit(x, y, degree))

    fitted = poly(x)
    y_mean = np.sum(y) / len(y)

    explained = np.sum((fitted - y_mean) ** 2)
    total = np.sum((y - y_mean) ** 2)

    return explained / total

def classified_with_SVR(origin_X_train, origin_X_test, y_train, y_test):
  """Fit a support-vector regressor on flattened inputs and report its errors.

  Despite the name this is a regression: an ``SVR(C=1.0, epsilon=0.2)`` is
  fitted on the training samples (each flattened to one row), the test
  predictions are truncated to integers, and MAE, MAPE and a polynomial-fit
  R-squared are printed and returned.

  Args:
    origin_X_train: Training inputs; first axis indexes the samples.
    origin_X_test: Test inputs; first axis indexes the samples.
    y_train: Training targets.
    y_test: Test targets.

  Returns:
    ``(MAE, MAPE, r_2)``.
  """
  print('[SVR]'.center(100, '-'))

  # One row per sample, whatever the original dimensionality.
  reshape_X_train = origin_X_train.reshape(origin_X_train.shape[0], -1)
  reshape_X_test = origin_X_test.reshape(origin_X_test.shape[0], -1)

  regressor = SVR(C=1.0, epsilon=0.2)
  regressor.fit(reshape_X_train, y_train)

  float_y_pred = regressor.predict(reshape_X_test)
  # int() truncates towards zero, it does not round.
  int_y_pred = [int(i) for i in float_y_pred]

  print('Predicted')
  print(' '.ljust(3, ' '), 'List =', int_y_pred)
  print(' '.ljust(3, ' '), 'Mean =', round(np.mean(int_y_pred), 4))
  print('')

  print('Real')
  print(' '.ljust(3, ' '), 'List =', y_test)
  print(' '.ljust(3, ' '), 'Mean =', round(np.mean(y_test), 4))
  print('')

  MAE = get_MAE_score(y_test, int_y_pred)
  MAPE = get_MAPE_score(y_test, int_y_pred)

  # R-squared of a degree-15 polynomial fitted to (true value, prediction) pairs.
  # NOTE(review): degree 15 is high for a small test set; confirm it is intended.
  r_2 = calc_r_2(y_test, int_y_pred, 15)

  print('- MAE: ', str(MAE).rjust(8), '(cases different in average)')
  print('- MAPE:', str(MAPE).rjust(8), '(times different in average)')
  # The description used to be a copy of the MAPE one, which mislabelled the metric.
  print('- r_squared:', str(r_2).rjust(8), '(R^2 of a degree-15 polynomial fit, predictions vs. real)')

  return MAE, MAPE, r_2



def dimension_reduct_with_PCA(origin_X_train, origin_X_test, y_train):
  """Project flattened train/test inputs onto the components keeping 95% of the variance.

  The PCA is fitted on the training samples only and then applied to the
  test samples. ``y_train`` is accepted but not used.

  Returns:
    ``(pca_X_train, pca_X_test)``.
  """
  print(' PRINCIPAL COMPONENT ANALYSIS  '.center(100, '='))

  # One row per sample.
  n_train, n_test = origin_X_train.shape[0], origin_X_test.shape[0]
  flat_train = origin_X_train.reshape(n_train, -1)
  flat_test = origin_X_test.reshape(n_test, -1)

  reducer = PCA(n_components=0.95)
  reduced_train = reducer.fit_transform(flat_train)
  reduced_test = reducer.transform(flat_test)

  print('Origin shape'.ljust(15), flat_train.shape)
  print('Resize shape'.ljust(15), reduced_train.shape)

  return reduced_train, reduced_test

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.20.1-py3-none-any.whl (4.4 MB)
[K     |████████████████████████████████| 4.4 MB 9.5 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 75.1 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.8.1-py3-none-any.whl (101 kB)
[K     |████████████████████████████████| 101 kB 13.4 MB/s 
Collecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)
[K     |████████████████████████████████| 6.6 MB 59.6 MB/s 
Installing collected packages: pyyaml, tokenizers, huggingface-hub, transformers
  Attempting uninstall: pyyaml
    Found existing installation: PyYAML 3.13
    Uninstalling P

In [6]:
# Inputs for the city analysed here: the image folder downloaded from the
# bucket and the case table unzipped from the Drive archive.
img_folder = '/content/DATASET_5_best_cities/Medellín'
csv_folder = '/content/dengue/merge_cases_temperature_WeeklyPrecipitation_timeseries.csv'

# Build the notebook-level dictionary pairing each week's case count with its
# image. The ratio shrinks height and width to 70% and leaves the band axis
# untouched (see the shapes printed below).
info_dict = loadData(csv_folder, img_folder, resize_ratio=(0.7, 0.7, 1))

print('INFO_DICT'.center(50, '-'))
print('keys:', info_dict.keys())
print('')

print('DENGUE CASES'.center(50, '-'))
print('Max weekly dengue cases:', max(info_dict['cases_medellin']))
print('Min weekly dengue cases:', min(info_dict['cases_medellin']))
print('')

# Dates are strings here, so max/min compare them as text.
print('WEEKS'.center(50, '-'))
print('Max week:', max(info_dict['LastDayWeek']))
print('Min week:', min(info_dict['LastDayWeek']))

image_2015-11-01.tiff (origin shape: (745, 747, 12) -> rescale: (522, 523, 12))
image_2015-11-08.tiff (origin shape: (745, 747, 12) -> rescale: (522, 523, 12))
image_2015-11-15.tiff (origin shape: (745, 747, 12) -> rescale: (522, 523, 12))
image_2015-11-22.tiff (origin shape: (745, 747, 12) -> rescale: (522, 523, 12))
image_2015-11-29.tiff (origin shape: (745, 747, 12) -> rescale: (522, 523, 12))
image_2015-12-06.tiff (origin shape: (745, 747, 12) -> rescale: (522, 523, 12))
image_2015-12-13.tiff (origin shape: (745, 747, 12) -> rescale: (522, 523, 12))
image_2015-12-20.tiff (origin shape: (745, 747, 12) -> rescale: (522, 523, 12))
image_2015-12-27.tiff (origin shape: (745, 747, 12) -> rescale: (522, 523, 12))
image_2016-01-03.tiff (origin shape: (745, 747, 12) -> rescale: (522, 523, 12))
image_2016-01-10.tiff (origin shape: (745, 747, 12) -> rescale: (522, 523, 12))
image_2016-01-17.tiff (origin shape: (745, 747, 12) -> rescale: (522, 523, 12))
image_2016-01-24.tiff (origin shape: (74

In [7]:
# Share of the samples used for training; the rest is the test set.
train_val_ratio = 0.8
train_num = int(len(info_dict['Image']) * train_val_ratio)

# Change list to array
origin_dimension_X = np.array(info_dict['Image'])
labels = np.array(info_dict['cases_medellin'])

print(''.center(60,'-'))

# Positional split: the first train_num samples train, the remaining ones test.
origin_X_train = origin_dimension_X[:train_num,:,:,:]
y_train = labels[:train_num]
origin_X_test = origin_dimension_X[train_num:,:,:,:]
y_test = labels[train_num:]

# The last two lines used to print the test shapes under the train labels.
print(f"origin_X_train: {origin_X_train.shape}")
print(f"y_train: {y_train.shape}")
print(f"origin_X_test: {origin_X_test.shape}")
print(f"y_test: {y_test.shape}")


------------------------------------------------------------
origin_X_train: (131, 522, 523, 3)
y_train: (131,)
origin_X_train: (33, 522, 523, 3)
y_train: (33,)


<a name="#1"></a>
# 2. Transfer-Learning with Resnet50



In [8]:
import numpy as np
import matplotlib.pyplot as plt
import sklearn.metrics as metrics
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models

from torchvision.datasets.utils import download_file_from_google_drive
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader,Dataset
from PIL import Image

torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# ImageNet-pretrained ResNet-50 used as a fixed feature extractor.
model = models.resnet50(pretrained=True)

# Input width of the final fully connected layer, read from the model itself
# (2048 for ResNet-50) instead of being hard-coded.
output_feat = model.fc.in_features

# Replace the classification layer with a Num_feat-wide linear projection.
# NOTE: this new layer is randomly initialised and no training step is visible
# in this notebook, so the extracted features change from run to run unless
# the torch RNG is seeded.
model.fc = nn.Linear(output_feat, Num_feat)

# Move the model only after the head swap so the new layer ends up on the same
# device as the backbone. `.to(device)` also works on CPU-only runtimes, where
# the previous unconditional `model.cuda()` raised.
model = model.to(device)


train_transform = transforms.Compose([
      transforms.ToTensor()
])

valid_transform = transforms.Compose([
    #transforms.Resize(224),
    #transforms.CenterCrop(192),
    transforms.ToTensor(),
    #transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

class TrainDataset_improved(Dataset):
    """Array-backed dataset yielding ``(sample, target)`` pairs.

    ``data`` and ``y`` are indexed with the same position; ``transform``,
    when given, is applied to the sample only.
    """

    def __init__(self, data, y, transform = None):
        self.data, self.y, self.transform = data, y, transform

    def __len__(self):
        # Number of samples = size of the first axis of the data array.
        return self.data.shape[0]

    def __getitem__(self, ind):
        sample, target = self.data[ind], self.y[ind]
        if not self.transform:
            return sample, target
        return self.transform(sample), target
  
# This dataloader is to extract features - using toTensor() to swap channels https://stackoverflow.com/questions/64629702/pytorch-transform-totensor-changes-image 

# Features are extracted for the whole series (train and test weeks together).
train_set = TrainDataset_improved(origin_dimension_X, labels, train_transform)
#test_set  = TrainDataset_improved(origin_X_test, y_test,  valid_transform)

batch_size = 1
# Temporal data: keep the loader in dataset order so that row i of the
# extracted features is sample i of the dataset. This was shuffle=True, which
# contradicted the stated intent and scrambled the order of the features.
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=False)
#test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)




  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

In [9]:
# Verify that transformation flipped channels order for pre-trained model
cont = 0
for batch, targets in train_loader:
  print(batch.shape)
  if cont >3:
    break
  cont+=1

torch.Size([1, 3, 522, 523])
torch.Size([1, 3, 522, 523])
torch.Size([1, 3, 522, 523])
torch.Size([1, 3, 522, 523])
torch.Size([1, 3, 522, 523])


In [10]:
# Shape of the raw array wrapped by the dataset (before any transform).
train_loader.dataset.data.shape

(164, 522, 523, 3)

In [11]:
def extract_features(loader):
    """Run every batch of ``loader`` through the notebook-level ``model``.

    The model is switched to eval mode and called without gradient tracking;
    inputs are cast to float32 and moved to the notebook-level ``device``.

    Returns:
        ``(features, labels)``: the model outputs and the targets of all
        batches, each concatenated along the first axis as numpy arrays.
    """
    # put the model in eval mode
    model.eval()

    feature_chunks, label_chunks = [], []
    with torch.no_grad():
        for batch, targets in loader:
            # extract the features using the model
            outputs = model(batch.float().to(device))
            feature_chunks.append(outputs.data.cpu().numpy())
            label_chunks.append(targets.numpy())

    stacked_features = np.concatenate(feature_chunks, axis=0)
    stacked_labels = np.concatenate(label_chunks, axis=0)
    return stacked_features, stacked_labels

# Extract the ResNet features for every sample served by train_loader.
train_features, train_labels = extract_features(train_loader)
#valid_features, valid_labels = extract_features(test_loader)

# Expected: one row per sample, Num_feat columns.
print(f'train features are {train_features.shape}')
#print(f'valid features are {valid_features.shape}')

train features are (164, 100)


In [12]:
# Disabled on purpose: the code below is wrapped in a string so it does not run.
# Remove the triple quotes (with Google Drive mounted) to save the ResNet
# features and labels as pickle files.
"""
import pickle
with open(os.path.join("/content/drive/MyDrive/Dengue_prediction/datathon", 'features_resnet50.pkl'), 'wb') as pkl_file:
                    pickle.dump(train_features, pkl_file)
with open(os.path.join("/content/drive/MyDrive/Dengue_prediction/datathon", 'labels_resnet50.pkl'), 'wb') as pkl_file:
                    pickle.dump(train_labels, pkl_file)
"""

'\nimport pickle\nwith open(os.path.join("/content/drive/MyDrive/Dengue_prediction/datathon", \'features_resnet50.pkl\'), \'wb\') as pkl_file:\n                    pickle.dump(train_features, pkl_file)\nwith open(os.path.join("/content/drive/MyDrive/Dengue_prediction/datathon", \'labels_resnet50.pkl\'), \'wb\') as pkl_file:\n                    pickle.dump(train_labels, pkl_file)\n'

<a name ="#2"></a>
# 3. Transfer-Learning with ViT

In [14]:
class ViTSTR(nn.Module):
    """Pretrained ViT backbone followed by a linear projection head.

    Args:
        config: Kept for interface compatibility. The backbone is always
            loaded from the ``google/vit-base-patch16-224`` checkpoint, so
            the head is sized from that checkpoint's own configuration
            rather than from this argument.
        num_labels: Output width of the head. With ``num_labels <= 0`` the
            head is an identity and the raw embeddings are returned.

    NOTE: the linear head is randomly initialised; no training step for it is
    visible in this notebook.
    """

    def __init__(self, config, num_labels):
        super(ViTSTR, self).__init__()
        #self.vit = ViTModel(config)
        self.vit = ViTModel.from_pretrained("google/vit-base-patch16-224")
        # Use the hidden size of the model that was actually loaded; the
        # caller's config may describe a different architecture.
        hidden_size = self.vit.config.hidden_size
        self.head = nn.Linear(hidden_size, num_labels) if num_labels > 0 else nn.Identity()
        self.num_labels = num_labels

    def forward(self, pixel_values, seqlen=1):
        """Return head outputs of shape ``(batch, seqlen, width)``.

        ``width`` is ``num_labels`` for the linear head and the embedding
        size for the identity head.
        """
        outputs = self.vit(pixel_values=pixel_values)
        # Keep the first `seqlen` tokens of the last hidden state (with the
        # default seqlen=1 that is only the leading token).
        x = outputs.last_hidden_state[:, :seqlen]

        # batch_size, seqlen, embedding size
        b, s, e = x.size()
        x = self.head(x.reshape(b*s, e))
        # -1 instead of self.num_labels so the identity head (num_labels <= 0)
        # keeps the embedding width instead of failing on an impossible shape.
        return x.reshape(b, s, -1)


# Initialize ViT model

config = ViTConfig()
feat_vit = ViTSTR(config, Num_feat)

# Define transformations

# Resize to exactly 224x224, the input size of the patch16-224 checkpoint.
# An integer size only rescales the shorter edge, which gives 224x224 only
# for (almost) square images.
# NOTE(review): the checkpoint's own feature extractor also normalises pixel
# values; confirm whether a Normalize step should be added here.
train_transform = transforms.Compose([
      transforms.Resize((224, 224)),
      transforms.ToTensor()
])

valid_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

class TrainDataset_improved(Dataset):
    """Array-backed dataset that serves each sample as a PIL image.

    Redefines the earlier class of the same name: each sample is cast to
    uint8 and wrapped in a PIL image before ``transform`` (if any) is applied.
    """

    def __init__(self, data, y, transform = None):
        self.data, self.y, self.transform = data, y, transform

    def __len__(self):
        # Number of samples = size of the first axis of the data array.
        return self.data.shape[0]

    def __getitem__(self, ind):
        sample, target = self.data[ind], self.y[ind]

        # NOTE(review): np.uint8 truncates; if the arrays hold floats in
        # [0, 1] (e.g. after rescaling) nearly every pixel becomes 0. Confirm
        # the value range of the images before relying on these features.
        picture = Image.fromarray(np.uint8(sample))
        if not self.transform:
            return picture, target
        return self.transform(picture), target
  
# This dataloader is to extract features - using toTensor() to swap channels https://stackoverflow.com/questions/64629702/pytorch-transform-totensor-changes-image 

# Features are extracted for the whole series (train and test weeks together).
train_set = TrainDataset_improved(origin_dimension_X, labels, train_transform)
#test_set  = TrainDataset_improved(origin_X_test, y_test,  valid_transform)

batch_size = 1
# Temporal data: keep the loader in dataset order so that row i of the
# extracted features is sample i of the dataset. This was shuffle=True, which
# contradicted the stated intent and scrambled the order of the features.
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=False)
#test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)


Downloading:   0%|          | 0.00/68.0k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/330M [00:00<?, ?B/s]

Some weights of the model checkpoint at google/vit-base-patch16-224 were not used when initializing ViTModel: ['classifier.weight', 'classifier.bias']
- This IS expected if you are initializing ViTModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTModel were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized: ['vit.pooler.dense.bias', 'vit.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [15]:
# Checking dataloader shape
for m,l in train_loader:
  print(m.shape,l.shape)

torch.Size([1, 3, 224, 224]) torch.Size([1])
torch.Size([1, 3, 224, 224]) torch.Size([1])
torch.Size([1, 3, 224, 224]) torch.Size([1])
torch.Size([1, 3, 224, 224]) torch.Size([1])
torch.Size([1, 3, 224, 224]) torch.Size([1])
torch.Size([1, 3, 224, 224]) torch.Size([1])
torch.Size([1, 3, 224, 224]) torch.Size([1])
torch.Size([1, 3, 224, 224]) torch.Size([1])
torch.Size([1, 3, 224, 224]) torch.Size([1])
torch.Size([1, 3, 224, 224]) torch.Size([1])
torch.Size([1, 3, 224, 224]) torch.Size([1])
torch.Size([1, 3, 224, 224]) torch.Size([1])
torch.Size([1, 3, 224, 224]) torch.Size([1])
torch.Size([1, 3, 224, 224]) torch.Size([1])
torch.Size([1, 3, 224, 224]) torch.Size([1])
torch.Size([1, 3, 224, 224]) torch.Size([1])
torch.Size([1, 3, 224, 224]) torch.Size([1])
torch.Size([1, 3, 224, 224]) torch.Size([1])
torch.Size([1, 3, 224, 224]) torch.Size([1])
torch.Size([1, 3, 224, 224]) torch.Size([1])
torch.Size([1, 3, 224, 224]) torch.Size([1])
torch.Size([1, 3, 224, 224]) torch.Size([1])
torch.Size

In [16]:
def extract_features(loader, model):
    """Run every batch of ``loader`` through ``model`` and collect the outputs.

    The model is put in eval mode and moved to the notebook-level ``device``;
    inputs are cast to float32 and the forward passes run without gradient
    tracking.

    Args:
        loader: Iterable of ``(batch, targets)`` tensor pairs.
        model: Module whose output has the batch on its first axis.

    Returns:
        ``(features, labels)`` as numpy arrays: ``features`` has one row per
        sample, with everything after the batch axis flattened; ``labels``
        holds the targets concatenated along the first axis.
    """
    # put the model in eval mode
    model.eval()
    model.to(device)

    features, labels = [], []
    with torch.no_grad():
        for batch, targets in loader:
            batch_features = model(batch.float().to(device))

            feat = batch_features.data.cpu().numpy()
            # One row per sample: (b, 1, F) -> (b, F). The previous
            # np.squeeze(feat, 0) dropped the batch axis and therefore only
            # worked with batch_size == 1.
            feat = feat.reshape(feat.shape[0], -1)

            features.append(feat)
            labels.append(targets.numpy())

    features = np.concatenate(features, axis=0)
    labels = np.concatenate(labels, axis=0)
    return features, labels

# Extract the ViT features for every sample served by train_loader.
# This overwrites the train_features / train_labels produced by the ResNet section.
train_features, train_labels = extract_features(train_loader, feat_vit)
#valid_features, valid_labels = extract_features(test_loader)

# Expected: one row per sample, Num_feat columns, and one label per sample.
print(f'train features are {train_features.shape}')
print(f' train_labels are {train_labels.shape}')

train features are (164, 100)
 train_labels are (164,)


In [17]:
# Disabled on purpose: the code below is wrapped in a string so it does not run.
# Remove the triple quotes (with Google Drive mounted) to save the ViT
# features and labels as pickle files.
"""
import pickle
with open(os.path.join("/content/drive/MyDrive/Dengue_prediction/datathon", 'features_transformer.pkl'), 'wb') as pkl_file:
                    pickle.dump(train_features, pkl_file)
with open(os.path.join("/content/drive/MyDrive/Dengue_prediction/datathon", 'labels_transformer.pkl'), 'wb') as pkl_file:
                    pickle.dump(train_labels, pkl_file)
"""

'\nimport pickle\nwith open(os.path.join("/content/drive/MyDrive/Dengue_prediction/datathon", \'features_transformer.pkl\'), \'wb\') as pkl_file:\n                    pickle.dump(train_features, pkl_file)\nwith open(os.path.join("/content/drive/MyDrive/Dengue_prediction/datathon", \'labels_transformer.pkl\'), \'wb\') as pkl_file:\n                    pickle.dump(train_labels, pkl_file)\n'