This project aims to overcome scene-context constraints in object detection under uncontrolled acquisition environments by combining image defilters with an InternImage detection model. The project involves the following steps:

**Description**

1. Firstly, the type of distortion affecting the image is determined using machine learning classifiers.
2. Once the type of distortion is determined, the corresponding defilters are used to remove the distortion from the image.
3. The reconstructed images are then passed through an object detection model to detect objects.

This notebook is divided into three parts:

**Machine learning classifiers**

1. In the machine learning classifier cells we load three ML classifiers and predict the type of distortion affecting each given image, following the proposed approach.

**De_Filters**

2. Here we apply the defilter that corresponds to the type of distortion affecting each image.

**InternImage Object Detection**

3. Here we run object detection using the internimage_xl_3 model and save the results in .json format.

Some changes need to be made before running the cells. Wherever a change is required, it is indicated by a comment in the cell, so please check the comments before running each cell!



To let you examine the complete workflow, we have included some sample images taken from the test folder.

#Machine learning classifiers

import libraries

In [None]:
import cv2 
import numpy as np
from google.colab.patches import cv2_imshow
import pandas as pd
import matplotlib.pyplot as plt
import keras
import tensorflow
import seaborn as sns
import os
import shutil 
import os.path
import glob
from skimage.io import imread, imshow
from skimage.exposure import histogram
from google.colab.patches import cv2_imshow
import skimage.feature as feature
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
import matplotlib.pyplot as plot
import zipfile
import joblib

In [None]:
#local binary patterns
class LocalBinaryPatterns:
  """Uniform Local Binary Pattern (LBP) descriptor for a single-channel image.

  Args:
    numPoints: number of sampling points passed to skimage's
      ``local_binary_pattern``.
    radius: sampling radius passed to skimage's ``local_binary_pattern``.
  """

  def __init__(self, numPoints, radius):
    self.radius = radius
    self.numPoints = numPoints

  def describe(self, image, eps = 1e-9):
    """Return ``(hist, lbp)`` for ``image``.

    ``lbp`` is the raw LBP code map and ``hist`` is its histogram with
    ``numPoints + 2`` unit-wide bins, normalised so that it sums to ~1.
    ``eps`` guards the normalisation against a zero sum.
    """
    codes = feature.local_binary_pattern(image, self.numPoints, self.radius, method="uniform")

    # One bin per integer code value in [0, numPoints + 2).
    n_bins = self.numPoints + 2
    counts, _edges = np.histogram(codes.ravel(), bins=np.arange(0, n_bins + 1), range=(0, n_bins))

    # Turn raw counts into relative frequencies.
    normalized = counts.astype('float')
    normalized /= (normalized.sum() + eps)

    return normalized, codes

In [None]:
#glcm_texture_feature_and_local_binary_pattern
def texture_feature_GLCM_LBP(image_folder):
  """Extract GLCM + LBP texture features for every file in ``image_folder``.

  Each image is resized to 256x256 and converted to grayscale. The feature
  row is the concatenation of six GLCM properties (contrast, dissimilarity,
  homogeneity, energy, correlation, ASM), each computed at distance 1 for
  four angles, followed by the normalised LBP(32, 2) histogram (34 bins),
  i.e. 6 * 4 + 34 = 58 values per image.

  Args:
    image_folder: path of a folder whose entries are all readable images.

  Returns:
    A DataFrame with one row per entry of ``os.listdir(image_folder)``, in
    that order. Every row keeps index 0, as produced by ``pd.concat``.

  Raises:
    ValueError: if an entry cannot be decoded as an image (for example a
      sub-directory such as ``.ipynb_checkpoints``), or if the folder is
      empty (raised by ``pd.concat``).
  """
  glcm_props = ('contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation', 'ASM')
  glcm_angles = [0, np.pi/4, np.pi/2, 3*np.pi/4]
  # The descriptor holds no per-image state, so build it once.
  desc = LocalBinaryPatterns(32, 2)

  df_texture = []
  for image_name in os.listdir(image_folder):
    image_path = image_folder + '/' + image_name
    image = cv2.imread(image_path)
    if image is None:
      # cv2.imread signals failure by returning None instead of raising;
      # fail here with the offending path rather than inside cv2.resize.
      raise ValueError(f'Could not read image file: {image_path}')
    image = cv2.resize(image, (256, 256))
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    graycom = feature.graycomatrix(gray, [1], glcm_angles, levels=256)
    glcm_features = [feature.graycoprops(graycom, prop).flatten() for prop in glcm_props]

    hist, _ = desc.describe(gray)

    final_features = np.concatenate(glcm_features + [hist.flatten()])
    df_texture.append(pd.DataFrame(final_features.reshape(1, -1)))

  final_df = pd.concat(df_texture)
  return final_df

In [None]:
#glcm_texture_feature_and_local_binary_pattern _as_dataframe
def dat_out(img_path):
  """Build the texture-feature table for a folder and tag rows with file names.

  Runs ``texture_feature_GLCM_LBP`` on ``img_path`` and appends an
  ``Image_Name`` column filled from a fresh ``os.listdir`` of the same folder.

  NOTE(review): the names come from a second directory listing, so this
  relies on both listings returning the entries in the same order.
  """
  folder = str(img_path)
  features = texture_feature_GLCM_LBP(folder)
  features['Image_Name'] = os.listdir(folder)
  return features


**Test images classification into noise types**


Download Test data

In [None]:
##!wget https://www.l2ti.univ-paris13.fr/VSQuad/CD-COCO_ICIP2023_Challenge/test2017_distorted.zip

Unzip the Test Data

In [None]:
#!unzip /content/test2017_distorted.zip -d /content/Test_data

In [None]:
#sample set with 20 images for testing entire code
#!unzip /content/Test_samples.zip -d /content/Test_data

test data texture features

In [None]:
# Path to the folder holding the distorted test images; change it here if the
# data was unzipped to a different location.
# `test` holds one row of 58 texture features per image plus an Image_Name column.
test = dat_out('/content/Test_data/not_defiltered')
test

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,49,50,51,52,53,54,55,56,57,Image_Name
0,1148.782904,1906.38619,1214.902267,1839.852564,9.843842,14.145452,10.915931,14.447166,0.696191,0.663439,...,0.005753,0.002579,0.011398,0.00444,0.008987,0.004929,0.006256,0.533005,0.19574,000000004762.jpg
0,126.96489,90.412318,141.727482,313.314556,6.034804,6.078108,6.989032,9.891565,0.316765,0.247216,...,0.013504,0.012054,0.01355,0.012009,0.012161,0.009308,0.006653,0.031677,0.174103,000000000529.jpg
0,470.039047,830.929519,719.39568,922.490334,11.596492,15.971734,14.191176,17.053687,0.320798,0.259979,...,0.010406,0.007599,0.014069,0.011551,0.016418,0.015121,0.019394,0.174744,0.346909,000000002894.jpg
0,107.075766,203.554494,109.879856,188.549666,3.861213,5.189097,3.657031,5.11554,0.445987,0.418335,...,0.007675,0.005875,0.010681,0.008698,0.012924,0.010544,0.010071,0.16774,0.38858,000000001895.jpg
0,1046.502328,1283.321123,851.138434,1338.356478,19.86682,22.238862,16.406633,22.725229,0.093774,0.086055,...,0.007767,0.008896,0.010025,0.014694,0.019669,0.023026,0.02739,0.081787,0.493958,000000061076.jpg
0,1213.041682,1469.054118,473.50265,1228.983791,17.272748,19.356863,9.569593,17.879477,0.182601,0.161108,...,0.00972,0.010757,0.013336,0.015472,0.019928,0.019775,0.024429,0.074158,0.452606,000000012096.jpg
0,230.978523,297.048997,232.566866,263.418055,10.690104,11.433802,10.62716,11.061853,0.101107,0.098363,...,0.003433,0.004547,0.005936,0.009415,0.015579,0.02504,0.038788,0.11702,0.532394,000000000716.jpg
0,1979.38364,2839.523076,1864.516942,2969.083353,19.3822,26.350265,20.052512,27.022422,0.385748,0.326782,...,0.004623,0.005508,0.006958,0.007523,0.011032,0.011215,0.015427,0.259689,0.320572,000000007444.jpg
0,458.389246,766.569673,520.68848,878.4993,9.978952,14.14519,10.720496,14.61584,0.344791,0.234587,...,0.012924,0.010834,0.016129,0.014969,0.020523,0.016861,0.019333,0.111954,0.343765,000000003793.jpg
0,577.683609,709.903945,432.318995,706.124921,13.541605,15.621376,12.231127,15.57301,0.164253,0.141622,...,0.009003,0.009491,0.01088,0.013092,0.018097,0.018646,0.023483,0.079437,0.456207,000000002890.jpg


In [None]:
#test.to_csv('/content/test_feat.csv',index=False)

In [None]:
# Keep only the numeric feature columns: the trailing Image_Name column is
# sliced off and the remainder is handed to the classifiers as a plain array.
test_x = test.iloc[:, :-1].to_numpy()
test_x

array([[1.14878290e+03, 1.90638619e+03, 1.21490227e+03, ...,
        6.25610352e-03, 5.33004761e-01, 1.95739746e-01],
       [1.26964890e+02, 9.04123183e+01, 1.41727482e+02, ...,
        6.65283203e-03, 3.16772461e-02, 1.74102783e-01],
       [4.70039047e+02, 8.30929519e+02, 7.19395680e+02, ...,
        1.93939209e-02, 1.74743652e-01, 3.46908569e-01],
       ...,
       [5.36668183e+02, 7.72166905e+02, 5.14547595e+02, ...,
        2.30560303e-02, 8.81652832e-02, 4.85382080e-01],
       [1.93214614e+02, 1.81923214e+02, 1.39446967e+02, ...,
        3.93676758e-03, 7.42950439e-01, 6.83288574e-02],
       [8.16791621e+02, 1.53036460e+03, 1.25571183e+03, ...,
        3.04718018e-02, 8.65173340e-02, 4.92034912e-01]])

In [None]:
test_x.shape

(24, 58)

In [None]:
## Load the three pre-trained classifier models and predict one distortion label per image.
## The distortions are grouped into 6 types: Haze, Blur (motion blur, local defocus blur),
## Enhancement (contrast change, local backlight illumination), Rain, Noise, Compress.
## label = 'haze': 0, 'blur': 1, 'enchancement' :2, 'rain':3, 'noise':4,'compress':5

# Paths to the machine learning models; adjust them if the models live elsewhere.
# NOTE(review): unpickling executes code stored in the file, so only load model
# files that come from a trusted source.
import pickle

# Each file is opened in a `with` block so the handle is closed after loading.
with open('/content/Test_data/ml_classifier models/xgb_cls_4april.sav', 'rb') as model_file:
  xgb = pickle.load(model_file)
xgb_pred = xgb.predict(test_x)
print(xgb_pred)

with open('/content/Test_data/ml_classifier models/GradientBoostingClassifier_4april.sav', 'rb') as model_file:
  gbc = pickle.load(model_file)
gbc_pred = gbc.predict(test_x)
print(gbc_pred)

with open('/content/Test_data/ml_classifier models/LGBMClassifier_4april.sav', 'rb') as model_file:
  lgb = pickle.load(model_file)
lgb_pred = lgb.predict(test_x)
print(lgb_pred)

  If you are loading a serialized model (like pickle in Python, RDS in R) generated by
  older XGBoost, please export the model by calling `Booster.save_model` from that version
  first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html

  for more details about differences between saving model and serializing.

[2 1 5 2 3 3 4 2 5 0 1 4 4 2 0 0 5 1 4 0 1 3 5 4]
[2 1 5 2 3 3 4 2 5 0 1 4 4 2 0 0 5 1 4 0 1 3 5 4]
[2 1 5 2 3 3 4 2 5 0 1 4 4 2 0 0 5 1 4 0 1 3 5 4]


In [None]:
# Check the number of samples predicted for each class by every model.
from collections import Counter

xgb_counts = Counter(xgb_pred)
print(xgb_counts.keys(), xgb_counts.values())
print()  # blank separator line (the previous '/n' was printed literally)
gbc_counts = Counter(gbc_pred)
print(gbc_counts.keys(), gbc_counts.values())
print()
lgb_counts = Counter(lgb_pred)
print(lgb_counts.keys(), lgb_counts.values())

dict_keys([2, 1, 5, 3, 4, 0]) dict_values([4, 4, 4, 3, 5, 4])
/n
dict_keys([2, 1, 5, 3, 4, 0]) dict_values([4, 4, 4, 3, 5, 4])
/n
dict_keys([2, 1, 5, 3, 4, 0]) dict_values([4, 4, 4, 3, 5, 4])


In [None]:
## Put the three models' predictions side by side, one column per model,
## and attach them to the image names.
predictions = pd.concat(
    [pd.DataFrame(model_pred, columns=[model_name])
     for model_name, model_pred in (('xgb', xgb_pred), ('gbc', gbc_pred), ('lgb', lgb_pred))],
    axis=1)
# reset_index() keeps the old (all-zero) index as an extra 'index' column.
final = pd.concat([test['Image_Name'].reset_index(), predictions], axis=1)
final

Unnamed: 0,index,Image_Name,xgb,gbc,lgb
0,0,000000004762.jpg,2,2,2
1,0,000000000529.jpg,1,1,1
2,0,000000002894.jpg,5,5,5
3,0,000000001895.jpg,2,2,2
4,0,000000061076.jpg,3,3,3
5,0,000000012096.jpg,3,3,3
6,0,000000000716.jpg,4,4,4
7,0,000000007444.jpg,2,2,2
8,0,000000003793.jpg,5,5,5
9,0,000000002890.jpg,0,0,0


In [None]:
# Majority vote: take the row-wise mode of the three model predictions.
# Column 0 of the mode frame is the voted label for each image.
d = final.loc[:, ['xgb', 'gbc', 'lgb']].mode(axis='columns')
final_2 = pd.concat([d[0].reset_index(), final], axis=1)
final_2

Unnamed: 0,index,0,index.1,Image_Name,xgb,gbc,lgb
0,0,2,0,000000004762.jpg,2,2,2
1,1,1,0,000000000529.jpg,1,1,1
2,2,5,0,000000002894.jpg,5,5,5
3,3,2,0,000000001895.jpg,2,2,2
4,4,3,0,000000061076.jpg,3,3,3
5,5,3,0,000000012096.jpg,3,3,3
6,6,4,0,000000000716.jpg,4,4,4
7,7,2,0,000000007444.jpg,2,2,2
8,8,5,0,000000003793.jpg,5,5,5
9,9,0,0,000000002890.jpg,0,0,0


In [None]:
# Remove the helper 'index' columns left behind by reset_index().
# errors='ignore' keeps the cell safe to re-run once they are already gone.
final_2 = final_2.drop(['index'], axis=1, errors='ignore')

In [None]:
final_2

Unnamed: 0,0,Image_Name,xgb,gbc,lgb
0,2,000000004762.jpg,2,2,2
1,1,000000000529.jpg,1,1,1
2,5,000000002894.jpg,5,5,5
3,2,000000001895.jpg,2,2,2
4,3,000000061076.jpg,3,3,3
5,3,000000012096.jpg,3,3,3
6,4,000000000716.jpg,4,4,4
7,2,000000007444.jpg,2,2,2
8,5,000000003793.jpg,5,5,5
9,0,000000002890.jpg,0,0,0


In [None]:
# Group the image file names by their voted distortion label, then lay the
# groups out as one DataFrame column per distortion type.
file_name = final_2['Image_Name'].tolist()
label = final_2[0].tolist()

H, B, E, R, N, C = [], [], [], [], [], []

# Labels 0-4 map to haze, blur, enhancement, rain, noise; anything else
# falls through to the compression bucket.
buckets = {0: H, 1: B, 2: E, 3: R, 4: N}
for image_name, image_label in zip(file_name, label):
  buckets.get(image_label, C).append(image_name)

# Columns have different lengths; concat pads the shorter ones with NaN.
named_groups = (('haze', H), ('blur', B), ('enchance', E),
                ('rain', R), ('noise', N), ('compress', C))
fin_dt = pd.concat(
    [pd.DataFrame(names, columns=[column]) for column, names in named_groups],
    axis=1)
fin_dt

Unnamed: 0,haze,blur,enchance,rain,noise,compress
0,000000002890.jpg,000000000529.jpg,000000004762.jpg,000000061076.jpg,000000000716.jpg,000000002894.jpg
1,000000023760.jpg,000000000077.jpg,000000001895.jpg,000000012096.jpg,000000002429.jpg,000000003793.jpg
2,000000010142.jpg,000000006040.jpg,000000007444.jpg,000000042050.jpg,000000000257.jpg,000000005809.jpg
3,000000023429.jpg,000000000827.jpg,000000000138.jpg,,000000002415.jpg,000000007784.jpg
4,,,,,000000005883.jpg,


In [None]:
#fin_dt.to_csv('/content/classification_report.csv',index=False)

##copying files

Now that we know which type of distortion affects each image, we copy the images into the corresponding folders.

In [None]:
import os

# Create one folder per predicted distortion type under /content/test_data.
# makedirs(..., exist_ok=True) also creates the parent folder and does not
# fail when the cell is run a second time.
for class_dir in ('haze', 'blur', 'enchance', 'rain', 'noise', 'compress'):
  os.makedirs('/content/test_data/' + class_dir, exist_ok=True)

In [None]:
### copying the test images to separate folder

def copii(dt,class_lab,source_path,destination_path):
  """Copy the files listed in one column of ``dt`` into ``destination_path``.

  Args:
    dt: DataFrame whose columns hold file names; missing entries (NaN/None)
      are skipped.
    class_lab: name of the column to read (converted with ``str``).
    source_path: folder that contains the files.
    destination_path: folder to copy into; created if it does not exist.
  """
  import os
  import shutil

  # Creating the target up front avoids a failure on a missing folder.
  os.makedirs(destination_path, exist_ok=True)
  for file_name in dt[str(class_lab)].dropna().to_list():
    shutil.copy(os.path.join(source_path, str(file_name)), destination_path)

					

In [None]:
# Copy every image into the folder named after its predicted distortion type.
f = ['haze', 'blur', 'enchance', 'rain', 'noise', 'compress']
for class_name in f:
  copii(fin_dt, class_name, '/content/Test_data/not_defiltered', '/content/test_data/' + class_name)

In [None]:
##!zip -r /content/test_data_maxim.zip /content/test_data

#De_Filters

###maxim-Noise,Rain,Blur,Haze

In [None]:
# Make the defilter directories in which the restored images are saved.
# makedirs(..., exist_ok=True) creates the parent folder as well and does not
# fail when the cell is run again.
for defilter_dir in ('decompression', 'deblur', 'dehaze', 'denoise', 'derain', 'de_enchacement'):
  os.makedirs('/content/defilter/' + defilter_dir, exist_ok=True)


In [None]:
# Folders in which the downloaded defilter model checkpoints are stored.
# makedirs(..., exist_ok=True) creates the parent folder as well and does not
# fail when the cell is run again.
for model_dir in ('de_enchacement', 'deblur', 'dehaze', 'denoise', 'derain'):
  os.makedirs('/content/models/' + model_dir, exist_ok=True)

In [None]:
%cd /content/models/deblur
!wget https://storage.googleapis.com/gresearch/maxim/ckpt/Deblurring/RealBlur_R/checkpoint.npz

/content/models/deblur
--2023-05-05 02:08:50--  https://storage.googleapis.com/gresearch/maxim/ckpt/Deblurring/RealBlur_R/checkpoint.npz
Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.24.128, 142.251.10.128, 142.251.12.128, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.24.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 270178074 (258M) [application/octet-stream]
Saving to: ‘checkpoint.npz’


2023-05-05 02:09:04 (18.4 MB/s) - ‘checkpoint.npz’ saved [270178074/270178074]



In [None]:
%cd /content/models/derain
!wget https://storage.googleapis.com/gresearch/maxim/ckpt/Deraining/Rain13k/checkpoint.npz

/content/models/derain
--2023-05-05 02:09:04--  https://storage.googleapis.com/gresearch/maxim/ckpt/Deraining/Rain13k/checkpoint.npz
Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.24.128, 142.251.10.128, 142.251.12.128, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.24.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 172436430 (164M) [application/octet-stream]
Saving to: ‘checkpoint.npz’


2023-05-05 02:09:15 (17.3 MB/s) - ‘checkpoint.npz’ saved [172436430/172436430]



In [None]:
%cd /content/models/denoise
!wget https://storage.googleapis.com/gresearch/maxim/ckpt/Denoising/SIDD/checkpoint.npz

/content/models/denoise
--2023-05-05 02:09:15--  https://storage.googleapis.com/gresearch/maxim/ckpt/Denoising/SIDD/checkpoint.npz
Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.24.128, 142.251.10.128, 142.251.12.128, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.24.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 270178074 (258M) [application/octet-stream]
Saving to: ‘checkpoint.npz’


2023-05-05 02:09:29 (18.7 MB/s) - ‘checkpoint.npz’ saved [270178074/270178074]



In [None]:
%cd /content/models/dehaze
!wget https://storage.googleapis.com/gresearch/maxim/ckpt/Dehazing/SOTS-Outdoor/checkpoint.npz

/content/models/dehaze
--2023-05-05 02:09:29--  https://storage.googleapis.com/gresearch/maxim/ckpt/Dehazing/SOTS-Outdoor/checkpoint.npz
Resolving storage.googleapis.com (storage.googleapis.com)... 142.251.10.128, 142.251.12.128, 172.217.194.128, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|142.251.10.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 172436430 (164M) [application/octet-stream]
Saving to: ‘checkpoint.npz’


2023-05-05 02:09:41 (14.3 MB/s) - ‘checkpoint.npz’ saved [172436430/172436430]



In [None]:
%cd /content/models/de_enchacement
!wget https://storage.googleapis.com/gresearch/maxim/ckpt/Enhancement/FiveK/checkpoint.npz

/content/models/de_enchacement
--2023-05-05 02:09:41--  https://storage.googleapis.com/gresearch/maxim/ckpt/Enhancement/FiveK/checkpoint.npz
Resolving storage.googleapis.com (storage.googleapis.com)... 142.251.10.128, 142.251.12.128, 172.217.194.128, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|142.251.10.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 172436430 (164M) [application/octet-stream]
Saving to: ‘checkpoint.npz’


2023-05-05 02:09:50 (17.5 MB/s) - ‘checkpoint.npz’ saved [172436430/172436430]



####libraries

In [None]:
%cd /content/
! git clone https://github.com/google-research/maxim/
%cd ./maxim

!pip install -r requirements.txt
!pip install --upgrade jax
! pip install gdown

!python setup.py build
! python setup.py install

# https://console.cloud.google.com/storage/browser/gresearch/maxim/ckpt/Enhancement/FiveK;tab=objects?prefix=&forceOnObjectsSortingFiltering=false

/content
Cloning into 'maxim'...
remote: Enumerating objects: 131, done.[K
remote: Counting objects: 100% (43/43), done.[K
remote: Compressing objects: 100% (29/29), done.[K
remote: Total 131 (delta 26), reused 21 (delta 14), pack-reused 88[K
Receiving objects: 100% (131/131), 10.70 MiB | 6.04 MiB/s, done.
Resolving deltas: 100% (47/47), done.
/content/maxim
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
running build
running build_py
creating build
creating build/lib
creating build/lib/maxim
copying maxim/predict.py -> build/lib/maxim
copying maxim/__init__.py -> build/lib/maxim
copying maxim/test_maxim.py -> build/lib/maxim
copying maxim/run_eval.py -> build/lib/maxim
creating build/lib/maxim/models
copying maxim/models/__init__.py ->

In [None]:
from google.colab import drive # works only for colab
from PIL import Image

import matplotlib.pyplot as plt
import collections
import importlib
import io
import os
import math
import requests
from tqdm import tqdm
import gdown # to download weights from Drive

import flax
import jax.numpy as jnp
import ml_collections
import numpy as np
import tensorflow as tf
from jax.experimental import jax2tf


# below code lines are from run_eval.py
# Name of the model module/file (presumably resolved under maxim.models — TODO confirm).
_MODEL_FILENAME = 'maxim'

# Model variant string selected for each restoration task name.
_MODEL_VARIANT_DICT = {
    'Denoising': 'S-3',
    'Deblurring': 'S-3',
    'Deraining': 'S-2',
    'Dehazing': 'S-2',
    'Enhancement': 'S-2',
}

# Base model configuration; 'variant' is left empty here and is presumably
# filled in from _MODEL_VARIANT_DICT before the model is built — TODO confirm.
_MODEL_CONFIGS = {
    'variant': '',
    'dropout_rate': 0.0,
    'num_outputs': 3,
    'use_bias': True,
    'num_supervision_scales': 3,
}


In [None]:
def sizeof_fmt(size, suffix='B'):
    """Format a byte count as a human-readable string.

    The value is divided by 1024 until its magnitude drops below 1024 and is
    then printed with one decimal and the matching binary prefix.

    Args:
        size (int): File size.
        suffix (str): Unit suffix appended after the prefix. Default: 'B'.

    Return:
        str: Formatted file size, e.g. ``'1.5 KB'``.
    """
    prefixes = ('', 'K', 'M', 'G', 'T', 'P', 'E', 'Z')
    value = size
    step = 0
    while step < len(prefixes) and not abs(value) < 1024.0:
        value /= 1024.0
        step += 1
    # Anything still too large after the last prefix is reported in 'Y'.
    prefix = prefixes[step] if step < len(prefixes) else 'Y'
    return f'{value:3.1f} {prefix}{suffix}'


def download_file_from_google_drive(file_id, save_path):
    """Download a file from Google Drive and write it to ``save_path``.

    Ref:
    https://stackoverflow.com/questions/25010369/wget-curl-large-file-from-google-drive  # noqa E501

    Args:
        file_id (str): File id.
        save_path (str): Save path.
    """
    URL = 'https://docs.google.com/uc?export=download'
    params = {'id': file_id}

    # The session is used as a context manager so its connections are
    # released once the download has finished (or failed).
    with requests.Session() as session:
        response = session.get(URL, params=params, stream=True)
        # If a 'download_warning' cookie is present, repeat the request with
        # its value as the confirmation token.
        token = get_confirm_token(response)
        if token:
            params['confirm'] = token
            response = session.get(URL, params=params, stream=True)

        # Probe the total size with a tiny range request; the size is the
        # part after '/' in the Content-Range header, when the server sends it.
        response_file_size = session.get(
            URL, params=params, stream=True, headers={'Range': 'bytes=0-2'})
        if 'Content-Range' in response_file_size.headers:
            file_size = int(
                response_file_size.headers['Content-Range'].split('/')[1])
        else:
            file_size = None

        save_response_content(response, save_path, file_size)


def get_confirm_token(response):
    """Return the value of the first 'download_warning*' cookie, or None."""
    matching_values = (
        cookie_value
        for cookie_name, cookie_value in response.cookies.items()
        if cookie_name.startswith('download_warning'))
    return next(matching_values, None)


def save_response_content(response,
                          destination,
                          file_size=None,
                          chunk_size=32768):
    """Stream the body of ``response`` into the file ``destination``.

    Args:
        response: Object exposing ``iter_content(chunk_size)`` that yields
            byte chunks (e.g. a streamed ``requests`` response).
        destination (str): Path of the file to write (opened in 'wb' mode).
        file_size (int | None): Total size in bytes. When given, a tqdm
            progress bar is shown; when None, no progress is displayed.
        chunk_size (int): Chunk size requested from ``iter_content``.
    """
    if file_size is not None:
        pbar = tqdm(total=math.ceil(file_size / chunk_size), unit='chunk')

        readable_file_size = sizeof_fmt(file_size)
    else:
        pbar = None

    try:
        with open(destination, 'wb') as f:
            downloaded_size = 0
            for chunk in response.iter_content(chunk_size):
                # Count the bytes actually received: the last chunk is
                # usually shorter than chunk_size.
                downloaded_size += len(chunk)
                if pbar is not None:
                    pbar.update(1)
                    pbar.set_description(f'Download {sizeof_fmt(downloaded_size)} '
                                         f'/ {readable_file_size}')
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
    finally:
        # Close the progress bar even if the download fails midway.
        if pbar is not None:
            pbar.close()

In [None]:

def resize(path, new_width_height = 1280, save_image = False, convert_RGB = True, clip_full_hd = False, quality = 100):
  '''
  Open the image at `path`, resize it and return the resized PIL image.
  args:
    path: Image path.
    new_width_height: Target size. A non-zero integer keeps the aspect ratio:
      the larger dimension (width or height) becomes that value and the other
      one is scaled accordingly. Any other value is passed straight to
      `Image.resize` as the new size.
    save_image: Whether to write the resized image back to `path`.
    convert_RGB: Whether to convert an RGBA image to RGB by pasting it onto a
      white background.
    clip_full_hd: Accepted but not used by this function.
    quality: `quality` value forwarded to `Image.save` when `save_image` is set.
  '''
  image = Image.open(path)
  w, h = image.size

  target = new_width_height if isinstance(new_width_height, int) else False

  if not target:
    # Explicit size given by the caller.
    image = image.resize(new_width_height)
  elif h > w:
    # Portrait: height is the larger side and becomes `target`.
    scale = (target / float(h))
    scaled_width = int((float(w) * float(scale)))
    image = image.resize((scaled_width, target), Image.NEAREST)
  else:
    # Landscape or square: width becomes `target`.
    scale = (target / float(w))
    scaled_height = int((float(h) * float(scale)))
    image = image.resize((target, scaled_height), Image.NEAREST) # Try Image.ANTIALIAS in place of Image.NEAREST

  if convert_RGB and image.mode == "RGBA":
    # Flatten transparency: paste onto a white RGBA canvas using the image
    # itself as the mask, then drop the alpha channel.
    canvas = Image.new("RGBA", image.size, "WHITE")
    canvas.paste(image, (0, 0), image)
    image = canvas.convert('RGB')

  if save_image:
    image.save(path, quality = quality)

  return image


class DummyFlags:
  def __init__(self, ckpt_path:str, task:str, input_dir: str = "./maxim/images/Enhancement", output_dir:str = "./maxim/images/Results", has_target:bool = False, save_images:bool = True, geometric_ensemble:bool = False):
    '''
    Plain container that mimics the flags object a command-line run would produce (same role as ArgParse).
    args:
      ckpt_path: Path of the saved model checkpoint. Pre-trained checkpoints are listed at https://console.cloud.google.com/storage/browser/gresearch/maxim/ckpt/
      task: Task the model was trained for. Each task uses different data and checkpoints; see https://github.com/google-research/maxim#results-and-pre-trained-models
      input_dir: Input directory. Not needed here because images are passed one at a time.
      output_dir: Output directory. Also not needed in our code.
      has_target: Used for PSNR and SSIM calculation. Not needed in our case.
      save_images: Used by the CLI command that saves images in a loop. Not needed in our case.
      geometric_ensemble: Not needed for this inference-only usage.
    '''
    # Required settings.
    self.task = task
    self.ckpt_path = ckpt_path
    # Directory settings.
    self.output_dir = output_dir
    self.input_dir = input_dir
    # Boolean switches.
    self.geometric_ensemble = geometric_ensemble
    self.save_images = save_images
    self.has_target = has_target


In [None]:
# Copyright 2022 Google LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def recover_tree(keys, values):
  """Rebuild a nested dict from flat '/'-separated names and their values.

  Handy for inspecting saved checkpoints without the code that produced them,
  e.g. to pull out and reuse a subtree of parameters.

  Args:
    keys: a list of keys, where '/' is used as separator between nodes.
    values: a list of leaf values.
  Returns:
    A nested tree-like dict.
  """
  node = {}
  children = collections.defaultdict(list)
  for name, leaf in zip(keys, values):
    head, separator, remainder = name.partition('/')
    if separator:
      # Deeper path: defer it to the child named by the first component.
      children[head].append((remainder, leaf))
    else:
      node[name] = leaf
  for head, entries in children.items():
    child_keys = [entry[0] for entry in entries]
    child_values = [entry[1] for entry in entries]
    node[head] = recover_tree(child_keys, child_values)
  return node


def mod_padding_symmetric(image, factor=64):
  """Reflect-pad an image so height and width become multiples of `factor`.

  Args:
    image: array indexed as [height, width, channels].
    factor: the padded height and width are multiples of this value.
  Returns:
    The padded image. A dimension that is already a multiple of `factor` is
    left unchanged. The padding before the image is `pad // 2` per axis (as
    before); when the required padding is odd, the extra pixel goes after
    the image so the result really is divisible by `factor`.
  """
  height, width = image.shape[0], image.shape[1]
  # Distance to the next multiple of `factor` (0 when already aligned).
  padh = -height % factor
  padw = -width % factor
  image = jnp.pad(
      image,
      [(padh // 2, padh - padh // 2), (padw // 2, padw - padw // 2), (0, 0)],
      mode='reflect')
  return image


def get_params(ckpt_path):
  """Load a checkpoint file and return its `opt/target` parameter subtree."""
  # Read the whole file through tf.io.gfile, then parse it from memory.
  with tf.io.gfile.GFile(ckpt_path, 'rb') as ckpt_file:
    raw_bytes = ckpt_file.read()
  archive = np.load(io.BytesIO(raw_bytes))

  # Flat 'a/b/c' names -> nested dict.
  flat_names, flat_arrays = zip(*archive.items())
  tree = recover_tree(flat_names, flat_arrays)

  return tree['opt']['target']


def calculate_psnr(img1, img2, crop_border, test_y_channel=False):
  """Compute the PSNR (Peak Signal-to-Noise Ratio) between two images.

  Ref: https://en.wikipedia.org/wiki/Peak_signal-to-noise_ratio
  Args:
    img1 (ndarray): Images with range [0, 255].
    img2 (ndarray): Images with range [0, 255].
    crop_border (int): Number of pixels removed from every edge before the
        comparison; 0 keeps the full image.
    test_y_channel (bool): Compare only the Y channel of YCbCr. Default: False.
  Returns:
    float: psnr result (`inf` when the images are identical).
  """
  assert img1.shape == img2.shape, (
      f'Image shapes are differnet: {img1.shape}, {img2.shape}.')
  first = img1.astype(np.float64)
  second = img2.astype(np.float64)

  if crop_border != 0:
    inner = slice(crop_border, -crop_border)
    first = first[inner, inner, ...]
    second = second[inner, inner, ...]

  if test_y_channel:
    first = to_y_channel(first)
    second = to_y_channel(second)

  mse = np.mean((first - second)**2)
  if mse == 0:
    return float('inf')
  return 20. * np.log10(255. / np.sqrt(mse))


def _convert_input_type_range(img):
  """Convert the type and range of the input image.

  It converts the input image to np.float32 type and range of [0, 1].
  It is mainly used for pre-processing the input image in colorspace
  convertion functions such as rgb2ycbcr and ycbcr2rgb.
  Args:
    img (ndarray): The input image. It accepts:
        1. np.uint8 type with range [0, 255];
        2. np.float32 type with range [0, 1].
  Returns:
      (ndarray): The converted image with type of np.float32 and range of
          [0, 1].
  """
  img_type = img.dtype
  img = img.astype(np.float32)
  if img_type == np.float32:
    pass
  elif img_type == np.uint8:
    img /= 255.
  else:
    raise TypeError('The img type should be np.float32 or np.uint8, '
                    f'but got {img_type}')
  return img


def _convert_output_type_range(img, dst_type):
  """Convert the type and range of the image according to dst_type.

  It converts the image to desired type and range. If `dst_type` is np.uint8,
  images will be converted to np.uint8 type with range [0, 255]. If
  `dst_type` is np.float32, it converts the image to np.float32 type with
  range [0, 1].
  It is mainly used for post-processing images in colorspace convertion
  functions such as rgb2ycbcr and ycbcr2rgb.
  Args:
    img (ndarray): The image to be converted with np.float32 type and
        range [0, 255].
    dst_type (np.uint8 | np.float32): If dst_type is np.uint8, it
        converts the image to np.uint8 type with range [0, 255]. If
        dst_type is np.float32, it converts the image to np.float32 type
        with range [0, 1].
  Returns:
    (ndarray): The converted image with desired type and range.
  """
  if dst_type not in (np.uint8, np.float32):
    raise TypeError('The dst_type should be np.float32 or np.uint8, '
                    f'but got {dst_type}')
  if dst_type == np.uint8:
    img = img.round()
  else:
    img /= 255.

  return img.astype(dst_type)


def rgb2ycbcr(img, y_only=False):
  """Convert an RGB image to YCbCr.

  Produces the same results as Matlab's `rgb2ycbcr` function: the ITU-R
  BT.601 conversion for standard-definition television, see
  https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion.
  This is not the same as cv2.cvtColor's `RGB <-> YCrCb`, which implements
  the JPEG conversion, see
  https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion.

  Args:
    img (ndarray): The input image. It accepts:
        1. np.uint8 type with range [0, 255];
        2. np.float32 type with range [0, 1].
    y_only (bool): Whether to only return Y channel. Default: False.
  Returns:
    ndarray: The converted YCbCr image. The output image has the same type
        and range as input image.
  """
  source_dtype = img.dtype
  normalized = _convert_input_type_range(img)
  if not y_only:
    # Full conversion: one weight column per output channel (Y, Cb, Cr).
    weights = [[65.481, -37.797, 112.0], [128.553, -74.203, -93.786],
               [24.966, 112.0, -18.214]]
    converted = np.matmul(normalized, weights) + [16, 128, 128]
  else:
    # Luma only.
    converted = np.dot(normalized, [65.481, 128.553, 24.966]) + 16.0
  return _convert_output_type_range(converted, source_dtype)


def to_y_channel(img):
  """Return the Y (luma) channel of an image, kept on the [0, 255] scale.

  Args:
    img (ndarray): Image with range [0, 255].
  Returns:
    (ndarray): float32 image with range [0, 255], not rounded. A 3-channel
        input is reduced to its Y channel with a trailing singleton channel
        axis; any other input is returned as a float32 copy of itself.
  """
  scaled = img.astype(np.float32) / 255.
  has_three_channels = scaled.ndim == 3 and scaled.shape[2] == 3
  if has_three_channels:
    luma = rgb2ycbcr(scaled, y_only=True)
    scaled = luma[..., None]  # restore the channel axis dropped by y_only
  return scaled * 255.


def augment_image(image, times=8):
  """Build a stack of rotated (and optionally mirrored) copies of `image`.

  Args:
    image: array to augment.
    times: 4 -> the four 90-degree rotations of `image`;
        8 -> those four plus the four rotations of the left-right flip.
  Returns:
    The copies stacked along a new leading axis, rotations first.
  Raises:
    Exception: if `times` is neither 4 nor 8.
  """
  if times not in (4, 8):
    raise Exception(f'Error times: {times}')
  views = [np.rot90(image, k=turns) for turns in range(4)]
  if times == 8:
    mirrored = np.fliplr(image)
    views.extend(np.rot90(mirrored, k=turns) for turns in range(4))
  return np.stack(views, axis=0)


def deaugment_image(images, times=8):
  """Undo the rotations/flips of an augmented stack and average the result.

  Args:
    images: indexable collection of augmented copies; entries 0-3 are the
        rotations by k*90 degrees and, for times=8, entries 4-7 are the
        rotations of the left-right flipped image.
    times: 4 or 8, the augmentation scheme that produced `images`.
  Returns:
    The element-wise mean of the copies after each one has been mapped back
    to the original orientation.
  Raises:
    Exception: if `times` is neither 4 nor 8.
  """
  if times not in (4, 8):
    raise Exception(f'Error times: {times}')

  # Rotating by (4 - k) quarter turns cancels an earlier rotation by k.
  restored = [np.rot90(images[turns], k=4 - turns) for turns in range(4)]
  if times == 8:
    # Mirrored copies are rotated back first, then un-flipped.
    restored.extend(
        np.fliplr(np.rot90(images[4 + turns], k=4 - turns))
        for turns in range(4))
  return np.mean(np.stack(restored, axis=0), axis=0)


def is_image_file(filename):
  """Return True if `filename` ends with one of the known image suffixes.

  The match is a case-sensitive suffix test against the listed spellings and
  does not require a leading dot.
  """
  image_suffixes = ('jpeg', 'JPEG', 'jpg', 'png', 'JPG', 'PNG', 'gif')
  return filename.endswith(image_suffixes)


def save_img(img, pth):
  """Write an image to disk as a PNG file.

  Args:
    img: array, [height, width, channels]; values are clipped to [0, 1] and
      scaled to 8-bit before saving.
    pth: string, path to save the image to.
  """
  clipped = np.clip(img, 0., 1.)
  eight_bit = (clipped * 255.).astype(jnp.uint8)
  Image.fromarray(np.array(eight_bit)).save(pth, 'PNG')


def make_shape_even(image):
  """Reflect-pad a [height, width, channels] image so height and width are even.

  An odd height gets one extra row at the bottom and an odd width one extra
  column on the right; even dimensions are left as they are.
  """
  height, width = image.shape[:2]
  extra_rows = height % 2
  extra_cols = width % 2
  pad_spec = [(0, extra_rows), (0, extra_cols), (0, 0)]
  return jnp.pad(image, pad_spec, mode='reflect')


# Refactored code --------------------------------------------------------------------------------------------------------------------
# NOTE: the triple-quoted string below is a disabled draft of build_model (it
# has a default task of "Deblurring"). As a bare string expression it defines
# nothing; the build_model(task) that is actually used is defined further down
# in this file.
"""
def build_model(task = "Deblurring"):
  model_mod = importlib.import_module(f'maxim.models.{_MODEL_FILENAME}')
  model_configs = ml_collections.ConfigDict(_MODEL_CONFIGS)

  model_configs.variant = _MODEL_VARIANT_DICT[task]

  model = model_mod.Model(**model_configs)
  return model
"""

def pre_process(input_file):
  '''
  Load an image and prepare it as a single-item batch for the model.

  Args:
    input_file: path (or file object) accepted by PIL's Image.open.
  Returns:
    (batch, height, width, height_even, width_even) where `batch` is the
    padded image with a leading batch axis, (height, width) is the size of the
    image as loaded and (height_even, width_even) its size after
    make_shape_even. post_process uses these sizes to crop the padding away.
  '''
  rgb_image = Image.open(input_file).convert('RGB')
  pixels = np.asarray(rgb_image, np.float32) / 255.
  height, width = pixels.shape[:2]

  # First pad odd dimensions up to even ones.
  pixels = make_shape_even(pixels)
  height_even, width_even = pixels.shape[:2]

  # Then pad with factor=64 (presumably up to a multiple of 64 on each side;
  # mod_padding_symmetric is defined elsewhere in this file).
  padded = mod_padding_symmetric(pixels, factor=64)
  batch = np.expand_dims(padded, axis=0)

  return batch, height, width, height_even, width_even


def predict(input_img):
  """Apply the module-level `model` to `input_img` with the module-level `params`.

  The raw model output is returned unchanged; picking the last stage/scale out
  of it is done by post_process.
  NOTE(review): a later cell defines predict(input_img, model, params), which
  replaces this version once it has run.
  """
  apply_fn = model.apply
  variables = {'params': flax.core.freeze(params)}
  return apply_fn(variables, input_img)


def post_process(preds, height, width, height_even, width_even):
  '''
  Turn a raw model output into an 8-bit image cropped to the original size.

  Args:
    preds: model output; either an array with a leading batch axis or a list
      (optionally a list of lists) of such arrays, in which case the last
      entry of the last list is used.
    height, width: size of the image before any padding.
    height_even, width_even: size after make_shape_even.
  Returns:
    uint8 array of shape [height, width, channels] with values in [0, 255].
  '''
  # Unwrap up to two levels of list nesting, keeping the last entry each time.
  for _ in range(2):
    if not isinstance(preds, list):
      break
    preds = preds[-1]

  # Keep the first item of the batch.
  image = np.array(preds[0], np.float32)

  # Crop the padding away: the offsets are measured from the centre of the
  # padded output using the even-padded size.
  padded_height, padded_width = image.shape[0], image.shape[1]
  top = padded_height // 2 - height_even // 2
  left = padded_width // 2 - width_even // 2
  cropped = image[top:top + height, left:left + width, :]

  scaled = np.clip(cropped, 0., 1.) * 255.
  return np.array(scaled.astype(jnp.uint8))

####train

    'Denoising': 'S-3',
    'Deblurring': 'S-3',
    'Deraining': 'S-2',
    'Dehazing': 'S-2',
    'Enhancement': 'S-2',

In [None]:
# Name of the module under `maxim.models` that build_model() imports.
_MODEL_FILENAME = 'maxim'

# Task name -> value that build_model() writes into the config's `variant`
# field for that task.
_MODEL_VARIANT_DICT = {
    'Denoising': 'S-3',
    'Deblurring': 'S-3',
    'Deraining': 'S-2',
    'Dehazing': 'S-2',
    'Enhancement': 'S-2',
}

# Keyword arguments handed to the model constructor by build_model().
# 'variant' is an empty placeholder here; build_model() fills it in from
# _MODEL_VARIANT_DICT before the model is created.
_MODEL_CONFIGS = {
    'variant': '',
    'dropout_rate': 0.0,
    'num_outputs': 3,
    'use_bias': True,
    'num_supervision_scales': 3,
}


In [None]:
def build_model(task):
  """Instantiate the model configured for `task`.

  Args:
    task: key of _MODEL_VARIANT_DICT (e.g. 'Deblurring'); an unknown task
      raises KeyError.
  Returns:
    `Model(**config)` from the `maxim.models.<_MODEL_FILENAME>` module, where
    `config` is _MODEL_CONFIGS with `variant` set for the task.
  """
  maxim_module = importlib.import_module(f'maxim.models.{_MODEL_FILENAME}')
  config = ml_collections.ConfigDict(_MODEL_CONFIGS)

  # The variant is the only per-task setting.
  config.variant = _MODEL_VARIANT_DICT[task]

  return maxim_module.Model(**config)

def predict(input_img,model,params):
  """Apply `model` to `input_img` using `params` (frozen via flax.core.freeze).

  The raw model output is returned unchanged; picking the last stage/scale out
  of it is done by post_process.
  """
  apply_fn = model.apply
  frozen_params = flax.core.freeze(params)
  return apply_fn({'params': frozen_params}, input_img)



In [None]:
def mod_train(Model_path,task1):
  """Load checkpoint parameters and build the model for a task.

  Despite the name, nothing is trained here: the parameters come from
  get_params() on the checkpoint path and the model from build_model().

  Args:
    Model_path: path to the checkpoint; converted with str().
    task1: task name understood by build_model(); converted with str().
  Returns:
    (model, params) tuple.
  """
  checkpoint_path = str(Model_path)
  task_name = str(task1)
  # DummyFlags is defined elsewhere in this file; only its ckpt_path is read back.
  flags = DummyFlags(ckpt_path = checkpoint_path, task = task_name)
  loaded_params = get_params(flags.ckpt_path)
  return build_model(task = task_name), loaded_params


In [None]:
# NOTE: the triple-quoted string below is a disabled draft of inference() that
# processed every file returned by os.listdir(img_path). As a bare string
# expression it defines nothing; the inference() in use is defined in the next
# cell and takes its file list from a dataframe column instead.
"""
def inference(model_N,params_N,img_path,des_path):
  import matplotlib.image
  for file in os.listdir(img_path):
    image_bytes=str(img_path)+'/'+file
    input_img, height, width, height_even, width_even = pre_process(image_bytes)
    preds = predict(input_img,model_N,params_N)
    result = post_process(preds, height, width, height_even, width_even)
    matplotlib.image.imsave(str(des_path)+'/'+file, result)
"""

In [None]:
def inference(model_N,params_N,img_path,des_path,dt,label):
  """Run one defilter model over the images listed in a dataframe column.

  Args:
    model_N: model passed to predict().
    params_N: parameters passed to predict().
    img_path: folder holding the input images.
    des_path: folder the restored images are written to (same file names).
    dt: dataframe whose column `label` lists the file names to process;
      missing (NaN) entries are dropped.
    label: column name in `dt`; converted with str().
  """
  import matplotlib.image
  file_names = dt[str(label)].dropna().tolist()
  print(file_names)
  source_prefix = str(img_path)+'/'
  target_prefix = str(des_path)+'/'
  for name in file_names:
    input_img, height, width, height_even, width_even = pre_process(source_prefix+name)
    preds = predict(input_img,model_N,params_N)
    result = post_process(preds, height, width, height_even, width_even)
    matplotlib.image.imsave(target_prefix+name, result)

In [None]:
#model_E,params_E =mod_train('/content/models/de_enchacement/checkpoint.npz','Enhancement')

In [None]:
# Deblurring: load the checkpoint parameters and build the model for the 'Deblurring' task.
model_B,params_B =mod_train('/content/models/deblur/checkpoint.npz','Deblurring')

In [None]:
# Dehazing: load the checkpoint parameters and build the model for the 'Dehazing' task.
model_H,params_H =mod_train('/content/models/dehaze/checkpoint.npz','Dehazing')

In [None]:
# Denoising: load the checkpoint parameters and build the model for the 'Denoising' task.
model_N,params_N =mod_train('/content/models/denoise/checkpoint.npz','Denoising')

In [None]:
# Deraining: load the checkpoint parameters and build the model for the 'Deraining' task.
model_R,params_R =mod_train('/content/models/derain/checkpoint.npz','Deraining')

inference defilters

In [None]:
# Copy of fin_dt (built outside this section); the inference(...) calls below
# read one column of file names per distortion label from it.
dt = fin_dt.copy()

In [None]:
## The inference function takes 6 arguments: model, params, source image folder, destination folder, classifier dataframe, and label (dataframe column name)
# Label columns: haze, blur, rain, noise



In [None]:
# Deblur the images listed in the 'blur' column of dt and write them to /content/defilter/deblur.
# NOTE(review): the source folder here is '/content/Test_data/...' while the SwinIR and
# URetinex cells read from '/content/test_data/...' (lower-case) - confirm the directory name.
inference(model_B,params_B,'/content/Test_data/not_defiltered','/content/defilter/deblur',dt,'blur')

['000000000529.jpg', '000000000077.jpg', '000000006040.jpg', '000000000827.jpg']


In [None]:
# Dehaze the images listed in the 'haze' column of dt and write them to /content/defilter/dehaze.
inference(model_H,params_H,'/content/Test_data/not_defiltered','/content/defilter/dehaze',dt,'haze')

['000000002890.jpg', '000000023760.jpg', '000000010142.jpg', '000000023429.jpg']


In [None]:
# Denoise the images listed in the 'noise' column of dt and write them to /content/defilter/denoise.
inference(model_N,params_N,'/content/Test_data/not_defiltered','/content/defilter/denoise',dt,'noise')

['000000000716.jpg', '000000002429.jpg', '000000000257.jpg', '000000002415.jpg', '000000005883.jpg']


In [None]:
# Derain the images listed in the 'rain' column of dt and write them to /content/defilter/derain.
inference(model_R,params_R,'/content/Test_data/not_defiltered','/content/defilter/derain',dt,'rain')

['000000061076.jpg', '000000012096.jpg', '000000042050.jpg']


###swinIR-DeCompression 

In [None]:
%cd /content/
# Clone realESRGAN
!git clone https://github.com/xinntao/Real-ESRGAN.git
%cd Real-ESRGAN
# Set up the environment
!pip install basicsr
!pip install facexlib
!pip install gfpgan
!pip install -r requirements.txt
!python setup.py develop

# Clone BSRGAN
!git clone https://github.com/cszn/BSRGAN.git

!rm -r SwinIR
# Clone SwinIR
!git clone https://github.com/JingyunLiang/SwinIR.git
!pip install timm

# Download the pre-trained models
!wget https://github.com/cszn/KAIR/releases/download/v1.0/BSRGAN.pth -P BSRGAN/model_zoo
!wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth -P experiments/pretrained_models
!wget https://github.com/JingyunLiang/SwinIR/releases/download/v0.0/003_realSR_BSRGAN_DFO_s64w8_SwinIR-M_x4_GAN.pth -P experiments/pretrained_models
!wget https://github.com/JingyunLiang/SwinIR/releases/download/v0.0/003_realSR_BSRGAN_DFOWMFC_s64w8_SwinIR-L_x4_GAN.pth -P experiments/pretrained_models

/content
Cloning into 'Real-ESRGAN'...
remote: Enumerating objects: 755, done.[K
remote: Total 755 (delta 0), reused 0 (delta 0), pack-reused 755[K
Receiving objects: 100% (755/755), 5.37 MiB | 12.46 MiB/s, done.
Resolving deltas: 100% (410/410), done.
/content/Real-ESRGAN
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
/usr/local/lib/python3.10/dist-packages/setuptools/__init__.py:84: _DeprecatedInstaller: setuptools.installer and fetch_build_eggs are deprecated.
!!

        ********************************************************************************
        Requirements should be satisfied by a PEP 517 installer.
        If you are usin

In [None]:
# Switch read by the SwinIR test cell below: when True, main_test_swinir.py is
# run with the additional `--tile 640` argument.
test_patch_wise=False
import os
import glob
from google.colab import files
import shutil



**Before running the cell below, replace the main_test_swinir.py script in the cloned SwinIR folder with the main_test_swinir.py code that we have provided.**

In [None]:
#### SwinIR test script

# --folder_lq:    path to the input folder
# --Output_path:  path to the output folder
# --model_path:   path to the model checkpoint

# If you hit a CUDA out-of-memory error, restart the notebook and run this cell again.

# The two commands are identical except that the patch-wise branch adds `--tile 640`.
if test_patch_wise:
  !python /content/Real-ESRGAN/SwinIR/main_test_swinir.py --task real_sr  --model_path experiments/pretrained_models/003_realSR_BSRGAN_DFOWMFC_s64w8_SwinIR-L_x4_GAN.pth --folder_lq '/content/test_data/compress/' --Output_path '/content/defilter/decompression/' --scale 4 --large_model --tile 640
else:
  !python /content/Real-ESRGAN/SwinIR/main_test_swinir.py --task real_sr  --model_path experiments/pretrained_models/003_realSR_BSRGAN_DFOWMFC_s64w8_SwinIR-L_x4_GAN.pth --folder_lq '/content/test_data/compress/' --Output_path '/content/defilter/decompression/' --scale 4 --large_model

downloading model experiments/pretrained_models/003_realSR_BSRGAN_DFOWMFC_s64w8_SwinIR-L_x4_GAN.pth
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
idx 0 path 000000002894
height 640
Testing 0 000000002894        
idx 1 path 000000003793
height 428
Testing 1 000000003793        
idx 2 path 000000005809
height 640
Testing 2 000000005809        
idx 3 path 000000007784
height 375
Testing 3 000000007784        


##URetinex-Net: image contrast, image enhancement

In [None]:
%cd '/content/'
!git clone https://github.com/AndersonYong/URetinex-Net.git

/content
Cloning into 'URetinex-Net'...
remote: Enumerating objects: 201, done.[K
remote: Counting objects: 100% (76/76), done.[K
remote: Compressing objects: 100% (38/38), done.[K
remote: Total 201 (delta 38), reused 72 (delta 36), pack-reused 125[K
Receiving objects: 100% (201/201), 5.16 MiB | 21.15 MiB/s, done.
Resolving deltas: 100% (74/74), done.




**Before running the cell below, replace the test.py script in the cloned URetinex-Net folder with the test.py code that we have provided.**

In [None]:
!python /content/URetinex-Net/test.py --img_path "/content/test_data/enchance" --output '/content/defilter/de_enchacement' --Decom_model_low_path /content/URetinex-Net/ckpt/init_low.pth --unfolding_model_path /content/URetinex-Net/ckpt/unfolding.pth --adjust_model_path /content/URetinex-Net/ckpt/L_adjust.pth

img_path /content/test_data/enchance
output /content/defilter/de_enchacement
ratio 5
Decom_model_low_path /content/URetinex-Net/ckpt/init_low.pth
unfolding_model_path /content/URetinex-Net/ckpt/unfolding.pth
adjust_model_path /content/URetinex-Net/ckpt/L_adjust.pth
gpu_id 0
Decom(
  (decom): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): LeakyReLU(negative_slope=0.2, inplace=True)
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): LeakyReLU(negative_slope=0.2, inplace=True)
    (4): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): LeakyReLU(negative_slope=0.2, inplace=True)
    (6): Conv2d(32, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
  )
)
HalfDnCNNSE(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu1): ReLU(inplace=True)
  (conv2): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu2): ReLU(inpla

#InternImage_Object_detection

###import libraries

In [None]:
!git clone https://github.com/OpenGVLab/InternImage.git

Cloning into 'InternImage'...
remote: Enumerating objects: 953, done.[K
remote: Counting objects: 100% (606/606), done.[K
remote: Compressing objects: 100% (400/400), done.[K
remote: Total 953 (delta 238), reused 498 (delta 188), pack-reused 347[K
Receiving objects: 100% (953/953), 22.64 MiB | 18.09 MiB/s, done.
Resolving deltas: 100% (392/392), done.


In [None]:
!pip install torch==1.11.0+cu113 torchvision==0.12.0+cu113  -f https://download.pytorch.org/whl/torch_stable.html

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://download.pytorch.org/whl/torch_stable.html
Collecting torch==1.11.0+cu113
  Downloading https://download.pytorch.org/whl/cu113/torch-1.11.0%2Bcu113-cp310-cp310-linux_x86_64.whl (1637.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 GB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torchvision==0.12.0+cu113
  Downloading https://download.pytorch.org/whl/cu113/torchvision-0.12.0%2Bcu113-cp310-cp310-linux_x86_64.whl (22.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m22.3/22.3 MB[0m [31m50.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch, torchvision
  Attempting uninstall: torch
    Found existing installation: torch 2.0.0+cu118
    Uninstalling torch-2.0.0+cu118:
      Successfully uninstalled torch-2.0.0+cu118
  Attempting uninstall: torchvision
    Found existing installation: 

In [None]:
!pip install -U openmim
!mim install mmcv-full==1.5.0
!pip install timm==0.6.11 mmdet==2.28.1

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting openmim
  Downloading openmim-0.3.7-py2.py3-none-any.whl (51 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.3/51.3 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting model-index
  Downloading model_index-0.1.11-py3-none-any.whl (34 kB)
Collecting colorama
  Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Collecting ordered-set
  Downloading ordered_set-4.1.0-py3-none-any.whl (7.6 kB)
Installing collected packages: ordered-set, colorama, model-index, openmim
Successfully installed colorama-0.4.6 model-index-0.1.11 openmim-0.3.7 ordered-set-4.1.0
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://download.openmmlab.com/mmcv/dist/cu113/torch1.11.0/index.html
Collecting mmcv-full==1.5.0
  Downloading https://download.openmmlab.com/mmcv/dist/cu113/torch1.11.0/mmcv_fu

In [None]:
!pip install opencv-python termcolor yacs pyyaml scipy

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting yacs
  Downloading yacs-0.1.8-py3-none-any.whl (14 kB)
Installing collected packages: yacs
Successfully installed yacs-0.1.8


In [None]:
%cd /content/InternImage/detection
%cd ops_dcnv3
!sh make.sh
# unit test (should see all checking is True)
!python /content/InternImage/detection/ops_dcnv3/test.py

/content/InternImage/detection
/content/InternImage/detection/ops_dcnv3
running build
running build_py
creating build
creating build/lib.linux-x86_64-3.10
creating build/lib.linux-x86_64-3.10/modules
copying modules/__init__.py -> build/lib.linux-x86_64-3.10/modules
copying modules/dcnv3.py -> build/lib.linux-x86_64-3.10/modules
creating build/lib.linux-x86_64-3.10/functions
copying functions/dcnv3_func.py -> build/lib.linux-x86_64-3.10/functions
copying functions/__init__.py -> build/lib.linux-x86_64-3.10/functions
running build_ext
building 'DCNv3' extension
creating build/temp.linux-x86_64-3.10
creating build/temp.linux-x86_64-3.10/content
creating build/temp.linux-x86_64-3.10/content/InternImage
creating build/temp.linux-x86_64-3.10/content/InternImage/detection
creating build/temp.linux-x86_64-3.10/content/InternImage/detection/ops_dcnv3
creating build/temp.linux-x86_64-3.10/content/InternImage/detection/ops_dcnv3/src
creating build/temp.linux-x86_64-3.10/content/InternImage/detec

###object_detection_testing

In [None]:
!wget https://huggingface.co/OpenGVLab/InternImage/resolve/main/cascade_internimage_xl_fpn_3x_coco.pth

--2023-05-05 02:43:07--  https://huggingface.co/OpenGVLab/InternImage/resolve/main/cascade_internimage_xl_fpn_3x_coco.pth
Resolving huggingface.co (huggingface.co)... 18.155.68.44, 18.155.68.116, 18.155.68.38, ...
Connecting to huggingface.co (huggingface.co)|18.155.68.44|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cdn-lfs.huggingface.co/repos/29/b8/29b884d43d991fb1da1715a1ff9ec2e0f0c0bee808c6c6988adcf442954ffdf5/9214c6c9af906d1fc5c8f33f48911be204aa231039191eb89a7bf0579ce57003?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27cascade_internimage_xl_fpn_3x_coco.pth%3B+filename%3D%22cascade_internimage_xl_fpn_3x_coco.pth%22%3B&Expires=1683513788&Policy=eyJTdGF0ZW1lbnQiOlt7IlJlc291cmNlIjoiaHR0cHM6Ly9jZG4tbGZzLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzI5L2I4LzI5Yjg4NGQ0M2Q5OTFmYjFkYTE3MTVhMWZmOWVjMmUwZjBjMGJlZTgwOGM2YzY5ODhhZGNmNDQyOTU0ZmZkZjUvOTIxNGM2YzlhZjkwNmQxZmM1YzhmMzNmNDg5MTFiZTIwNGFhMjMxMDM5MTkxZWI4OWE3YmYwNTc5Y2U1NzAwMz9yZXNwb25zZS1jb2

In [None]:
import os
import shutil

# Collection folder for all defiltered images. exist_ok=True keeps this cell
# re-runnable: os.mkdir raised FileExistsError once the folder already existed.
os.makedirs('/content/final', exist_ok=True)

def moving(source,destination):
  """Move every entry of folder `source` into folder `destination`.

  Entries keep their names; `destination` must already exist.
  """
  for entry in os.listdir(source):
    shutil.move(os.path.join(source, entry), os.path.join(destination, entry))

# Gather the output of every defilter stage into /content/final, in this order.
for defilter_dir in (
    '/content/defilter/de_enchacement',
    '/content/defilter/deblur',
    '/content/defilter/decompression',
    '/content/defilter/dehaze',
    '/content/defilter/denoise',
    '/content/defilter/derain',
):
  moving(defilter_dir, '/content/final')


**Before running the cell below, replace the image_demo.py script in /content/InternImage/detection with the image_demo.py code that we have provided.**

In [None]:
!python /content/InternImage/detection/image_demo.py  --config /content/InternImage/detection/configs/coco/cascade_internimage_xl_fpn_3x_coco.py \
--checkpoint /content/InternImage/detection/ops_dcnv3/cascade_internimage_xl_fpn_3x_coco.pth --output_path /content/ --input_path /content/final/ \
--async-test

2023-05-05 02:50:29,114 - mmdet - INFO - using core type: DCNv3
2023-05-05 02:50:29,114 - mmdet - INFO - using activation layer: GELU
2023-05-05 02:50:29,114 - mmdet - INFO - using main norm layer: LN
2023-05-05 02:50:29,114 - mmdet - INFO - using dpr: linear, 0.6
2023-05-05 02:50:29,114 - mmdet - INFO - level2_post_norm: False
2023-05-05 02:50:29,114 - mmdet - INFO - level2_post_norm_block_ids: None
2023-05-05 02:50:29,114 - mmdet - INFO - res_post_norm: False
load checkpoint from local path: /content/InternImage/detection/ops_dcnv3/cascade_internimage_xl_fpn_3x_coco.pth
/content/final/000000000077.jpg
tensor box tensor([[ 23.5884, 165.5204, 124.8739, 335.8108],
        [215.1912,  55.9827, 320.1149, 177.5109],
        [319.2914,  54.3782, 413.6980, 148.8909],
        [270.9822,  57.3008, 291.7959, 141.8118],
        [269.8721,  56.3708, 312.4679, 142.4763]])
5
BB [23.588411331176758, 165.52035522460938, 124.87390899658203, 335.8108215332031]
SCORE 0.9993582367897034
class 1
BB [215.1

###annotations_preprocessing to coco format

In [None]:
%cd /content/
#download train annotation file
!wget https://www.l2ti.univ-paris13.fr/VSQuad/CD-COCO_ICIP2023_Challenge/train_annotations/train.json

/content
--2023-05-05 02:48:18--  https://www.l2ti.univ-paris13.fr/VSQuad/CD-COCO_ICIP2023_Challenge/train_annotations/train.json
Resolving www.l2ti.univ-paris13.fr (www.l2ti.univ-paris13.fr)... 194.254.163.51
Connecting to www.l2ti.univ-paris13.fr (www.l2ti.univ-paris13.fr)|194.254.163.51|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 974900235 (930M) [application/json]
Saving to: ‘train.json’


2023-05-05 02:49:51 (10.3 MB/s) - ‘train.json’ saved [974900235/974900235]



In [None]:
## Read the class names and their ids from the train annotation file
import pandas as pd
import json

# The context manager closes the annotation file as soon as it is parsed
# (the handle was previously left open for the rest of the session).
with open('/content/train.json') as f:
  data = json.load(f)

# Parallel lists: model_list[k] is the category name, model_id[k] its id.
model_list=[]
model_id=[]
for x in data['categories']:
  print(x['name'])
  model_list.append(x['name'])
  model_id.append(x['id'])

# Lookup table with one row per category: columns 'name' and 'id'.
correct_label=pd.DataFrame({'name': model_list, 'id': model_id})
correct_label

person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
dining table
toilet
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush


Unnamed: 0,name,id
0,person,1
1,bicycle,2
2,car,3
3,motorcycle,4
4,airplane,5
...,...,...
75,vase,86
76,scissors,87
77,teddy bear,88
78,hair drier,89


In [None]:
#correct_label.to_csv('/content/correct_coco_lab.csv',index=False)
#correct_label=pd.read_csv('/content/correct_coco_lab.csv')
#correct_label

In [None]:
# Sanity check: both lists should have one entry per category.
len(model_list),len(model_id)

(80, 80)

In [None]:
# Display the category ids read from train.json (they are not contiguous).
model_id

[1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 27,
 28,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 67,
 70,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 84,
 85,
 86,
 87,
 88,
 89,
 90]

In [None]:
# Display the category names read from train.json.
model_list

['person',
 'bicycle',
 'car',
 'motorcycle',
 'airplane',
 'bus',
 'train',
 'truck',
 'boat',
 'traffic light',
 'fire hydrant',
 'stop sign',
 'parking meter',
 'bench',
 'bird',
 'cat',
 'dog',
 'horse',
 'sheep',
 'cow',
 'elephant',
 'bear',
 'zebra',
 'giraffe',
 'backpack',
 'umbrella',
 'handbag',
 'tie',
 'suitcase',
 'frisbee',
 'skis',
 'snowboard',
 'sports ball',
 'kite',
 'baseball bat',
 'baseball glove',
 'skateboard',
 'surfboard',
 'tennis racket',
 'bottle',
 'wine glass',
 'cup',
 'fork',
 'knife',
 'spoon',
 'bowl',
 'banana',
 'apple',
 'sandwich',
 'orange',
 'broccoli',
 'carrot',
 'hot dog',
 'pizza',
 'donut',
 'cake',
 'chair',
 'couch',
 'potted plant',
 'bed',
 'dining table',
 'toilet',
 'tv',
 'laptop',
 'mouse',
 'remote',
 'keyboard',
 'cell phone',
 'microwave',
 'oven',
 'toaster',
 'sink',
 'refrigerator',
 'book',
 'clock',
 'vase',
 'scissors',
 'teddy bear',
 'hair drier',
 'toothbrush']

In [None]:
# COCO class names in detector order: coco_labels[k] is the name for detector
# class k+1 (the labelling cell below looks names up with coco_labels[h-1]).
coco_labels= [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
         'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
         'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
         'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
         'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
         'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
         'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
         'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
         'hair drier', 'toothbrush' ]

In [None]:
# Path to the object-detection prediction file (.csv). The cells below read
# its class_name, filename, x1, y1, x2, y2 and Score columns.
Int = pd.read_csv('/content/final.csv')
Int

Unnamed: 0,class_name,filename,x1,y1,x2,y2,Score
0,1,/content/final/000000000077.jpg,23.588411,165.520355,124.873909,335.810822,0.999358
1,1,/content/final/000000000077.jpg,215.191223,55.982704,320.114929,177.510880,0.999186
2,1,/content/final/000000000077.jpg,319.291443,54.378246,413.697968,148.890930,0.998112
3,1,/content/final/000000000077.jpg,270.982239,57.300797,291.795868,141.811752,0.716462
4,1,/content/final/000000000077.jpg,269.872070,56.370823,312.467865,142.476288,0.525565
...,...,...,...,...,...,...,...
203,31,/content/final/000000023760.jpg,224.192322,311.866547,431.333130,351.121368,0.360913
204,1,/content/final/000000042050.jpg,206.625778,161.371231,326.025604,469.326904,0.999154
205,31,/content/final/000000042050.jpg,129.254669,413.546326,332.976593,534.774963,0.981853
206,8,/content/final/000000061076.jpg,296.146912,165.564987,621.678528,301.161682,0.994754


In [None]:
#Int=Int.drop(['index'], axis=1)
#Int

In [None]:
#convert the xyxy boxes produced by the InternImage model to the COCO xywh format
def xyxy2xywh(dt):
  """Convert corner boxes (x1, y1, x2, y2) to (xmin, ymin, width, height).

  Args:
    dt: dataframe with columns 'x1', 'y1', 'x2' and 'y2'.
  Returns:
    Four lists, one value per row of `dt`: xmin (= x1), ymin (= y1),
    w (= x2 - x1) and h (= y2 - y1).
  """
  left = dt['x1'].tolist()
  top = dt['y1'].tolist()
  right = dt['x2'].tolist()
  bottom = dt['y2'].tolist()

  widths = [far - near for near, far in zip(left, right)]
  heights = [far - near for near, far in zip(top, bottom)]
  return list(left), list(top), widths, heights

# Append the converted boxes to Int as four new columns: xmin, ymin, w, h.
# pd.concat(axis=1) aligns on the index, so this relies on Int having the
# default 0..n-1 index. Re-running the cell appends the columns a second time.
xmin,ymin,w,h = xyxy2xywh(Int)
Int=pd.concat([Int, pd.DataFrame(xmin,columns=['xmin']),pd.DataFrame(ymin,columns=['ymin']),
           pd.DataFrame(w,columns=['w']),pd.DataFrame(h,columns=['h']) ],axis=1)
Int.head(5)

Unnamed: 0,class_name,filename,x1,y1,x2,y2,Score,xmin,ymin,w,h
0,1,/content/final/000000000077.jpg,23.588411,165.520355,124.873909,335.810822,0.999358,23.588411,165.520355,101.285498,170.290466
1,1,/content/final/000000000077.jpg,215.191223,55.982704,320.114929,177.51088,0.999186,215.191223,55.982704,104.923706,121.528175
2,1,/content/final/000000000077.jpg,319.291443,54.378246,413.697968,148.89093,0.998112,319.291443,54.378246,94.406525,94.512684
3,1,/content/final/000000000077.jpg,270.982239,57.300797,291.795868,141.811752,0.716462,270.982239,57.300797,20.813629,84.510956
4,1,/content/final/000000000077.jpg,269.87207,56.370823,312.467865,142.476288,0.525565,269.87207,56.370823,42.595795,86.105465


In [None]:
#coco_labels[2-1]

'bicycle'

In [None]:
## The InternImage output uses class labels 1..80; look up the matching class
## name for every detection (coco_labels is 0-based, hence the -1).
# A comprehension is used so that no loop variables leak into the notebook
# namespace: the previous loop overwrote the module-level `h` (the list of box
# heights returned by xyxy2xywh) with the last class label.
labe = [coco_labels[class_label - 1] for class_label in Int['class_name'].tolist()]


In [None]:
# Turn the list of class names into a one-column frame and append it to Int
# as column 'name' (aligned on the index).
labe=pd.DataFrame(labe,columns=['name'])
Int=pd.concat([Int,labe],axis=1)
Int

Unnamed: 0,class_name,filename,x1,y1,x2,y2,Score,xmin,ymin,w,h,name
0,1,/content/final/000000000077.jpg,23.588411,165.520355,124.873909,335.810822,0.999358,23.588411,165.520355,101.285498,170.290466,person
1,1,/content/final/000000000077.jpg,215.191223,55.982704,320.114929,177.510880,0.999186,215.191223,55.982704,104.923706,121.528175,person
2,1,/content/final/000000000077.jpg,319.291443,54.378246,413.697968,148.890930,0.998112,319.291443,54.378246,94.406525,94.512684,person
3,1,/content/final/000000000077.jpg,270.982239,57.300797,291.795868,141.811752,0.716462,270.982239,57.300797,20.813629,84.510956,person
4,1,/content/final/000000000077.jpg,269.872070,56.370823,312.467865,142.476288,0.525565,269.872070,56.370823,42.595795,86.105465,person
...,...,...,...,...,...,...,...,...,...,...,...,...
203,31,/content/final/000000023760.jpg,224.192322,311.866547,431.333130,351.121368,0.360913,224.192322,311.866547,207.140808,39.254822,skis
204,1,/content/final/000000042050.jpg,206.625778,161.371231,326.025604,469.326904,0.999154,206.625778,161.371231,119.399826,307.955673,person
205,31,/content/final/000000042050.jpg,129.254669,413.546326,332.976593,534.774963,0.981853,129.254669,413.546326,203.721924,121.228638,skis
206,8,/content/final/000000061076.jpg,296.146912,165.564987,621.678528,301.161682,0.994754,296.146912,165.564987,325.531616,135.596695,truck


In [None]:
# Number of detections per detector class label.
Int['class_name'].value_counts()

1     72
3     19
26    12
8     11
31    10
10     9
57     7
37     6
25     5
46     4
7      4
75     4
76     4
42     4
6      3
61     3
63     3
27     3
4      2
72     2
64     2
70     2
47     2
73     2
38     1
34     1
48     1
14     1
59     1
28     1
44     1
17     1
40     1
20     1
67     1
23     1
12     1
Name: class_name, dtype: int64

In [None]:
## Remove the image folder prefix and the .jpg extension from the filename column
final_dat=Int.copy()
# regex=False makes both patterns literal text. Without it the result depended
# on the pandas default: when patterns are treated as regular expressions the
# '.' in '.jpg' matches any character (and pandas emits a FutureWarning).
final_dat['filename'] = final_dat['filename'].str.replace('/content/final/','',regex=False)
final_dat['filename'] = final_dat['filename'].str.replace('.jpg','',regex=False)


  final_dat['filename'] = final_dat['filename'].str.replace('.jpg','')


In [None]:
# Display the predictions with the cleaned-up filename column.
final_dat

Unnamed: 0,class_name,filename,x1,y1,x2,y2,Score,xmin,ymin,w,h,name
0,1,000000000077,23.588411,165.520355,124.873909,335.810822,0.999358,23.588411,165.520355,101.285498,170.290466,person
1,1,000000000077,215.191223,55.982704,320.114929,177.510880,0.999186,215.191223,55.982704,104.923706,121.528175,person
2,1,000000000077,319.291443,54.378246,413.697968,148.890930,0.998112,319.291443,54.378246,94.406525,94.512684,person
3,1,000000000077,270.982239,57.300797,291.795868,141.811752,0.716462,270.982239,57.300797,20.813629,84.510956,person
4,1,000000000077,269.872070,56.370823,312.467865,142.476288,0.525565,269.872070,56.370823,42.595795,86.105465,person
...,...,...,...,...,...,...,...,...,...,...,...,...
203,31,000000023760,224.192322,311.866547,431.333130,351.121368,0.360913,224.192322,311.866547,207.140808,39.254822,skis
204,1,000000042050,206.625778,161.371231,326.025604,469.326904,0.999154,206.625778,161.371231,119.399826,307.955673,person
205,31,000000042050,129.254669,413.546326,332.976593,534.774963,0.981853,129.254669,413.546326,203.721924,121.228638,skis
206,8,000000061076,296.146912,165.564987,621.678528,301.161682,0.994754,296.146912,165.564987,325.531616,135.596695,truck


In [None]:
## Assign the category ids used by the train annotation file (json file)
class_yolo  =  final_dat['name'].tolist()
corr_class  =  correct_label['name'].tolist()
corr_id  =  correct_label['id'].tolist()

# One dictionary lookup per detection instead of scanning every category.
# If a name is listed more than once in correct_label, its last id is used.
name_to_id = dict(zip(corr_class, corr_id))

# Fail loudly on names without a category id. The previous nested loop skipped
# them silently, which made correct_class shorter than the prediction table
# and shifted every following id onto the wrong row in the concat below.
unknown_names = sorted({name for name in class_yolo if name not in name_to_id})
if unknown_names:
  raise ValueError(f'No category id found for class names: {unknown_names}')

# correct_class[k] is the annotation-file category id of detection k.
correct_class = [name_to_id[name] for name in class_yolo]


In [None]:
# Sanity check: should equal the number of rows in final_dat.
len(correct_class)

208

In [None]:
# Concatenate the corrected class ids to the prediction table.
# reset_index() renumbers the rows 0..n-1 (keeping the old index as an 'index'
# column) so that the axis=1 concat lines up row k with correct_class[k].
d=pd.DataFrame(correct_class,columns=['class_id'])
d1=final_dat.reset_index()
final = pd.concat([d1,d],axis=1)
final

Unnamed: 0,index,class_name,filename,x1,y1,x2,y2,Score,xmin,ymin,w,h,name,class_id
0,0,1,000000000077,23.588411,165.520355,124.873909,335.810822,0.999358,23.588411,165.520355,101.285498,170.290466,person,1
1,1,1,000000000077,215.191223,55.982704,320.114929,177.510880,0.999186,215.191223,55.982704,104.923706,121.528175,person,1
2,2,1,000000000077,319.291443,54.378246,413.697968,148.890930,0.998112,319.291443,54.378246,94.406525,94.512684,person,1
3,3,1,000000000077,270.982239,57.300797,291.795868,141.811752,0.716462,270.982239,57.300797,20.813629,84.510956,person,1
4,4,1,000000000077,269.872070,56.370823,312.467865,142.476288,0.525565,269.872070,56.370823,42.595795,86.105465,person,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
203,203,31,000000023760,224.192322,311.866547,431.333130,351.121368,0.360913,224.192322,311.866547,207.140808,39.254822,skis,35
204,204,1,000000042050,206.625778,161.371231,326.025604,469.326904,0.999154,206.625778,161.371231,119.399826,307.955673,person,1
205,205,31,000000042050,129.254669,413.546326,332.976593,534.774963,0.981853,129.254669,413.546326,203.721924,121.228638,skis,35
206,206,8,000000061076,296.146912,165.564987,621.678528,301.161682,0.994754,296.146912,165.564987,325.531616,135.596695,truck,8


In [None]:
# Remapping examples: detector class_name 31 -> class_id 35, and class_name 12 -> class_id 13

In [None]:
## The prediction file submission expects each image id to be a plain integer,
## so the zero-padded filenames (e.g. "000000000077") are converted to ints,
## which drops the leading zeros.
def deleteLeadingZeros(inputString):
  """Convert zero-padded numeric filenames to integers.

  Args:
    inputString: iterable of values accepted by ``int()``, for example
      ``["000000000077", "000000042050"]``. Despite the parameter name this
      is a collection of filenames, not a single string.

  Returns:
    A list of ints in the same order as the input.

  Raises:
    ValueError: if an element is not a valid integer literal.
  """
  # Iterate the values directly rather than by position, so any iterable
  # (list, tuple, generator, ...) is accepted.
  return [int(name) for name in inputString]

In [None]:
# Create a list of COCO-style result dictionaries from the final prediction
# dataframe: one dictionary per detected box.

import json
import os

x = final['xmin'].tolist()
y = final['ymin'].tolist()

w1 = final['w'].tolist()
h1 = final['h'].tolist()

c = final['Score'].tolist()
# Deliberately not called `id`: assigning to that name would shadow the
# builtin id() for every later cell of the notebook.
image_files = final['filename'].tolist()
id1 = deleteLeadingZeros(image_files)
cls = final['class_id'].tolist()

# All lists come from the same dataframe, so zip walks them row by row.
# bbox is [xmin, ymin, width, height]; category_id is forced to a plain int.
Annotations = []
for image_id, category, left, top, width, height, score in zip(id1, cls, x, y, w1, h1, c):
    a_Dict = {"image_id": image_id, 'category_id': int(category), 'bbox': [left, top, width, height], 'score': score}
    Annotations.append(a_Dict)
    print(a_Dict)


{'image_id': 77, 'category_id': 1, 'bbox': [23.58841133117676, 165.52035522460938, 101.28549766540529, 170.29046630859375], 'score': 0.9993582367897034}
{'image_id': 77, 'category_id': 1, 'bbox': [215.19122314453125, 55.982704162597656, 104.9237060546875, 121.5281753540039], 'score': 0.9991857409477234}
{'image_id': 77, 'category_id': 1, 'bbox': [319.2914428710937, 54.37824630737305, 94.40652465820318, 94.5126838684082], 'score': 0.9981123208999634}
{'image_id': 77, 'category_id': 1, 'bbox': [270.98223876953125, 57.30079650878906, 20.813629150390625, 84.51095581054688], 'score': 0.7164623737335205}
{'image_id': 77, 'category_id': 1, 'bbox': [269.8720703125, 56.37082290649414, 42.595794677734375, 86.10546493530273], 'score': 0.5255651473999023}
{'image_id': 77, 'category_id': 41, 'bbox': [46.83818817138672, 319.16534423828125, 40.717185974121094, 23.87408447265625], 'score': 0.927571713924408}
{'image_id': 77, 'category_id': 41, 'bbox': [235.4725341796875, 129.73558044433594, 31.1024780

Serialize the predictions and save them as a JSON file

In [None]:
import json
import numpy as np

class NpEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy values to plain Python equivalents.

    Pass it as ``cls=NpEncoder`` to ``json.dumps`` / ``json.dump``.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        Called by the json machinery only for objects it cannot serialize
        natively.

        Args:
            obj: the object the standard encoder could not handle.

        Returns:
            ``bool`` for NumPy booleans, ``int`` for NumPy integers, ``float``
            for NumPy floating values, a (nested) list for arrays, and ``str``
            for NumPy byte strings.

        Raises:
            TypeError: from the base class, for any other unsupported type.
            UnicodeDecodeError: if a NumPy byte string is not valid UTF-8.
        """
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        # np.bytes_ is the name that exists in both NumPy 1.x and 2.x
        # (np.string_ was removed in 2.0). Byte strings are emitted as text.
        if isinstance(obj, np.bytes_):
            return obj.decode("utf-8")
        return super().default(obj)
# Serialize the annotation list to a JSON string; NpEncoder handles any NumPy
# values left inside the dictionaries.
json_str = json.dumps(Annotations, cls=NpEncoder)

# Show the serialized text and its type.
print(json_str) 
print(type(json_str))  

[{"image_id": 77, "category_id": 1, "bbox": [23.58841133117676, 165.52035522460938, 101.28549766540529, 170.29046630859375], "score": 0.9993582367897034}, {"image_id": 77, "category_id": 1, "bbox": [215.19122314453125, 55.982704162597656, 104.9237060546875, 121.5281753540039], "score": 0.9991857409477234}, {"image_id": 77, "category_id": 1, "bbox": [319.2914428710937, 54.37824630737305, 94.40652465820318, 94.5126838684082], "score": 0.9981123208999634}, {"image_id": 77, "category_id": 1, "bbox": [270.98223876953125, 57.30079650878906, 20.813629150390625, 84.51095581054688], "score": 0.7164623737335205}, {"image_id": 77, "category_id": 1, "bbox": [269.8720703125, 56.37082290649414, 42.595794677734375, 86.10546493530273], "score": 0.5255651473999023}, {"image_id": 77, "category_id": 41, "bbox": [46.83818817138672, 319.16534423828125, 40.717185974121094, 23.87408447265625], "score": 0.927571713924408}, {"image_id": 77, "category_id": 41, "bbox": [235.4725341796875, 129.73558044433594, 31.

In [None]:
# Write the serialized predictions to the relative path predict.json,
# replacing any existing file of that name.
with open("predict.json", mode="w") as predictions_file:
    predictions_file.write(json_str)