## File Manager

Written By: Rupesh Kumar Dey
Notebook for file management

In [1]:
# importing required modules
from zipfile import ZipFile
import os 
import sys
import shutil
!pip install openpyxl
# rm -r .Trash-0

Collecting openpyxl
  Downloading openpyxl-3.0.10-py2.py3-none-any.whl (242 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m242.1/242.1 KB[0m [31m20.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting et-xmlfile
  Downloading et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-1.1.0 openpyxl-3.0.10
You should consider upgrading via the '/usr/bin/python -m pip install --upgrade pip' command.[0m[33m
[0m

## Section to unzip files

In [2]:
# Insert File Location
root_path = "/notebooks"

# Insert file location here
path_data = root_path + "/DatasetAug.zip"

# Checking if filepath or zipfile exists
os.path.isfile(path_data)

True

In [3]:
# opening the zip file in READ mode
with ZipFile(path_data, 'r') as zip:
    """
    Function to unzip files from a zip file folder
    """
    # printing all the contents of the zip file
    # zip.printdir()
    # extracting all the files
    print('Extracting all the files now...')
    zip.extractall()
    print('Done!')

Extracting all the files now...
Done!


## Section to Move Folder from one location to another

In [4]:
source = root_path + "/TrainedResultsAugTuned/11_FinalAugmentationModelsTest.ipynb"
destination = root_path + "/11_FinalAugmentationModelsTest.ipynb"
  
shutil.move(source, destination)

'/notebooks/11_FinalAugmentationModelsTest.ipynb'

## Section to Delete Files in folder

In [None]:
import os
file_path = "/notebooks/testing/Dataset_CLAHE/train/0_normal_CLAHE/"
shutil.rmtree('dir_path')

## Section to zip files

In [9]:
import shutil
shutil.make_archive("13B_AugVotingModels", 'zip', "ValResults/13B_AugVotingModels")
print("DONE")

DONE


In [23]:
# For checking purposes
"""
df["ModelName"][0], df["ModelID"][0], df["macro avg"][1],df["macro avg"][0], df["macro avg"][2]
df[df.columns[1]][1], df[df.columns[2]][1], df[df.columns[3]][1], df[df.columns[4]][1]
"""

(1.0, 0.853333333, 0.8, 0.993333333)

## Section to extract and summarize classification report for analysis

In [3]:
def extractClassificationReport(reportsPath):
    """
    Python function file to extract test and validation data from results folders with classification reports and summarize the results in a single CSV file.
    Inputs:
        a) reportsPath - Location where classification reports are stored


    Outputs:
        a) dfResults - summarized results
    """
    import os
    import pandas as pd

    # Create template dataframe
    dfResults = pd.DataFrame(columns=['modelName', 'modelGroup', 'inputSize', 'optimizer', 'lrType',
                                      'transferLearning', 'featureExtraction', 'fineTuning', 'augmentation',
                                      'valAccuracy', 'valPrecision', 'valF1Score', 'testAccuracy', 'testPrecision',
                                      'testF1Score',
                                      'normal_recall_val', 'ulcerative_colitis_recall_val', 'polyps_recall_val',
                                      'esophagitis_recall_val',
                                      'normal_recall_test', 'ulcerative_colitis_recall_test', 'polyps_recall_test',
                                      'esophagitis_recall_test', 'ValTestAverage'])

    # Specify location of test and validation files
    fileListVal = os.listdir(reportsPath + "val/")
    fileListTest = os.listdir(reportsPath + "test/")

    # Ensure that the same number of files for validation and test are saved
    assert len(fileListVal) == len(
        fileListTest), "Please check file directory and ensure both test and val folders have a complete set of data"

    # Process each file
    for i in range(len(fileListVal)):
        # Filename
        filename, file_extension = os.path.splitext(fileListVal[i])

        # Test and validation file to read data from to summarize
        dfVal = pd.read_csv(reportsPath + "val/" + fileListVal[i])
        dfTest = pd.read_csv(reportsPath + "test/" + filename + "_TEST.csv")

        # Print path to check
        print(reportsPath + "val/" + fileListVal[i])
        print(reportsPath + "test/" + filename + "_TEST.csv")
        print("\n")

        # Extract data
        modelName = os.path.splitext(fileListVal[i])[0]
        modelID = dfVal["modelTag"][0]

        # Input size
        if "basemodel1" in modelName.lower():
            inputSize = 100
        else:
            inputSize = 224

        # optimizer
        if "adam" in modelName.lower():
            optimizer = "ADAM"
        else:
            optimizer = "SGD"

        # Learning rate regime
        if "lrscheduler" in modelName.lower():
            lrType = "DYNAMIC"
        else:
            lrType = "STATIC"

        # Transfer learning implemented or not
        if "transferlearning" in modelName.lower():
            transferLearning = True
        else:
            transferLearning = False

        # Is transfer learning type a feature extraction type
        if "transferlearningfeatureextraction" in modelName.lower() and transferLearning == True:
            featureExtraction = True
        else:
            featureExtraction = False

        # Is transfer learning type a fine tuning type
        if "transferlearningfinetuning" in modelName.lower() and transferLearning == True:
            fineTuning = True
        else:
            fineTuning = False

        # Is the model trained on augmented data
        if "11_" in modelName.lower():
            augmentation = True
        else:
            augmentation = False
        
        # Extract results. 
        valAccuracy = dfVal["macro avg"][1]
        valPrecision = dfVal["macro avg"][0]
        valF1Score = dfVal["macro avg"][2]

        testAccuracy = dfTest["macro avg"][1]
        testPrecision = dfTest["macro avg"][0]
        testF1Score = dfTest["macro avg"][2]

        normal_recall_val = dfVal[dfVal.columns[1]][1]
        ulcerative_colitis_recall_val = dfVal[dfVal.columns[2]][1]
        polyps_recall_val = dfVal[dfVal.columns[3]][1]
        esophagitis_recall_val = dfVal[dfVal.columns[4]][1]

        normal_recall_test = dfTest[dfTest.columns[1]][1]
        ulcerative_colitis_recall_test = dfTest[dfTest.columns[2]][1]
        polyps_recall_test = dfTest[dfTest.columns[3]][1]
        esophagitis_recall_test = dfTest[dfTest.columns[4]][1]
        
        # Append data to dataframe
        dfResults = dfResults.append({
            'modelName': modelName,
            'modelGroup': modelID,
            'inputSize': inputSize,
            'optimizer': optimizer,
            'lrType': lrType,
            'transferLearning': transferLearning,
            'featureExtraction': featureExtraction,
            'fineTuning': fineTuning,
            'augmentation': augmentation,
            'valAccuracy': valAccuracy,
            'valPrecision': valPrecision,
            'valF1Score': valF1Score,
            'testAccuracy': testAccuracy,
            'testPrecision': testPrecision,
            'testF1Score': testF1Score,
            'normal_recall_val': normal_recall_val,
            'ulcerative_colitis_recall_val': ulcerative_colitis_recall_val,
            'polyps_recall_val': polyps_recall_val,
            'esophagitis_recall_val': esophagitis_recall_val,
            'normal_recall_test': normal_recall_test,
            'ulcerative_colitis_recall_test': ulcerative_colitis_recall_test,
            'polyps_recall_test': polyps_recall_test,
            'esophagitis_recall_test': esophagitis_recall_test,
            'ValTestAverage': 0.5 * (valAccuracy + testAccuracy)},
            ignore_index=True)
    
    return dfResults

In [5]:
# Perform classification report summary 
dfResults = extractClassificationReport("classificationReportExtraction/")

classificationReportExtraction/val/13A_TransferLearningFeatureExtraction_ResNet50V2_NORMAL_tunedAdamLrScheduler.csv
classificationReportExtraction/test/13A_TransferLearningFeatureExtraction_ResNet50V2_NORMAL_tunedAdamLrScheduler_TEST.csv


classificationReportExtraction/val/13A_TransferLearningFeatureExtraction_MobileNetV1_NORMAL_tunedAdamLrScheduler.csv
classificationReportExtraction/test/13A_TransferLearningFeatureExtraction_MobileNetV1_NORMAL_tunedAdamLrScheduler_TEST.csv


classificationReportExtraction/val/13A_TransferLearningFeatureExtraction_ResNet50V1_MULTISCALE_tunedAdamLrScheduler.csv
classificationReportExtraction/test/13A_TransferLearningFeatureExtraction_ResNet50V1_MULTISCALE_tunedAdamLrScheduler_TEST.csv


classificationReportExtraction/val/13A_TransferLearningFineTuning_ResNet50V2_RAYLEIGH.csv
classificationReportExtraction/test/13A_TransferLearningFineTuning_ResNet50V2_RAYLEIGH_TEST.csv


classificationReportExtraction/val/13_BaseModel2_MULTISCALE_tunedAdamLrScheduler.c

In [6]:
# Convert results to csv and excel and save to directory. 
dfResults.to_csv("classificationReportExtraction13.csv")
dfResults.to_excel(r"classificationReportExtraction13.xlsx", index = False, header=True)

Collecting openpyxl
  Downloading openpyxl-3.0.10-py2.py3-none-any.whl (242 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m242.1/242.1 KB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting et-xmlfile
  Downloading et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-1.1.0 openpyxl-3.0.10
You should consider upgrading via the '/usr/bin/python -m pip install --upgrade pip' command.[0m[33m
[0m