In [4]:
import pandas as pd
import numpy as np
import sklearn.datasets as datasets
from pathlib import Path

def load_iris_dataset():
    """Load the Iris dataset through ``sklearn.datasets``.

    Returns:
        Whatever ``datasets.load_iris(return_X_y=True)`` yields; the caller in
        this notebook unpacks it as ``X, y``.
    """
    features_and_labels = datasets.load_iris(return_X_y=True)
    return features_and_labels

def calculateImbalanceRatio(y):
    """Return the class imbalance ratio of ``y`` as a two-decimal string.

    Args:
        y: Array-like of class labels.

    Returns:
        str: The count of the most frequent label divided by the count of the
        least frequent label, formatted with two decimal places.
    """
    _, label_counts = np.unique(y, return_counts=True)
    majority_over_minority = label_counts.max() / label_counts.min()
    return format(majority_over_minority, '.2f')

def get_dataset_info(datasetPath, datasetName):
    """Summarise a header-less CSV dataset as a one-row DataFrame.

    The last CSV column is treated as the class label and every preceding
    column as a feature. The "Used" and "Total" columns are filled with the
    same measured values; callers may overwrite the totals afterwards.

    Args:
        datasetPath: Path of the CSV file, read with ``header=None``.
        datasetName: Name stored in the ``DatasetName`` column.

    Returns:
        pandas.DataFrame: A single row with the class, observation and
        feature counts plus the imbalance ratio of the labels.
    """
    table = pd.read_csv(datasetPath, header=None).values
    features, labels = table[:, :-1], table[:, -1]

    class_count = np.unique(labels).size
    observation_count, feature_count = features.shape

    summary_row = {
        "DatasetName": datasetName,
        "ClassUsed": [class_count],
        "ClassTotal": [class_count],
        "ObservationUsed": [observation_count],
        "ObservationTotal": [observation_count],
        "FeatureUsed": [feature_count],
        "FeatureTotal": [feature_count],
        "Imbalance Ratio": calculateImbalanceRatio(labels),
    }
    return pd.DataFrame(summary_row)

def get_bands_dataset_info():
    """Build the summary row for the "Cylinder Bands" dataset.

    The row is measured from ``../data/processed/Bands.csv``; afterwards the
    ``ObservationTotal`` and ``FeatureTotal`` entries are replaced by fixed
    numbers.

    Returns:
        pandas.DataFrame: One-row summary with the overridden totals.
    """
    bands_summary = get_dataset_info('../data/processed/Bands.csv', "Cylinder Bands")

    # NOTE(review): 541 and 39 are presumably the sizes of the original,
    # unprocessed dataset -- confirm against the data source.
    fixed_totals = (('ObservationTotal', 541), ('FeatureTotal', 39))
    for column, total in fixed_totals:
        bands_summary.loc[0, [column]] = [total]

    return bands_summary

def get_glass_identification_dataset_info():
    """Build the summary row for the "Glass Identification" dataset.

    The row is measured from ``../data/processed/GlassIdentification.csv``;
    afterwards the ``ClassTotal`` and ``FeatureTotal`` entries are replaced by
    fixed numbers.

    Returns:
        pandas.DataFrame: One-row summary with the overridden totals.
    """
    glass_summary = get_dataset_info(
        '../data/processed/GlassIdentification.csv', "Glass Identification"
    )

    # NOTE(review): 7 and 10 are presumably the class and feature counts of
    # the original, unprocessed dataset -- confirm against the data source.
    fixed_totals = (('ClassTotal', 7), ('FeatureTotal', 10))
    for column, total in fixed_totals:
        glass_summary.loc[0, [column]] = [total]

    return glass_summary


def get_iris_dataset_info():
    """Build the one-row summary for the Iris dataset.

    The data comes from ``load_iris_dataset()`` instead of a CSV file. The
    "Used" and "Total" columns carry the same measured values.

    Returns:
        pandas.DataFrame: A single row with the class, observation and
        feature counts plus the imbalance ratio of the labels.
    """
    X, y = load_iris_dataset()

    class_count = np.unique(y).size
    dimensions = np.shape(X)
    observation_count = dimensions[0]
    feature_count = dimensions[1]

    summary_row = {
        "DatasetName": "Iris",
        "ClassUsed": [class_count],
        "ClassTotal": [class_count],
        "ObservationUsed": [observation_count],
        "ObservationTotal": [observation_count],
        "FeatureUsed": [feature_count],
        "FeatureTotal": [feature_count],
        "Imbalance Ratio": calculateImbalanceRatio(y),
    }
    return pd.DataFrame(summary_row)

def createEmptySummaryDataSet():
    """Return an empty DataFrame that has the summary-table columns.

    Returns:
        pandas.DataFrame: Zero rows, with the eight summary columns in the
        order used by the report.
    """
    summary_columns = [
        'DatasetName',
        'ClassUsed',
        'ClassTotal',
        'ObservationUsed',
        'ObservationTotal',
        'FeatureUsed',
        'FeatureTotal',
        "Imbalance Ratio",
    ]
    return pd.DataFrame(columns=summary_columns)

def getNotElectricalDatasetsInfo():
    """Collect the summary rows of the non-electrical datasets.

    The rows appear in this order: Iris, Sonar, Glass Identification,
    Libras Movement, Cylinder Bands.

    Returns:
        pandas.DataFrame: One row per dataset, re-indexed from 0.
    """
    # Gather every one-row frame first and concatenate once. Seeding the
    # result with an empty frame and growing it step by step re-copies the
    # rows on each call and makes pandas >= 2.1 emit a FutureWarning about
    # empty entries in concat.
    dataset_frames = [
        get_iris_dataset_info(),
        get_dataset_info('../data/processed/Sonar.csv', "Sonar"),
        get_glass_identification_dataset_info(),
        get_dataset_info('../data/processed/LibrasMovement.csv', "Libras Movement"),
        get_bands_dataset_info(),
    ]
    return pd.concat(dataset_frames, ignore_index=True)

def getElectricalDatasetsInfo():
    """Collect the summary rows of the electrical datasets.

    The rows appear in this order: Electrical Fault Classification,
    Electrical Grid Stability, Electrical Fault Detection.

    Returns:
        pandas.DataFrame: One row per dataset, re-indexed from 0.
    """
    electrical_sources = [
        ('../data/processed/ElectricalFaultClassification.csv', "Electrical Fault Classification"),
        ('../data/processed/ElectricalGridStability.csv', "Electrical Grid Stability"),
        ('../data/processed/ElectricalFaultDetection.csv', "Electrical Fault Detection"),
    ]
    # Build all rows, then concatenate once. Growing the table from an empty
    # seed frame re-copies rows on each step and makes pandas >= 2.1 emit a
    # FutureWarning about empty entries in concat.
    dataset_frames = [get_dataset_info(path, name) for path, name in electrical_sources]
    return pd.concat(dataset_frames, ignore_index=True)


In [5]:
def main():
    """Print the summary of all datasets as a LaTeX table.

    The non-electrical datasets come first, followed by the electrical ones.
    The table is written to standard output without the index column and
    without LaTeX escaping.
    """
    # A single concat of the two partial tables; starting from an empty frame
    # adds nothing and makes pandas >= 2.1 emit a FutureWarning about empty
    # entries in concat.
    summaryDatasets = pd.concat(
        [getNotElectricalDatasetsInfo(), getElectricalDatasetsInfo()],
        ignore_index=True,
    )

    print(summaryDatasets.to_latex(index=False, escape=False))

In [6]:
# Run the report: prints the combined dataset summary as a LaTeX table.
main()

\begin{tabular}{llllllll}
\toprule
                    DatasetName & ClassUsed & ClassTotal & ObservationUsed & ObservationTotal & FeatureUsed & FeatureTotal & Imbalance Ratio \\
\midrule
                           Iris &         3 &          3 &             150 &              150 &           4 &            4 &            1.00 \\
                          Sonar &         2 &          2 &             208 &              208 &          60 &           60 &            1.14 \\
           Glass Identification &         6 &          7 &             214 &              214 &           9 &           10 &            8.44 \\
                Libras Movement &        15 &         15 &             360 &              360 &          90 &           90 &            1.00 \\
                 Cylinder Bands &         2 &          2 &             365 &              541 &          18 &           39 &            1.70 \\
Electrical Fault Classification &         6 &          6 &            7861 &             786

  print(summaryDatasets.to_latex(index=False, escape=False))
