In [1]:
import xml.etree.ElementTree as ET
import pandas as pd

# Out XML file struct

The struct of out XML file will be as follow:

- Metadata(name = "Metadata"){text: Description of the file}
    - Variable(name, type) {text: Variable description}
        - Attributes(name statistic analyzed)
    - \Variable
- \Metadata

In [30]:
class Metadata():
    
    """
    A class that allow you to get variable information of a dataset

    ...

    Attributes
    ----------
    dataset : str
        a formatted string the same of the dataset choosen name

    meta : obj
        a ElementTree object that has connection with XML file
    
    root : obj
        a root of the XML file
        
    Methods
    ----------
    getMeta()
        Prints the dataset name
        
    to_dataframe()
        Return a Pandas DataFrame with the XML file childs tag, attrib and text
    
    getDescription(variable)
        Get the text associate to the variable input, that literally is the variable explanation
    
    describe()
        Return a description of the XML file

    """

    
    def __init__(self, dataset):
        
        """
        Constructor default method
        
        Parameters
        ----------
        dataset : str
            a formatted string the same of the dataset choosen name
            
        """
        
        self.dataset = dataset
        
    
        self.meta = ET.parse(self.dataset + ".xml")

    def __str__(self):
        
        """
        Print default method
                
        Return
        ----------
        String
            a formatted string with the XML file root name and description
            
        """
            
        self.root = self.meta.getroot()
        
        return_str = f"The current metadata name is {self.root.tag}. Description: {self.root.text}"
        
        return return_str
    
    def getMeta(self):
        
        """
        Get method to return the object name
                
        Return
        ----------
        String
            a formatted string with the XML file name
            
        """
        
        return self.dataset
    
    def to_dataframe(self):
        
        """
        Method to transform the XML file in Pandas DataFrame (simple mode)
                
        Return
        ----------
        Pandas DataFrame
            a Pandas DataFrame with just tag, attribute and text of childs in the root that has tag "variable"
            
        """
        
        # Auxiliar Lists to store tag, attrib and text information
        tag = []
        attrib = []
        text = []
        
        # Running along childs that have tag equals to "variable" and appending in the auxiliar lists
        for child in self.root.iter('variable'):
            tag.append(child.tag)
            attrib.append(child.attrib.get('name'))
            text.append(child.text)
        
        # Creating Pandas DataFrame
        df_result = pd.DataFrame({'Name': attrib})
        
        
        tag = []
        attrib = []
        text = []
        
        for child in self.root.iter('description'):
            text.append(child.text)
        
        text.pop()
        
        df_result['description'] = text
        
        return df_result
    
    def getDescription(self, variable):
        
        """
        Method that get description of an variable
        
        Parameters
        ----------
        variable : str
            a formatted string with the variable name
        
        Return
        ----------
        Pandas DataFrame
            a Pandas DataFrame with just tag, attribute and text of childs in the root that has tag "variable"
            
        """
        
        df_result = self.to_dataframe()
        
        return df_result[df_result['Name'] == variable]['description'].values[0]
    
    def describe(self):
        
        """
        Method to describe the metadata XML file (**Building)
                
        Return
        ----------
        Pandas DataFrame
            a Pandas DataFrame with a little description of the XML file
            
        """
        
        df_result = self.to_dataframe()
        
        print(f"The file has {df_result.shape[0]} elements")
        
    

In [31]:
help(Metadata)

Help on class Metadata in module __main__:

class Metadata(builtins.object)
 |  A class that allow you to get variable information of a dataset
 |  
 |  ...
 |  
 |  Attributes
 |  ----------
 |  dataset : str
 |      a formatted string the same of the dataset choosen name
 |  
 |  meta : obj
 |      a ElementTree object that has connection with XML file
 |  
 |  root : obj
 |      a root of the XML file
 |      
 |  Methods
 |  ----------
 |  getMeta()
 |      Prints the dataset name
 |      
 |  to_dataframe()
 |      Return a Pandas DataFrame with the XML file childs tag, attrib and text
 |  
 |  getDescription(variable)
 |      Get the text associate to the variable input, that literally is the variable explanation
 |  
 |  describe()
 |      Return a description of the XML file
 |  
 |  Methods defined here:
 |  
 |  __init__(self, dataset)
 |      Constructor default method
 |      
 |      Parameters
 |      ----------
 |      dataset : str
 |          a formatted string the sam

In [32]:
meta = Metadata('meta')

In [33]:
print(meta)

The current metadata name is metadata. Description: 
	


In [34]:
meta.to_dataframe()

Unnamed: 0,Name,description
0,COD,Code of dataset
1,YEAR,Year of observation
2,DAY365,Day of observation in just numeric values
3,MINUTE2400,Minute on the day that the observation was ca...
4,N_WNDVEL_5M,Wind velocity measured 5 meters above ground
5,N_WNDVEL_2M,Wind velocity measured 2 meters above ground
6,N_WNDDIR_DEGREE,Wind direction measured by degrees
7,N_INCRAD_10MSR,Total of 10 measures of incident radiation
8,N_REFRAD_10MSR,Total of 10 measures of reflected radiation
9,N_PHTFLOW_10MSR,Total of 10 measures of positive heat flow in...


In [36]:
meta.getDescription('N_WNDVEL_2M')

' Wind velocity measured 2 meters above ground '

In [37]:
meta.describe()

The file has 18 elements
