# Data analysis can be split into three elementary steps:
## 1. Importing the data
All the methods necessary to **read** the file contents and parse the obtained data, adapting it into the desired format.
 
```
import pathlib
import pandas as pd

def data_import(self, path: pathlib.Path) -> pd.DataFrame:
    """Sketch of the import step: read the file at ``path`` and keep the result.

    This is an illustrative stub; the actual reading/parsing logic is elided.
    """
    ...
    # Placeholder: the parsed result is meant to be stored on the instance.
    # NOTE(review): the signature promises a DataFrame but the sketch shows no
    # ``return`` -- confirm whether the data should be returned, stored, or both.
    self.data = ...
```

## 2. Processing the data
All the smoothing, normalization, interpolation, etc. should happen here. It is implemented using the composite design pattern.

## 3. Showing the data
TODO: describe how the processed data is presented to the user.

# Implementation using the bridge design pattern / dependency injection:


In [77]:
import pathlib
from abc import ABC, abstractmethod
from typing import Callable, Optional, List
import pandas as pd

class DataParser(ABC):
    """Abstract parsing strategy: turns a file path into a ``DataFrame``."""

    def change_method(self, new_method: Callable) -> None:
        """Replace this instance's ``apply`` with ``new_method``.

        The callable is stored as an instance attribute, so it is invoked
        without ``self`` -- a plain function receives only the path argument.
        Other instances of the class are not affected.
        """
        setattr(self, "apply", new_method)

    @abstractmethod
    def apply(self, path: pathlib.Path) -> pd.DataFrame:
        """Parse the file at ``path``; concrete parsers must override this."""
        message = "You should implement this!"
        raise NotImplementedError(message)
        
class FooParser(DataParser):
    """Dummy parser that ignores the path and yields a fixed column of ones."""

    def apply(self, path: pathlib.Path) -> pd.DataFrame:
        """Return a single-column, three-row frame filled with ``1``."""
        ones = [1] * 3
        return pd.DataFrame(ones)

class DataProcessor(ABC):
    """Abstract processing step: maps a ``DataFrame`` to a ``DataFrame``."""

    def change_method(self, new_method: Callable) -> None:
        """Replace this instance's ``apply`` with ``new_method``.

        The callable is stored as an instance attribute, so it is invoked
        without ``self`` -- a plain function receives only the data argument.
        """
        setattr(self, "apply", new_method)

    @abstractmethod
    def apply(self, data: pd.DataFrame) -> pd.DataFrame:
        """Process ``data``; concrete processors must override this."""
        message = "You should implement this!"
        raise NotImplementedError(message)
        
class FooProcessor(DataProcessor):
    """Identity processor: hands the input back untouched."""

    def apply(self, data: pd.DataFrame) -> pd.DataFrame:
        """Return ``data`` itself (the same object, no copy is made)."""
        unchanged = data
        return unchanged

class Composite(DataProcessor):
    """Processor that runs a sequence of processors one after another.

    The output of each operation is fed as input to the next one, in the
    order in which the operations were supplied/added.
    """

    def __init__(self, *operations: DataProcessor) -> None:
        """Create the composite from zero or more initial operations."""
        # ``*operations`` arrives as a tuple; copy it into a list so that
        # ``add`` and ``remove`` can actually mutate the collection.
        self.operations: List[DataProcessor] = list(operations)

    def apply(self, data: pd.DataFrame) -> pd.DataFrame:
        """Pass ``data`` through every stored operation and return the result."""
        for o in self.operations:
            data = o.apply(data)
        return data

    def add(self, *operations: DataProcessor) -> None:
        """Append the given operations to the end of the chain."""
        self.operations.extend(operations)

    def remove(self, *operations: DataProcessor) -> None:
        """Remove the given operations from the chain.

        Raises:
            ValueError: if one of the operations is not in the chain
                (propagated from ``list.remove``).
        """
        for o in operations:
            self.operations.remove(o)

    def show_list(self) -> None:
        """Print the current list of operations."""
        print(self.operations)

class DataViewer(ABC):
    """Abstract presentation step: consumes a ``DataFrame`` and returns nothing."""

    def change_method(self, new_method: Callable) -> None:
        """Replace this instance's ``apply`` with ``new_method``.

        The callable is stored as an instance attribute, so it is invoked
        without ``self`` -- a plain function receives only the data argument.
        """
        setattr(self, "apply", new_method)

    @abstractmethod
    def apply(self, data: pd.DataFrame) -> None:
        """Show ``data``; concrete viewers must override this."""
        message = "You should implement this!"
        raise NotImplementedError(message)
        

class FooViewer(DataViewer):
    """Viewer that hands the frame to the ``display`` function."""

    def apply(self, data: pd.DataFrame) -> None:
        """Show ``data`` via ``display``.

        NOTE(review): ``display`` is not imported here; presumably it is the
        one injected by IPython/Jupyter -- confirm.
        """
        frame = data
        display(frame)

class DataModel(ABC):
    """Ties a file path to an injected parser and processor.

    ``raw_data`` and ``processed_data`` are computed on every access; nothing
    is cached.
    """

    def __init__(
        self,
        path: pathlib.Path,
        parser_api: Optional[DataParser] = None,
        processing_api: Optional[DataProcessor] = None,
    ) -> None:
        """Store the path and the two strategy objects.

        Args:
            path: Location of the file handed to the parser.
            parser_api: Parser to use; a fresh ``FooParser`` when omitted.
            processing_api: Processor to use; a fresh ``FooProcessor`` when
                omitted.
        """
        # Defaults are built per instance. Instantiating them in the signature
        # would share one object between all models, so ``change_method`` on
        # one model would silently alter every other model.
        self.parser_api = FooParser() if parser_api is None else parser_api
        self.processing_api = (
            FooProcessor() if processing_api is None else processing_api
        )
        self.path = path

    @property
    def path(self):
        """Path of the file this model reads from."""
        return self._path

    @path.setter
    def path(self, path: pathlib.Path):
        self._path = path

    @property
    def raw_data(self):
        """Result of running the parser on ``path`` (re-parsed on each access)."""
        return self.parser_api.apply(self.path)

    @property
    def processed_data(self):
        """Result of running the processor on ``raw_data``."""
        return self.processing_api.apply(self.raw_data)
    
        
# Demo of swapping the processing strategy at run time.
# ``pathlib.Path`` is spelled out because this cell imports only ``pathlib``;
# a bare ``Path`` works only if an earlier cell happened to import it.
dm = DataModel(path=pathlib.Path())
# With the default identity processor, raw and processed data are equal.
display(dm.raw_data-dm.processed_data)
display(dm.processed_data)
# Replace the processor's ``apply`` on this instance: add 1 to every value.
dm.processing_api.change_method(lambda df: df.add(1))
display(dm.processed_data)



Unnamed: 0,0
0,0
1,0
2,0


Unnamed: 0,0
0,1
1,1
2,1


Unnamed: 0,0
0,2
1,2
2,2


In [154]:
import pathlib
from abc import ABC, abstractmethod
from typing import Callable, Optional, List
import pandas as pd
from pandas.errors import ParserError

class Handler(ABC):
    """Abstract base for every step that exposes a ``handle`` call."""

    def update_handler(self, new_handler: Callable) -> None:
        """Replace this instance's ``handle`` with ``new_handler``.

        The callable is stored as an instance attribute, so it is invoked
        without ``self`` -- a plain function receives only the call arguments.
        """
        setattr(self, "handle", new_handler)

    @abstractmethod
    def handle(self, *args, **kwargs):
        """Perform the step; concrete handlers must override this."""
        message = "You should implement this!"
        raise NotImplementedError(message)
        
class FileHandler(Handler):
    """Reads a delimited text file into a ``DataFrame`` via ``pd.read_csv``."""

    # Options forwarded to ``pd.read_csv``.
    _config = {'delimiter': ',',
               'header': 0,
               'on_bad_lines': 'error'}

    def handle(self, path: pathlib.Path) -> pd.DataFrame:
        """Read the file at ``path`` using the options in ``_config``.

        Raises:
            ParserError: if pandas cannot parse the file; the original
                message is extended with a hint about the relevant options
                and the original exception is chained as the cause.
        """
        try:
            return pd.read_csv(
                path,
                delimiter=self._config['delimiter'],
                header=self._config['header'],
                on_bad_lines=self._config['on_bad_lines'],
            )
        except ParserError as e:
            # Re-raise with a hint. The old ``return pd.DataFrame()`` after
            # this statement could never run and has been dropped.
            raise ParserError(str(e) + " perhaps setting delimiter=, header=, on_bad_lines= is in order.") from e
    
class Hitachi(Handler):
    """Reader for tab-delimited files whose column header sits at row index 27.

    NOTE(review): presumably the export format of a Hitachi instrument --
    confirm against a sample file.
    """

    def handle(self, path: pathlib.Path) -> pd.DataFrame:
        """Read ``path`` with ``pd.read_csv``, skipping malformed lines."""
        options = {
            "delimiter": '\t',
            "header": 27,
            "on_bad_lines": 'skip',
        }
        frame = pd.read_csv(path, **options)
        return frame

class Avantes(Handler):
    """Reader for semicolon-delimited, decimal-comma files with header at row 5.

    NOTE(review): presumably the export format of an Avantes instrument --
    confirm against a sample file.
    """

    def handle(self, path: pathlib.Path) -> pd.DataFrame:
        """Read ``path`` with ``pd.read_csv``, skipping malformed lines."""
        options = {
            "delimiter": ';',
            "decimal": ',',
            "header": 5,
            "on_bad_lines": 'skip',
        }
        frame = pd.read_csv(path, **options)
        return frame

class DataHandler(Handler):
    """Single data-processing step; acts as the identity unless given a callable."""

    def __init__(self, handler: Optional[Callable]=None):
        """Optionally install ``handler`` as this instance's ``handle``."""
        super().__init__()

        # Nothing to install: keep the class-level identity ``handle``.
        if handler is None:
            return
        self.update_handler(handler)

    def handle(self, data: pd.DataFrame) -> pd.DataFrame:
        """Default step: return ``data`` itself, unmodified."""
        unchanged = data
        return unchanged

class Composite(DataHandler):
    """Pipeline of data handlers applied in insertion order."""

    def __init__(self) -> None:
        """Start with an empty pipeline."""
        self.operations: List[DataHandler] = []

    def handle(self, data: pd.DataFrame) -> pd.DataFrame:
        """Feed ``data`` through each stored handler and return the final result."""
        result = data
        for step in self.operations:
            result = step.handle(result)
        return result

    def add(self, *operations: DataHandler) -> None:
        """Append the given handlers to the end of the pipeline."""
        self.operations.extend(operations)

    def remove(self, *operations: DataHandler) -> None:
        """Remove the given handlers; ``list.remove`` raises if one is absent."""
        for step in operations:
            self.operations.remove(step)
            
class DataModel(ABC):
    """Ties a file path to an injected reader and a processing pipeline.

    ``raw_data`` and ``data`` are computed on every access; nothing is cached.
    """

    def __init__(
        self,
        path: pathlib.Path,
        reader: Optional[Handler] = None,
        pipeline: Optional[DataHandler] = None,
    ) -> None:
        """Store the path, the reader and the pipeline.

        Args:
            path: Location of the file handed to the reader.
            reader: Any handler whose ``handle(path)`` returns a frame; a
                fresh ``FileHandler`` when omitted.
            pipeline: Handler applied to the raw data; a fresh, empty
                ``Composite`` when omitted.
        """
        # Defaults are built per instance. A ``Composite()`` created in the
        # signature would be shared by every model, so handlers added to one
        # model's pipeline would leak into all the others.
        self._reader = FileHandler() if reader is None else reader
        self._pipeline = Composite() if pipeline is None else pipeline
        self.path = path

    @property
    def path(self):
        """Path of the file this model reads from."""
        return self._path

    @path.setter
    def path(self, path: pathlib.Path):
        self._path = path

    @property
    def raw_data(self):
        """Frame produced by the reader, or an empty frame on a parse error."""
        try:
            return self._reader.handle(self.path)
        except ParserError as e:
            # Best effort: show the error and fall back to an empty frame
            # instead of propagating it.
            display(repr(e))
            return pd.DataFrame()

    @property
    def data(self):
        """Result of running the pipeline on ``raw_data``."""
        return self._pipeline.handle(self.raw_data)
    
        
# Demo of building a processing pipeline step by step.
# ``pathlib.Path`` is spelled out because this cell imports only ``pathlib``;
# a bare ``Path`` works only if an earlier cell happened to import it.
dm = DataModel(path=pathlib.Path('./patka-pomiary/CPE45_PFOBPy_comocat.txt'), reader=Hitachi())
display(dm.data[:5])
# Three identical steps, each adding 5 to every value (+15 in total).
dm._pipeline.add(DataHandler(handler=lambda df: df+5))
dm._pipeline.add(DataHandler(handler=lambda df: df+5))
dm._pipeline.add(DataHandler(handler=lambda df: df+5))
display(dm.data[:5])

# dm2 = DataModel(path=Path('./patka-pomiary/func39.txt'), reader=Avantes())
# display(dm2.data[:5])



Unnamed: 0,nm,Abs
0,1100.0,0.042
1,1099.0,0.042
2,1098.0,0.0419
3,1097.0,0.0418
4,1096.0,0.0417


Unnamed: 0,nm,Abs
0,1115.0,15.042
1,1114.0,15.042
2,1113.0,15.0419
3,1112.0,15.0418
4,1111.0,15.0417


In [110]:
import pandas as pd

# This cell imports only pandas, so bring ``pathlib`` in here rather than
# relying on a ``Path`` name left over from an earlier cell.
import pathlib

absorbance_csv = pathlib.Path('./patka-pomiary/CPE45_PFOBPy_comocat.txt')
fluorescence_csv = pathlib.Path('./patka-pomiary/func39.txt')

# Tab-delimited file with the column header at row index 27; malformed lines
# are skipped. Show the first 40 rows.
pd.read_csv(absorbance_csv,delimiter='\t',header=27,on_bad_lines='skip')[:40]

Unnamed: 0,nm,Abs
0,1100.0,0.042
1,1099.0,0.042
2,1098.0,0.0419
3,1097.0,0.0418
4,1096.0,0.0417
5,1095.0,0.0416
6,1094.0,0.0415
7,1093.0,0.0415
8,1092.0,0.0414
9,1091.0,0.0413
