# Data analysis can be split into three elementary steps:
## 1. Importing the data
All the methods necessary to **read** the file contents, parse the data and output it in the desired format, i.e. something like this:
 
```
import pathlib
import pandas as pd

def data_import(self, path: pathlib.Path) -> pd.DataFrame:
    """Sketch of an importer: read the file at ``path`` and return a DataFrame.

    This is an outline only; the parsing step that would build ``dataframe``
    is left as a placeholder.
    """
    # ... (read and parse the file here)
    # dataframe = (result of the parsing step)
    return dataframe
```

## 2. Processing the data
All the smoothing, normalization, interpolation, etc. should happen here. It is implemented using the composite design pattern.

## 3. Showing the data
Displaying the imported and processed data (this section is still to be written).

# Implementation using the bridge design pattern / dependency injection:


In [None]:
import itertools
import pathlib
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Callable, Optional, List

import numpy as np
import pandas as pd
from IPython.display import display
from pandas.errors import ParserError

class Handler(ABC):
    """Abstract base for objects that expose a single ``handle`` operation.

    Concrete subclasses must override :meth:`handle`. The operation of an
    individual instance can also be swapped at runtime with
    :meth:`update_handler`.
    """

    @abstractmethod
    def handle(self, *args, **kwargs):
        """Do the handler's work; must be overridden by subclasses."""
        raise NotImplementedError("You should implement this!")

    def update_handler(self, new_handler: Callable) -> None:
        """Replace this instance's ``handle`` with ``new_handler``.

        The callable is stored as a plain instance attribute, so it is not
        bound to the instance and will be called without ``self``.
        """
        setattr(self, "handle", new_handler)
        
class FileHandler(Handler):
    """Reader that brute-forces the layout of a delimited numeric text file.

    Every line index of the file is tried as the header row, combined with
    every delimiter (``;``, ``,``, tab) and decimal mark (``.``, ``,``). The
    first combination for which pandas can parse all columns as ``float64``
    wins.
    """

    def handle(self, path: pathlib.Path) -> pd.DataFrame:
        """Parse ``path`` and return the first successfully read DataFrame.

        Args:
            path: Location of the text file to read.

        Returns:
            The DataFrame produced by the first header/delimiter/decimal
            combination that parses without error.

        Raises:
            ParserError: If no combination can be parsed (for example when
                the file contains no lines).
        """
        # Loop-invariant: build the candidate dialects once.
        dialects = tuple(itertools.product([';', ',', '\t'], ['.', ',']))
        with open(path) as f:
            # The open handle is only used to count lines; pandas re-opens
            # the file itself on every attempt.
            for header_row, _ in enumerate(f):
                for delimiter, decimal in dialects:
                    try:
                        return pd.read_csv(
                            path,
                            delimiter=delimiter,
                            decimal=decimal,
                            header=header_row,
                            on_bad_lines='skip',
                            dtype=np.float64,
                        )
                    except (ValueError, TypeError):
                        # A failed guess (ParserError is a ValueError
                        # subclass); move on to the next combination.
                        # Anything else is a real bug and must surface.
                        continue
        # Signal failure explicitly instead of falling through to None.
        raise ParserError(f"Could not parse {path} as delimited numeric data")
    
class Hitachi(Handler):
    """Reader with a fixed layout: tab-delimited, header at row index 27.

    NOTE(review): presumably matches the export format of a Hitachi
    instrument -- confirm against a sample file.
    """

    def handle(self, path: pathlib.Path) -> pd.DataFrame:
        """Read ``path`` with the fixed settings, skipping malformed lines."""
        read_options = {
            'delimiter': '\t',
            'header': 27,
            'on_bad_lines': 'skip',
        }
        return pd.read_csv(path, **read_options)

class Avantes(Handler):
    """Reader with a fixed layout: ``;``-delimited, ``,`` as decimal mark,
    header at row index 5.

    NOTE(review): presumably matches the export format of an Avantes
    instrument -- confirm against a sample file.
    """

    def handle(self, path: pathlib.Path) -> pd.DataFrame:
        """Read ``path`` with the fixed settings, skipping malformed lines."""
        read_options = {
            'delimiter': ';',
            'decimal': ',',
            'header': 5,
            'on_bad_lines': 'skip',
        }
        return pd.read_csv(path, **read_options)

class DataHandler(Handler):
    """A processing step that maps a DataFrame to a DataFrame.

    Without a custom callable the step is the identity. Passing ``handler``
    installs that callable as this instance's ``handle``.
    """

    def __init__(self, handler: Optional[Callable]=None):
        """Create the step, optionally overriding ``handle`` with ``handler``."""
        super().__init__()

        # Guard clause: nothing to install, keep the identity behaviour.
        if handler is None:
            return
        self.update_handler(handler)

    def handle(self, data: pd.DataFrame) -> pd.DataFrame:
        """Return ``data`` unchanged (default behaviour)."""
        return data

class Composite(DataHandler):
    """A processing step made of an ordered list of other steps.

    ``handle`` feeds the data through every registered operation in the
    order in which they were added.
    """

    def __init__(self) -> None:
        """Create an empty composite."""
        # Run the base initialiser as well so the parent class is set up.
        super().__init__()
        self.operations: List[DataHandler] = []

    def handle(self, data: pd.DataFrame) -> pd.DataFrame:
        """Apply all registered operations in order and return the result."""
        for operation in self.operations:
            data = operation.handle(data)
        return data

    def add(self, *operations: DataHandler) -> None:
        """Append the given operations to the end of the pipeline."""
        self.operations.extend(operations)

    def remove(self, *operations: DataHandler) -> None:
        """Remove the first occurrence of each given operation.

        Raises:
            ValueError: If an operation is not in the pipeline.
        """
        for operation in operations:
            self.operations.remove(operation)
            
class DataModel(ABC):
    """Tie a file path to a reader and a processing pipeline.

    ``raw_data`` asks the reader to parse the file at ``path``; ``data``
    passes that result through the pipeline. Both are recomputed on every
    access, so the file is read again each time.
    """

    def __init__(
        self,
        path: pathlib.Path,
        reader: Optional["Handler"] = None,
        pipeline: Optional["DataHandler"] = None,
    ) -> None:
        """Create a model for the file at ``path``.

        Args:
            path: Location of the data file.
            reader: Object whose ``handle(path)`` returns a DataFrame.
                Defaults to a new ``FileHandler``.
            pipeline: Object whose ``handle(df)`` returns the processed
                DataFrame. Defaults to a new, empty ``Composite``.
        """
        # Build the defaults per instance. Default argument values are
        # evaluated once, so a ``Composite()`` default would be shared (and
        # mutated) by every model created without an explicit pipeline.
        self._reader = FileHandler() if reader is None else reader
        self._pipeline = Composite() if pipeline is None else pipeline
        self.path = path

    @property
    def path(self):
        """Path of the file this model reads."""
        return self._path

    @path.setter
    def path(self, path: pathlib.Path):
        self._path = path

    @property
    def raw_data(self):
        """Data as returned by the reader.

        If the reader raises ``ParserError``, the error is displayed and an
        empty DataFrame is returned instead.
        """
        try:
            return self._reader.handle(self.path)
        except ParserError as e:
            display(repr(e))
            return pd.DataFrame()

    @property
    def data(self):
        """``raw_data`` after it has been passed through the pipeline."""
        return self._pipeline.handle(self.raw_data)
    
        
# Demo 1: read a file with the Hitachi reader and show the first five rows
# before and after registering three "+5" steps on the pipeline.
dm = DataModel(
    path=Path('../patka-pomiary/CPE45_PFOBPy_comocat.txt'),
    reader=Hitachi(),
)
display(dm.data[:5])
dm._pipeline.add(*(DataHandler(handler=lambda df: df+5) for _ in range(3)))
display(dm.data[:5])

# Demo 2: same idea with the default reader and a single "+5" step.
dm2 = DataModel(path=Path('../patka-pomiary/func39.txt'))
display(dm2.data)
dm2._pipeline.add(DataHandler(handler=lambda df: df+5))
display(dm2.data)



In [53]:
import re
import numpy as np
import itertools

def force_open(path):
    """Brute-force the layout of a delimited numeric text file.

    Every line index of the file is tried as the header row, combined with
    every delimiter (``;``, ``,``, tab) and decimal mark (``.``, ``,``). The
    first combination that pandas can parse with all columns as ``float64``
    wins.

    Args:
        path: Location of the text file to read.

    Returns:
        The DataFrame from the first combination that parses, or ``None``
        when no combination works (for example when the file has no lines).
    """
    # Loop-invariant: build the candidate dialects once.
    dialects = tuple(itertools.product([';', ',', '\t'], ['.', ',']))
    with open(path) as f:
        # The open handle is only used to count lines; pandas re-opens the
        # file itself on every attempt.
        for header_row, _ in enumerate(f):
            for delimiter, decimal in dialects:
                try:
                    return pd.read_csv(
                        path,
                        delimiter=delimiter,
                        decimal=decimal,
                        header=header_row,
                        on_bad_lines='skip',
                        dtype=np.float64,
                    )
                except (ValueError, TypeError):
                    # A failed guess (ParserError is a ValueError subclass);
                    # try the next combination. Other exceptions indicate a
                    # real bug and are allowed to propagate.
                    continue
    return None
            

# Run the brute-force reader on both sample files and show the results.
df1, df2 = (
    force_open(Path(file_name))
    for file_name in (
        '../patka-pomiary/CPE45_PFOBPy_comocat.txt',
        '../patka-pomiary/func39.txt',
    )
)

display(df1, df2)


Unnamed: 0,nm,Abs
0,1100.0,0.0420
1,1099.0,0.0420
2,1098.0,0.0419
3,1097.0,0.0418
4,1096.0,0.0417
...,...,...
696,404.0,0.0988
697,403.0,0.1040
698,402.0,0.1092
699,401.0,0.1146


Unnamed: 0,[nm],[counts],[counts] .1,[counts] .2
900.12,0.000,0.000,0.000,0.00000
903.33,6707.667,6392.333,6602.000,315.33333
906.55,6594.467,6416.933,6549.600,177.53333
909.76,11168.933,10925.533,11102.867,243.40000
912.97,11050.667,10807.867,10998.467,242.80000
...,...,...,...,...
1692.65,10378.667,10290.600,10303.933,88.06667
1695.73,10633.000,10522.600,10547.733,110.40000
1698.80,9666.400,9549.800,9588.533,116.60000
1701.87,10612.000,10447.667,10467.667,164.33333


In [11]:
# Return the raw text of the file behind ``dm``; as the last expression of
# the cell, its value is echoed as the cell output.
dm.path.read_text()

"Sample:\t'CPE45_PFOBPy_comocat\nFile name:\tCPE45_PFOBPy_comocat.UDS\nRun Date:\t15:07:27, 01/28/2022\nOperator:\tpraca\nComment:\t'\n\nInstrument\nModel:\tU-2900 Spectrophotometer\nSerial Number:\t'1903-009\nROM Version:\t2J15301 01\n\nInstrument Parameters\nMeasurement Type:\tWavelength Scan\nData Mode:\tAbs\nStarting Wavelength:\t1100.0 nm\nEnding Wavelength:\t400.0 nm\nScan Speed:\t400 nm/min\nSampling Interval:\t1.0 nm\nSlit Width:\t1.50 nm\nLamp change mode:\tAuto\nAuto change wavelength:\t340.0 nm\nBaseline Correction:\tUser 1\nWait time:\t0 s\nCycle Time:\t54 min\nReplicates:\t1\nResponse:\tMedium\nPath Length:\t10.0 mm\n(Abs values are corrected to 10 mm path length)\n\nData Points\nnm\tAbs\n1100.0\t0.0420\n1099.0\t0.0420\n1098.0\t0.0419\n1097.0\t0.0418\n1096.0\t0.0417\n1095.0\t0.0416\n1094.0\t0.0415\n1093.0\t0.0415\n1092.0\t0.0414\n1091.0\t0.0413\n1090.0\t0.0413\n1089.0\t0.0412\n1088.0\t0.0412\n1087.0\t0.0412\n1086.0\t0.0412\n1085.0\t0.0412\n1084.0\t0.0412\n1083.0\t0.0413\n1