In [10]:
import pandas as pd
from typing import List
import re



In [13]:
def read_irn_header_csv(file_path: str) -> List[str]:
    """Collect the leading ``<name> irn = <number>`` lines of a text file.

    The file is scanned from its first line. Every line that contains the
    ``<name> irn = <number>`` pattern is stored with surrounding whitespace
    stripped; scanning stops at the first line that does not contain it.

    Args:
        file_path: Path of the file to scan.

    Returns:
        The stripped header lines in file order (empty when the first line
        does not match).
    """
    header_pattern = re.compile(r"[\w\d]+\sirn\s=\s\d+")
    header_lines: List[str] = []
    with open(file_path, "r") as csv_file:
        for raw_line in csv_file:
            # The header is a contiguous run at the top of the file, so the
            # first non-matching line ends it.
            if header_pattern.search(raw_line) is None:
                break
            header_lines.append(raw_line.strip())
    return header_lines


In [12]:
# Read the irn header lines of both CSV exports with the same helper,
# read_irn_header_csv (there is no function named read_irn_header).
h1 = read_irn_header_csv("data/davr15m_1.csv")
h2 = read_irn_header_csv("data/davr15m_2.csv")

print(h1)
print(h2)

['RTPA110TR411I irn = 112981352', 'RTPA110TR411P irn = 112982352', 'RTPA110TR411Q irn = 112983352', 'RTPA110TR411U irn = 112972352', 'RTPA110TR412I irn = 113163352', 'RTPA110TR412P irn = 113156352', 'RTPA110TR412Q irn = 113320352', 'RTPA110TR412U irn = 112783352', 'RTPA400TR411I irn = 118089352', 'RTPA400TR411P irn = 118095352', 'RTPA400TR411Q irn = 118086352', 'RTPA400TR411U irn = 118090352', 'RTPA400TR412I irn = 118479352', 'RTPA400TR412P irn = 118486352', 'RTPA400TR412Q irn = 118487352', 'RTPA400TR412U irn = 118490352', 'RTPB110TR411I irn = 104620352', 'RTPB110TR411P irn = 104684352', 'RTPB110TR411Q irn = 104674352', 'RTPB110TR411U irn = 104623352', 'RTPB110TR412I irn = 108888352', 'RTPB110TR412P irn = 108917352', 'RTPB110TR412Q irn = 108893352', 'RTPB110TR412U irn = 108887352', 'RTPB400TR411I irn = 103677352', 'RTPB400TR411P irn = 103693352', 'RTPB400TR411Q irn = 103682352', 'RTPB400TR411U irn = 103696352', 'RTPB400TR412I irn = 104015352', 'RTPB400TR412P irn = 104016352', 'RTPB400T

In [21]:
# Lookup table: irn number -> signal name, matching the "<name> irn = <number>"
# header lines. read_meritve_csv uses it to translate the OBE_IRN column.
headers = {
    112981352: 'RTPA110TR411I',
    112982352: 'RTPA110TR411P',
    112983352: 'RTPA110TR411Q',
    112972352: 'RTPA110TR411U',
    113163352: 'RTPA110TR412I',
    113156352: 'RTPA110TR412P',
    113320352: 'RTPA110TR412Q',
    112783352: 'RTPA110TR412U',
    118089352: 'RTPA400TR411I',
    118095352: 'RTPA400TR411P',
    118086352: 'RTPA400TR411Q',
    118090352: 'RTPA400TR411U',
    118479352: 'RTPA400TR412I',
    118486352: 'RTPA400TR412P',
    118487352: 'RTPA400TR412Q',
    118490352: 'RTPA400TR412U',
    104620352: 'RTPB110TR411I',
    104684352: 'RTPB110TR411P',
    104674352: 'RTPB110TR411Q',
    104623352: 'RTPB110TR411U',
    108888352: 'RTPB110TR412I',
    108917352: 'RTPB110TR412P',
    108893352: 'RTPB110TR412Q',
    108887352: 'RTPB110TR412U',
    103677352: 'RTPB400TR411I',
    103693352: 'RTPB400TR411P',
    103682352: 'RTPB400TR411Q',
    103696352: 'RTPB400TR411U',
    104015352: 'RTPB400TR412I',
    104016352: 'RTPB400TR412P',
    104012352: 'RTPB400TR412Q',
    104014352: 'RTPB400TR412U',
}

In [83]:
def read_meritve_csv(file_path: str, skip_lines: int) -> pd.DataFrame:
    """Load the measurement table from a ';'-delimited CSV file.

    Steps: skip the first ``skip_lines`` lines, drop the last column and the
    ``TIMESTAMP`` column, parse ``SYSTIME`` into datetimes, translate
    ``OBE_IRN`` through the notebook-level ``headers`` mapping, and discard
    every row that contains a missing value (this includes rows whose
    ``OBE_IRN`` is not a key of ``headers``).

    The dtypes of the columns (before ``TIMESTAMP`` is removed) are printed
    as a side effect.

    Args:
        file_path: Path of the CSV file.
        skip_lines: Number of leading lines to skip before the column header.

    Returns:
        The cleaned measurements as a DataFrame.
    """
    raw = pd.read_csv(file_path, skiprows=skip_lines, delimiter=";", dtype={"SYSTIME": str})
    # remove the last column
    trimmed = raw.iloc[:, :-1]
    print(trimmed.dtypes)
    measurements = trimmed.drop(columns=["TIMESTAMP"]).assign(
        # convert systime into datetime format
        SYSTIME=lambda frame: pd.to_datetime(frame["SYSTIME"], format="%Y%m%d%H%M%S%f"),
        # replace irn numbers by their names; unknown numbers become NaN
        OBE_IRN=lambda frame: frame["OBE_IRN"].map(headers),
    )
    return measurements.dropna(axis=0, how="any")

In [84]:
# Load the measurements of the first CSV, skipping the irn header lines found
# in h1 plus one further line.
# NOTE(review): presumably one extra non-data line follows the header - confirm.
data = read_meritve_csv("data/davr15m_1.csv", skip_lines=len(h1) + 1)

OBE_IRN           int64
SYSTIME          object
VALUE           float64
QST_NO            int64
TIMESTAMP       float64
CALC_COUNTER      int64
dtype: object


In [81]:
# Summarise the loaded frame: columns, non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 171826 entries, 0 to 171829
Data columns (total 5 columns):
 #   Column        Non-Null Count   Dtype         
---  ------        --------------   -----         
 0   OBE_IRN       171826 non-null  object        
 1   SYSTIME       171826 non-null  datetime64[ns]
 2   VALUE         171826 non-null  float64       
 3   QST_NO        171826 non-null  int64         
 4   CALC_COUNTER  171826 non-null  int64         
dtypes: datetime64[ns](1), float64(1), int64(2), object(1)
memory usage: 7.9+ MB


In [82]:
# Preview the first rows of the loaded measurements.
data.head()

Unnamed: 0,OBE_IRN,SYSTIME,VALUE,QST_NO,CALC_COUNTER
0,RTPB400TR411I,2017-03-01,184.907222,1,15
1,RTPB400TR411Q,2017-03-01,20.284333,1,15
2,RTPB400TR411P,2017-03-01,130.111888,1,15
3,RTPB400TR411U,2017-03-01,411.623557,1,15
4,RTPB400TR412Q,2017-03-01,17.208333,1,15


## Read Excel

In [86]:
# Path of the Excel workbook used by the cells below.
file= "data/davr15m_3.xls"

# Open the workbook once so its sheets can be listed and read from `xls`.
xls = pd.ExcelFile(file)

# Prints the ExcelFile object itself, not the workbook contents.
print(xls)

<pandas.io.excel._base.ExcelFile object at 0x000001E1BAC79B20>


In [87]:
# List the worksheet names available in the opened workbook.
xls.sheet_names

['Obnovljeno_List1']

In [88]:
# Load the 'Obnovljeno_List1' worksheet of the opened workbook into a DataFrame.
df = pd.read_excel(xls, 'Obnovljeno_List1')

In [89]:
# Display the DataFrame read from the worksheet.
df

Unnamed: 0,Čas,Postaja,Podsistem,EXID,Vrednost,Kvaliteta
0,13.10.2021 10:00:00,RTPC,TOBER,RTPC110POLJEDI,0,**
1,13.10.2021 11:00:00,RTPC,TOBER,RTPC110POLJEDI,0,**


In [92]:
# Read the 'Obnovljeno_List1' worksheet directly from the file path.
# This workbook contains only that one sheet (see xls.sheet_names), so no
# other sheet name is requested here.
df = pd.read_excel("data/davr15m_3.xls", sheet_name='Obnovljeno_List1')

In [91]:
# Display the DataFrame read from the Excel file.
df

Unnamed: 0,Čas,Postaja,Podsistem,EXID,Vrednost,Kvaliteta
0,13.10.2021 10:00:00,RTPC,TOBER,RTPC110POLJEDI,0,**
1,13.10.2021 11:00:00,RTPC,TOBER,RTPC110POLJEDI,0,**
