In [10]:
import pandas as pd
from typing import List
import re



In [13]:
def read_irn_header_csv(file_path: str) -> List[str]:
    """Collect the leading ``<name> irn = <number>`` lines of a file.

    The file is scanned from the top. Every line that contains the pattern
    is stripped of surrounding whitespace and collected; scanning stops at
    the first line that does not contain it.

    Args:
        file_path: Path of the file to scan.

    Returns:
        The stripped matching lines in file order (an empty list when the
        first line does not match or the file is empty).
    """
    irn_line = re.compile(r"[\w\d]+\sirn\s=\s\d+")
    collected = []
    with open(file_path, "r") as handle:
        for raw_line in handle:
            if not irn_line.search(raw_line):
                # Stop at the first non-matching line; later lines are never inspected.
                break
            collected.append(raw_line.strip())

    return collected


In [12]:
# Read the IRN header lines from both CSV exports.
# Both calls use read_irn_header_csv, the helper defined in this notebook.
h1 = read_irn_header_csv("data/davr15m_1.csv")
h2 = read_irn_header_csv("data/davr15m_2.csv")

print(h1)
print(h2)

['RTPA110TR411I irn = 112981352', 'RTPA110TR411P irn = 112982352', 'RTPA110TR411Q irn = 112983352', 'RTPA110TR411U irn = 112972352', 'RTPA110TR412I irn = 113163352', 'RTPA110TR412P irn = 113156352', 'RTPA110TR412Q irn = 113320352', 'RTPA110TR412U irn = 112783352', 'RTPA400TR411I irn = 118089352', 'RTPA400TR411P irn = 118095352', 'RTPA400TR411Q irn = 118086352', 'RTPA400TR411U irn = 118090352', 'RTPA400TR412I irn = 118479352', 'RTPA400TR412P irn = 118486352', 'RTPA400TR412Q irn = 118487352', 'RTPA400TR412U irn = 118490352', 'RTPB110TR411I irn = 104620352', 'RTPB110TR411P irn = 104684352', 'RTPB110TR411Q irn = 104674352', 'RTPB110TR411U irn = 104623352', 'RTPB110TR412I irn = 108888352', 'RTPB110TR412P irn = 108917352', 'RTPB110TR412Q irn = 108893352', 'RTPB110TR412U irn = 108887352', 'RTPB400TR411I irn = 103677352', 'RTPB400TR411P irn = 103693352', 'RTPB400TR411Q irn = 103682352', 'RTPB400TR411U irn = 103696352', 'RTPB400TR412I irn = 104015352', 'RTPB400TR412P irn = 104016352', 'RTPB400T

In [21]:
# IRN (integer id) -> measurement name, used to translate the OBE_IRN column.
# The pairs mirror the "<name> irn = <number>" header lines of the CSV files.
headers = {
    112981352: 'RTPA110TR411I',
    112982352: 'RTPA110TR411P',
    112983352: 'RTPA110TR411Q',
    112972352: 'RTPA110TR411U',
    113163352: 'RTPA110TR412I',
    113156352: 'RTPA110TR412P',
    113320352: 'RTPA110TR412Q',
    112783352: 'RTPA110TR412U',
    118089352: 'RTPA400TR411I',
    118095352: 'RTPA400TR411P',
    118086352: 'RTPA400TR411Q',
    118090352: 'RTPA400TR411U',
    118479352: 'RTPA400TR412I',
    118486352: 'RTPA400TR412P',
    118487352: 'RTPA400TR412Q',
    118490352: 'RTPA400TR412U',
    104620352: 'RTPB110TR411I',
    104684352: 'RTPB110TR411P',
    104674352: 'RTPB110TR411Q',
    104623352: 'RTPB110TR411U',
    108888352: 'RTPB110TR412I',
    108917352: 'RTPB110TR412P',
    108893352: 'RTPB110TR412Q',
    108887352: 'RTPB110TR412U',
    103677352: 'RTPB400TR411I',
    103693352: 'RTPB400TR411P',
    103682352: 'RTPB400TR411Q',
    103696352: 'RTPB400TR411U',
    104015352: 'RTPB400TR412I',
    104016352: 'RTPB400TR412P',
    104012352: 'RTPB400TR412Q',
    104014352: 'RTPB400TR412U',
}

In [83]:
def read_meritve_csv(file_path: str, skip_lines: int, irn_names: dict = None) -> pd.DataFrame:
    """Load the measurement rows of a semicolon-delimited CSV export.

    Steps: skip the leading ``skip_lines`` lines, drop the last column and the
    ``TIMESTAMP`` column, parse ``SYSTIME`` into datetimes, translate
    ``OBE_IRN`` ids into names, and drop every row that still holds a missing
    value (this includes rows whose id is not in the mapping).

    Args:
        file_path: Path of the CSV file.
        skip_lines: Number of lines to skip before the column-name row.
        irn_names: Mapping from IRN id to name. When omitted, the
            notebook-level ``headers`` dict is used.

    Returns:
        A new DataFrame with the cleaned rows; the original row labels are
        kept, so the index may contain gaps.
    """
    if irn_names is None:
        # Fall back to the mapping defined at notebook level.
        irn_names = headers

    raw = pd.read_csv(file_path, skiprows=skip_lines, delimiter=";", dtype={"SYSTIME": str})

    # Remove the last column (presumably an empty one produced by a trailing
    # delimiter - confirm against the file). The explicit copy means later
    # column assignments never write into a slice of `raw`.
    trimmed = raw.iloc[:, :-1].copy()
    print(trimmed.dtypes)

    return (
        trimmed
        .drop(columns=["TIMESTAMP"])
        .assign(
            # Convert SYSTIME strings into datetime values.
            SYSTIME=lambda frame: pd.to_datetime(frame["SYSTIME"], format="%Y%m%d%H%M%S%f"),
            # Ids missing from the mapping become NaN and are dropped below.
            OBE_IRN=lambda frame: frame["OBE_IRN"].map(irn_names),
        )
        .dropna(axis=0, how="any")
    )

In [84]:
# Skip every IRN header line plus one additional line before the column names
# (presumably a separator line - confirm against the file).
rows_to_skip = len(h1) + 1
data = read_meritve_csv("data/davr15m_1.csv", skip_lines=rows_to_skip)

OBE_IRN           int64
SYSTIME          object
VALUE           float64
QST_NO            int64
TIMESTAMP       float64
CALC_COUNTER      int64
dtype: object


In [81]:
# Summarise the parsed frame: column dtypes, non-null counts and memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 171826 entries, 0 to 171829
Data columns (total 5 columns):
 #   Column        Non-Null Count   Dtype         
---  ------        --------------   -----         
 0   OBE_IRN       171826 non-null  object        
 1   SYSTIME       171826 non-null  datetime64[ns]
 2   VALUE         171826 non-null  float64       
 3   QST_NO        171826 non-null  int64         
 4   CALC_COUNTER  171826 non-null  int64         
dtypes: datetime64[ns](1), float64(1), int64(2), object(1)
memory usage: 7.9+ MB


In [82]:
# Preview the first rows of the parsed measurements.
data.head()

Unnamed: 0,OBE_IRN,SYSTIME,VALUE,QST_NO,CALC_COUNTER
0,RTPB400TR411I,2017-03-01,184.907222,1,15
1,RTPB400TR411Q,2017-03-01,20.284333,1,15
2,RTPB400TR411P,2017-03-01,130.111888,1,15
3,RTPB400TR411U,2017-03-01,411.623557,1,15
4,RTPB400TR412Q,2017-03-01,17.208333,1,15


# Meritve

In [1]:
import pandas as pd
from sqlalchemy import create_engine

In [2]:
# SQLAlchemy engine for the SQLite file data/obratovalni_podatki.db
# (three slashes: the path is relative to the working directory).
engine = create_engine("sqlite:///data/obratovalni_podatki.db")

In [4]:
# Load the whole "meritve" table through the engine into a DataFrame.
data = pd.read_sql_table("meritve", con=engine)

In [5]:
# Preview the first rows read from the database.
data.head()

Unnamed: 0,OBE_IRN,SYSTIME,VALUE,QST_NO,CALC_COUNTER
0,RTPB400TR411I,2017-03-01,184.907222,1,15
1,RTPB400TR411Q,2017-03-01,20.284333,1,15
2,RTPB400TR411P,2017-03-01,130.111888,1,15
3,RTPB400TR411U,2017-03-01,411.623557,1,15
4,RTPB400TR412Q,2017-03-01,17.208333,1,15


In [81]:
# Split each OBE_IRN into a location prefix ("Lokacija") and the trailing
# measured-quantity code ("merjena_vrednost": TEMP, P, Q, I, U or T), then
# pivot so that every quantity becomes its own column.
# The character class has no commas, so a trailing "," is never treated as a
# quantity code; one shared pattern keeps extract and replace in agreement.
suffix_pattern = r"(TEMP|[PQIUT])$"

data = pd.read_sql_table("meritve", con=engine)
# expand=False returns a Series for the single capture group.
data["merjena_vrednost"] = data["OBE_IRN"].str.extract(suffix_pattern, expand=False)
data["Lokacija"] = data["OBE_IRN"].str.replace(suffix_pattern, "", regex=True)
unique_lokacije = data["Lokacija"].unique()
# pivot_table aggregates duplicate (Lokacija, SYSTIME, quantity) entries with
# its default aggregation function (the mean).
pivoted_data = data.pivot_table(index=["Lokacija", "SYSTIME"], columns=["merjena_vrednost"], values="VALUE")
pivoted_data.head(24)

Unnamed: 0_level_0,merjena_vrednost,I,P,Q,TEMP,U
Lokacija,SYSTIME,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
RTPA110TR411,2017-03-01 00:00:00,368.853999,-74.266889,16.415,,118.968446
RTPA110TR411,2017-03-01 00:15:00,341.035664,-69.185778,13.821778,,119.463778
RTPA110TR411,2017-03-01 00:30:00,353.100662,-71.377444,13.411222,,118.884554
RTPA110TR411,2017-03-01 00:45:00,365.88455,-73.80489,13.976889,,118.699997
RTPA110TR411,2017-03-01 01:00:00,360.739776,-72.808111,14.431778,,118.840445
RTPA110TR411,2017-03-01 01:15:00,339.430554,-68.717223,13.883222,,119.221111
RTPA110TR411,2017-03-01 01:30:00,326.018443,-65.756667,13.230889,,118.800003
RTPA110TR411,2017-03-01 01:45:00,322.203775,-65.044666,13.429222,,118.957778
RTPA110TR411,2017-03-01 02:00:00,312.195439,-62.969,13.359556,,119.000111
RTPA110TR411,2017-03-01 02:15:00,312.065001,-62.950001,13.896778,,119.455556


In [46]:
# Keep only the first 24 rows of the pivoted table as a small working sample.
small_data = pivoted_data.head(24)

In [53]:
# Hourly means for one location. The bin width is given as a Timedelta rather
# than the "H" string alias, which recent pandas releases deprecate.
small_data.loc["RTPA110TR411", :].resample(pd.Timedelta(hours=1)).mean()

merjena_vrednost,I,P,Q,TEMP,U
SYSTIME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-03-01 00:00:00,357.218719,-72.15875,14.406222,,119.004194
2017-03-01 01:00:00,337.098137,-68.081667,13.743778,,118.954834
2017-03-01 02:00:00,312.7135,-63.123667,13.903278,,119.318556
2017-03-01 03:00:00,307.191251,-61.458139,15.681111,,119.366278
2017-03-01 04:00:00,304.176138,-59.972666,18.181861,,118.958778
2017-03-01 05:00:00,315.619222,-62.459778,17.756472,,118.8035


In [None]:
# Size of an object

In [2]:
import sys

# Size in bytes that sys.getsizeof reports for a short string.
# Note: this rebinds the name `data` to a str.
data = "sddsdssddsds"
sys.getsizeof(data)

61

## API klic

https://docs.python-requests.org/en/latest/

In [88]:
import requests
from bs4 import BeautifulSoup

In [90]:
# Download the ARSO surface-observations page and parse its HTML.
# The timeout keeps the cell from hanging on an unresponsive server, and
# raise_for_status() surfaces HTTP errors instead of parsing an error page.
data = requests.get("http://meteo.arso.gov.si/met/sl/weather/observ/surface/", timeout=30)
data.raise_for_status()

soup = BeautifulSoup(data.text, 'html.parser')

In [123]:
# Positional lookup: the fifth <table> found inside the first <table> of the
# page. The indices are hard-coded, so the result depends on the page layout.
soup.find_all("table")[0].find_all("table")[4]

<table class="sirina_strani_ac" id="leg">
<tr>
<td class="legLeft">
<div class="heading" id="nogaLog">
<div class="padding">
<div class="siteFunctions"><a class="myAccountLink" href="/met/sl/weather/observ/surface?op=auth;method=init">Visitor</a> <a class="loginToggleLink" href="/met/sl/weather/observ/surface?op=auth;method=init">Log in.</a><br/> </div>
<div class="copyright">© 2021 ARSO</div>
<div class="endFloat"></div>
</div>
</div>
</td>
<td celspan="2" class="legCenter">
<!-- noga -->
<div id="noga">
<div>Ministrstvo za okolje in prostor<br/> AGENCIJA REPUBLIKE SLOVENIJE ZA OKOLJE<br/>
            Vojkova 1b, SI-1000 Ljubljana, Slovenja Tel: +386 1 4784 000 Fax: +386 1 4784 052<br/>
<a href="//www.arso.gov.si/Izjava_o_dostopnosti.html" title="Izjava o dostopnosti">Izjava o dostopnosti</a></div>
</div>
<!-- /noga -->
</td>
</tr>
</table>

In [128]:
# Fetch the bicikelj station list and decode the JSON body.
# The timeout prevents an indefinite hang; raise_for_status() fails loudly on
# an HTTP error before any attempt to decode the response.
response = requests.get("http://opendata.si/promet/bicikelj/list/", timeout=30)
response.raise_for_status()
data = response.json()

In [134]:
# (name, 'available' value) for every entry under data['markers'].
[(postaja['name'], postaja['station']['available']) for postaja in data['markers'].values()]

[('PREŠERNOV TRG-PETKOVŠKOVO NABREŽJE', '18'),
 ('POGAČARJEV TRG-TRŽNICA', '13'),
 ('KONGRESNI TRG-ŠUBIČEVA ULICA', '4'),
 ('CANKARJEVA UL.-NAMA', '9'),
 ('BREG', '15'),
 ('GRUDNOVO NABREŽJE-KARLOVŠKA C.', '12'),
 ('MIKLOŠIČEV PARK', '8'),
 ('BAVARSKI DVOR', '8'),
 ('TRG OF-KOLODVORSKA UL.', '1'),
 ('MASARYKOVA DDC', '6'),
 ('VILHARJEVA CESTA', '11'),
 ('PARK NAVJE-ŽELEZNA CESTA', '1'),
 ('TRG MDB', '15'),
 ('PARKIRIŠČE NUK 2-FF', '3'),
 ('AMBROŽEV TRG', '11'),
 ('GH ŠENTPETER-NJEGOŠEVA C.', '12'),
 ('ILIRSKA ULICA', '6'),
 ('TRŽAŠKA C.-ILIRIJA', '6'),
 ('TIVOLI', '5'),
 ('STARA CERKEV', '2'),
 ('KINO ŠIŠKA', '2'),
 ('ŠPICA', '9'),
 ('BARJANSKA C.-CENTER STAREJŠIH TRNOVO', '10'),
 ('ZALOŠKA C.-GRABLOVIČEVA C.', '12'),
 ('TRŽNICA MOSTE', '3'),
 ('ROŽNA DOLINA-ŠKRABČEVA UL.', '7'),
 ('DUNAJSKA C.-PS PETROL', '0'),
 ('PLEČNIKOV STADION', '12'),
 ('DUNAJSKA C.-PS MERCATOR', '3'),
 ('LIDL - VOJKOVA CESTA', '0'),
 ('ŠPORTNI CENTER STOŽICE', '6'),
 ('KOPRSKA ULICA', '1'),
 ('MERCATOR CENTER Š