In [2]:
import os
import polars as pl
from pathlib import Path
import pandas as pd
import numpy as np

In [3]:
# Count the entries currently present in the STK dataset directory.
stk_data_dir = 'data/datove_sady_stk/'
downloaded = os.listdir(stk_data_dir)
len(downloaded)

2183

In [4]:
# Count the entries in the measuring-instrument data directory.
# Note: this rebinds `downloaded`, replacing any earlier listing held under that name.
instrument_data_dir = 'data/data_z_mericich_pristroju/'
downloaded = os.listdir(instrument_data_dir)
len(downloaded)

2179

In [None]:
# Count the entries in the directory of extracted (selected) STK datasets.
extracted_dir = 'stazeni/vybrane_datove_sady_stk/'
extracted = os.listdir(extracted_dir)
len(extracted)

2183

In [4]:
# Count the entries in the parquet output directory for inspections.
parquet_dir = 'data/parquet/prohlidky'
parsed = os.listdir(parquet_dir)
len(parsed)

2183

In [12]:
# Delete, from the download staging directory, every file whose name appears in `downloaded`.
# NOTE(review): the cells that populate `downloaded` list directories under data/, while the
# files are removed from stazeni/ — confirm that the names are expected to match before running.
download_staging_dir = 'stazeni/datove_sady_stk/'
for entry_name in downloaded:
    os.remove(os.path.join(download_staging_dir, entry_name))

In [5]:
# Delete every file named in `extracted` from the extracted-datasets staging directory.
extracted_staging_dir = 'stazeni/vybrane_datove_sady_stk/'
for entry_name in extracted:
    os.remove(os.path.join(extracted_staging_dir, entry_name))

In [9]:
# 1. Unified schema: every inspection column is read as a string, so parquet
#    files that disagree on a column's type can still be scanned together.
#    (Per the original author's note, forcing `RozsahProhlidky` to String is
#    what resolved a SchemaError.)
_prohlidky_columns = (
    # --- Primary inspection fields ---
    "CisloProtokolu",
    "DatumProhlidky",
    "DruhProhlidky",
    "RozsahProhlidky",
    "Prohlidka_OdpovednaOsoba",
    # --- Station (Stanice) fields ---
    "Prohlidka_Stanice_Cislo",
    "Prohlidka_Stanice_Kraj",
    "Prohlidka_Stanice_ORP",
    "Prohlidka_Stanice_Obec",
    # --- Inspection start / end timestamps ---
    "Prohlidka_Zahajeni",
    "Prohlidka_Ukonceni",
    # --- Administrative correction (AdministrativniOprava) fields ---
    "AdministrativniOprava_CisloProtokolu",
    "AdministrativniOprava_DatumProhlidky",
    # --- Vehicle (Vozidlo) fields ---
    "Vozidlo_Vin",
    "Vozidlo_Druh",
    "Vozidlo_Kategorie",
    "Vozidlo_Provedeni",
    "Vozidlo_Znacka",
    "Vozidlo_ObchodniOznaceni",
    "Vozidlo_TypMotoru",
    # --- Registration (Registrace) fields ---
    "Registrace_DatumPrvni",
    "Registrace_Stat",
    "Registrace_CisloDokladu",
    # --- Emissions part (Emise) fields ---
    "Emise_CisloProtokolu",
    "Emise_DatumProhlidky",
    "Emise_Stanice_Cislo",
    "Emise_Stanice_Kraj",
    "Emise_Stanice_ORP",
    "Emise_Stanice_Obec",
    "Emise_Zahajeni",
    "Emise_Ukonceni",
    "Emise_OdpovednaOsoba",
    "Emise_ZakladniPalivo",
    "Emise_AlternativniPalivo",
    "Emise_EmisniSystem",
    "Emise_VyrobceMotoru",
    "Emise_CisloMotoru",
    "Emise_RokVyroby",
    # --- Technical part (Technicka) fields ---
    "Technicka_Zahajeni",
    "Technicka_Ukonceni",
    "Technicka_OdpovednaOsoba",
    # --- ADR part fields ---
    "Adr_Zahajeni",
    "Adr_Ukonceni",
    "Adr_OdpovednaOsoba",
    "Adr_Platnost_Periodicka",
    "Adr_Platnost_Meziperiodicka",
    "Adr_KodCisterny",
    "Adr_CisloOsvedceni",
    "Adr_ZavadyText",
    "Adr_Poznamka",
    # --- Tsk part fields ---
    "Tsk_Zahajeni",
    "Tsk_Ukonceni",
    "Tsk_OdpovednaOsoba",
    # --- Result (Vysledek) fields ---
    "Vysledek_Odometr",
    "Vysledek_Poznamka",
    "Vysledek_DatumPristiProhlidky",
    "Vysledek_NalepkaVylepena",
    "Vysledek_Celkovy",
)

# Column name -> pl.String, in the order listed above.
prohlidky_schema = dict.fromkeys(_prohlidky_columns, pl.String)

# 2. Lazily scan all inspection parquet files, forcing the unified schema
parquet_glob = "data/parquet/prohlidky/*.parquet"
lazy_df = pl.scan_parquet(parquet_glob, schema=prohlidky_schema)

# 3. Build the query: TESLA vehicles inspected where the station's `Obec` is Hlinsko.
#    `Technicka_Zahajeni` is declared as String in the schema, so the sort is lexicographic.
is_hlinsko_station = pl.col("Prohlidka_Stanice_Obec") == "Hlinsko"
is_tesla = pl.col('Vozidlo_Znacka') == 'TESLA'
result_query = (
    lazy_df
    .filter(is_hlinsko_station)
    .filter(is_tesla)
    .sort('Technicka_Zahajeni')
    .limit(1000)
)

# 4. Run the plan and show the last ten rows of the (at most 1000-row) result
final_result = result_query.collect()
display(final_result.tail(10))

CisloProtokolu,DatumProhlidky,DruhProhlidky,RozsahProhlidky,Prohlidka_OdpovednaOsoba,Prohlidka_Stanice_Cislo,Prohlidka_Stanice_Kraj,Prohlidka_Stanice_ORP,Prohlidka_Stanice_Obec,Prohlidka_Zahajeni,Prohlidka_Ukonceni,AdministrativniOprava_CisloProtokolu,AdministrativniOprava_DatumProhlidky,Vozidlo_Vin,Vozidlo_Druh,Vozidlo_Kategorie,Vozidlo_Provedeni,Vozidlo_Znacka,Vozidlo_ObchodniOznaceni,Vozidlo_TypMotoru,Registrace_DatumPrvni,Registrace_Stat,Registrace_CisloDokladu,Emise_CisloProtokolu,Emise_DatumProhlidky,Emise_Stanice_Cislo,Emise_Stanice_Kraj,Emise_Stanice_ORP,Emise_Stanice_Obec,Emise_Zahajeni,Emise_Ukonceni,Emise_OdpovednaOsoba,Emise_ZakladniPalivo,Emise_AlternativniPalivo,Emise_EmisniSystem,Emise_VyrobceMotoru,Emise_CisloMotoru,Emise_RokVyroby,Technicka_Zahajeni,Technicka_Ukonceni,Technicka_OdpovednaOsoba,Adr_Zahajeni,Adr_Ukonceni,Adr_OdpovednaOsoba,Adr_Platnost_Periodicka,Adr_Platnost_Meziperiodicka,Adr_KodCisterny,Adr_CisloOsvedceni,Adr_ZavadyText,Adr_Poznamka,Tsk_Zahajeni,Tsk_Ukonceni,Tsk_OdpovednaOsoba,Vysledek_Odometr,Vysledek_Poznamka,Vysledek_DatumPristiProhlidky,Vysledek_NalepkaVylepena,Vysledek_Celkovy
str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""CZ-3632-22-05-1351""","""2022-05-30""","""Pravidelná""","""Plný""","""27755""","""3632""","""Pardubický kraj""","""Hlinsko""","""Hlinsko""","""2022-05-30T07:18:51.7700000+02…","""2022-05-30T08:14:22.8400000+02…",,,"""5YJXCCE22HF041874""","""OSOBNÍ AUTOMOBIL""","""M1""","""AB HATCHBACK""","""TESLA""","""MODEL X""","""L2S""","""2017-06-09T00:00:00.0000000+02…","""Česká republika""","""UJ 238223""",,,,,,,,,,,,,,,,"""2022-05-30T07:46:42.0000000+02…","""2022-05-30T08:13:00.0000000+02…","""1108""",,,,,,,,,,,,,"""145206""",,"""2024-05-30""","""true""","""1"""
"""CZ-3632-23-01-0637""","""2023-01-26""","""Evidencní kontrola""","""Plný""","""27755""","""3632""","""Pardubický kraj""","""Hlinsko""","""Hlinsko""","""2023-01-26T11:18:58.2200000+01…","""2023-01-26T11:41:40.0870000+01…",,,"""5YJXCCE22HF041874""","""OSOBNÍ AUTOMOBIL""","""M1""","""AB HATCHBACK""","""TESLA""","""MODEL X""","""L2S""","""2017-06-09T00:00:00.0000000+02…","""Česká republika""","""UJ238223""",,,,,,,,,,,,,,,,"""2023-01-26T11:28:59.0000000+01…","""2023-01-26T11:41:07.0000000+01…","""92433""",,,,,,,,,,,,,"""162956""",,,"""false""","""1"""
"""CZ-3632-23-09-0267""","""2023-09-11""","""Pravidelná""","""Plný""","""1109""","""3632""","""Pardubický kraj""","""Hlinsko""","""Hlinsko""","""2023-09-11T08:32:05.2770000+02…","""2023-09-11T09:27:41.4330000+02…",,,"""5YJSA7H43FF081561""","""OSOBNÍ AUTOMOBIL""","""M1""","""AB HATCHBACK""","""TESLA""","""MODEL S""","""L2S/L1S""","""2015-06-15T00:00:00.0000000+02…","""Česká republika""","""UH 404600""",,,,,,,,,,,,,,,,"""2023-09-11T08:58:57.0000000+02…","""2023-09-11T09:25:08.0000000+02…","""1108""",,,,,,,,,,,,,"""328702""",,"""2025-09-11""","""true""","""1"""
"""CZ-3632-23-09-0286""","""2023-09-11""","""Pravidelná""","""Plný""","""27755""","""3632""","""Pardubický kraj""","""Hlinsko""","""Hlinsko""","""2023-09-11T10:48:43.8970000+02…","""2023-09-11T14:02:07.9000000+02…",,,"""5YJXCBE21GF004260""","""OSOBNÍ AUTOMOBIL""","""M1""","""AC KOMBI""","""TESLA""","""MODEL X P90D""","""E(2X)""","""2016-08-24T00:00:00.0000000+02…","""Česká republika""","""UL047624""",,,,,,,,,,,,,,,,"""2023-09-11T13:37:36.0000000+02…","""2023-09-11T13:58:08.0000000+02…","""1108""",,,,,,,,,,,,,"""254647""",,"""2025-09-11""","""true""","""1"""
"""CZ-3632-24-04-0072""","""2024-04-03""","""Pravidelná""","""Plný""","""27755""","""3632""","""Pardubický kraj""","""Hlinsko""","""Hlinsko""","""2024-04-03T09:19:03.3270000+02…","""2024-04-03T09:42:56.5070000+02…",,,"""5YJXCAE21HF052224""","""OSOBNÍ AUTOMOBIL""","""M1""","""AF VÍCEÚČELOVÉ VOZIDLO""","""TESLA""","""X 100D""","""E""","""2019-04-18T00:00:00.0000000+02…","""Česká republika""","""UBB945054""",,,,,,,,,,,,,,,,"""2024-04-03T09:20:25.0000000+02…","""2024-04-03T09:41:47.0000000+02…","""1108""",,,,,,,,,,,,,"""107861""",,"""2026-04-03""","""true""","""1"""
"""CZ-3632-24-04-0506""","""2024-04-16""","""Evidencní kontrola""","""Plný""","""27755""","""3632""","""Pardubický kraj""","""Hlinsko""","""Hlinsko""","""2024-04-16T07:26:01.2000000+02…","""2024-04-16T08:11:03.7630000+02…",,,"""5YJ3E7EBXMF825083""","""OSOBNÍ AUTOMOBIL""","""M1""","""AA SEDAN""","""TESLA""","""MODEL 3""","""3D3""","""2020-12-15T00:00:00.0000000+01…","""Česká republika""","""UBG784501""",,,,,,,,,,,,,,,,"""2024-04-16T07:53:30.0000000+02…","""2024-04-16T08:10:30.0000000+02…","""92433""",,,,,,,,,,,,,"""90761""",,,"""false""","""1"""
"""CZ-3632-24-05-0087""","""2024-05-03""","""Pravidelná""","""Plný""","""27755""","""3632""","""Pardubický kraj""","""Hlinsko""","""Hlinsko""","""2024-05-03T08:02:38.5000000+02…","""2024-05-03T09:33:19.0900000+02…",,,"""5YJ3E7EB7LF567636""","""OSOBNÍ AUTOMOBIL""","""M1""","""AA SEDAN""","""TESLA""","""MODEL 3(003)""","""3D3""","""2020-05-28T00:00:00.0000000+02…","""Česká republika""","""UAY749553""",,,,,,,,,,,,,,,,"""2024-05-03T09:11:39.0000000+02…","""2024-05-03T09:32:25.0000000+02…","""92433""",,,,,,,,,,,,,"""84859""",,"""2026-05-03""","""true""","""1"""
"""CZ-3632-24-05-0620""","""2024-05-17""","""Pravidelná""","""Plný""","""27755""","""3632""","""Pardubický kraj""","""Hlinsko""","""Hlinsko""","""2024-05-17T06:33:12.4700000+02…","""2024-05-17T09:40:16.3370000+02…",,,"""5YJ3E7EB7LF677649""","""OSOBNÍ AUTOMOBIL""","""M1""","""AA SEDAN""","""TESLA""","""MODEL 3""","""3D3""","""2020-04-28T00:00:00.0000000+02…","""Česká republika""","""UAY748865""",,,,,,,,,,,,,,,,"""2024-05-17T09:17:21.0000000+02…","""2024-05-17T09:38:04.0000000+02…","""1108""",,,,,,,,,,,,,"""95007""",,"""2026-05-17""","""true""","""1"""
"""CZ-3632-24-06-0458""","""2024-06-13""","""Pred registrací""","""Plný""","""1109""","""3632""","""Pardubický kraj""","""Hlinsko""","""Hlinsko""","""2024-06-13T13:33:31.2600000+02…","""2024-06-13T14:04:52.8200000+02…",,,"""5YJ3E7EAXLF767353""","""OSOBNÍ AUTOMOBIL""","""M1""","""AA SEDAN""","""TESLA""","""003 /E1R/""","""3D5""","""2020-08-31T00:00:00.0000000+02…","""Spolková republika Německo""","""FT913081""",,,,,,,,,,,,,,,,"""2024-06-13T13:42:14.0000000+02…","""2024-06-13T14:02:59.0000000+02…","""92433""",,,,,,,,,,,,,"""29991""",,"""2026-06-13""","""false""","""1"""
"""CZ-3632-24-09-0019""","""2024-09-02""","""Evidencní kontrola""","""Plný""","""109769""","""3632""","""Pardubický kraj""","""Hlinsko""","""Hlinsko""","""2024-09-02T09:47:07.1100000+02…","""2024-09-02T10:07:47.4870000+02…",,,"""5YJ3E7EB2KF201906""","""OSOBNÍ AUTOMOBIL""","""M1""","""AA SEDAN""","""TESLA""","""MODEL 3""","""3D3/3D5""","""2019-02-28T00:00:00.0000000+01…","""Belgické království""","""S279635515""",,,,,,,,,,,,,,,,"""2024-09-02T09:51:43.0000000+02…","""2024-09-02T10:04:42.0000000+02…","""1108""",,,,,,,,,,,,,"""170603""",,,"""false""","""1"""


In [5]:
# Load the 2020 SME measurement exports whose file stem ends with 'V'
# and stack them into a single pandas frame.
mereni_2020_path = Path('../zdroje/ANALYZA DAT MERENI SME z ISTP/2020/sme')

# Path.iterdir() yields entries in arbitrary order; sorting makes the row order
# of the combined frame reproducible across runs and machines. Non-file entries
# (e.g. sub-directories) are skipped because read_csv cannot parse them.
csv_files = sorted(
    entry
    for entry in mereni_2020_path.iterdir()
    if entry.is_file() and entry.stem.endswith('V')
)

# The files are semicolon-delimited.
data_frames = [pd.read_csv(csv_path, delimiter=';') for csv_path in csv_files]

# Each file keeps its own row index (no ignore_index), so index labels can
# repeat across files in the combined frame.
df = pd.concat(data_frames)


  df = pd.concat(data_frames)


In [None]:
# Print all column names of the combined measurement frame as a plain list.
print(df.columns.tolist())

['SME', 'DateTime', 'Technik', 'Pristroj', 'PristrojSW', 'VIN', 'Znacka', 'Obch.Ozn.', 'TypMotoru', 'Palivo', 'Kategorie', 'R.Vyroby', '1.registrace', 'StavKm', 'EmSystem', 'ProtokolOBD', 'PocetDTC', 'StatusMIL', 'Readiness-S', 'Readiness-C', 'CALID', 'CVN', 'VyhovujeZaver', 'VyhovujeVizual', 'ReadinessOK', 'TesnostPlyn', 'StatusECU', 'VizualStatusMIL', 'Zavady', 'Defekty', 'LimVolMin', 'LimVolMax', 'LimRefMin', 'LimRefMax', 'LimAccTime', 'LimK-stitek', 'LimDiffK', 'VyslNvol', 'VyslNref', 'VyslAccTime', 'VyslK', 'VyslDiffK', 'VyslLimK', 'NumAcc', 'NumExhaust', 'D11-Nvol', 'D11-Nref', 'D11-AccTime', 'D11-K', 'D11-TPS', 'D11-Temp', 'D21-Nvol', 'D21-Nref', 'D21-AccTime', 'D21-K', 'D21-TPS', 'D21-Temp', 'D12-Nvol', 'D12-Nref', 'D12-AccTime', 'D12-K', 'D12-TPS', 'D12-Temp', 'D22-Nvol', 'D22-Nref', 'D22-AccTime', 'D22-K', 'D22-TPS', 'D22-Temp', 'D13-Nvol', 'D13-Nref', 'D13-AccTime', 'D13-K', 'D13-TPS', 'D13-Temp', 'D23-Nvol', 'D23-Nref', 'D23-AccTime', 'D23-K', 'D23-TPS', 'D23-Temp', 'D14-Nv

In [10]:
# Select the rows whose four `D1x-AccTime` readings are exactly this sequence
# of values, and show them with identifying columns.
acc_time_cols = ['D11-AccTime', 'D12-AccTime', 'D13-AccTime', 'D14-AccTime']
acc_time_pattern = [1.88, 1.84, 1.78, 1.70]
# Element-wise equality against the pattern, then require all four to match per row.
pattern_mask = (df[acc_time_cols].values == acc_time_pattern).all(axis=1)
df.loc[pattern_mask, ['SME', 'DateTime', *acc_time_cols, 'VIN']]

Unnamed: 0,SME,DateTime,D11-AccTime,D12-AccTime,D13-AccTime,D14-AccTime,VIN
73,540514.0,2020-01-03T06:40:00,1.88,1.84,1.78,1.7,VF3GJRHYK95052878
182,540514.0,2020-01-06T10:10:00,1.88,1.84,1.78,1.7,ZFA19200000105264
204,540514.0,2020-01-06T10:20:00,1.88,1.84,1.78,1.7,WBAAL71020KE07146
207,540514.0,2020-01-06T10:31:00,1.88,1.84,1.78,1.7,YV1SW720211084029
221,540514.0,2020-01-06T10:31:00,1.88,1.84,1.78,1.7,YV1SW720211084029
...,...,...,...,...,...,...,...
18378,540514.0,2020-08-28T09:51:00,1.88,1.84,1.78,1.7,WDB6013671P076453
18427,540514.0,2020-08-31T06:20:00,1.88,1.84,1.78,1.7,WVWZZZ1HZXK024417
18429,540514.0,2020-08-31T06:11:00,1.88,1.84,1.78,1.7,TMBBG41U432699607
18454,540514.0,2020-08-31T08:17:00,1.88,1.84,1.78,1.7,WVWZZZ3BZ1E253013


In [9]:

vin = "WVWZZZ3BZ1E253013"

# Row predicate: true when any column, cast to string, contains the VIN.
# literal=True matches the VIN as plain text instead of compiling it as a
# regular expression, so a search value containing regex metacharacters
# cannot change the meaning of the lookup.
vin_in_any_column = pl.any_horizontal(
    pl.all().cast(pl.String).str.contains(vin, literal=True)
)

# Lazily scan the vehicle-register dump and keep only the matching rows.
# Parsing is deliberately tolerant: quoting is disabled, ragged lines are
# truncated and parse errors are ignored.
# NOTE(review): with quote_char=None, commas inside quoted fields are treated
# as separators, so values can land under the wrong header (the saved output
# of this notebook appears to show that). Treat the result as an existence
# check rather than a clean record — confirm before using individual columns.
df = (
    pl.scan_csv(
        "../zdroje/RSV_vypis_vozidel_20251101.csv",
        infer_schema_length=1000,
        truncate_ragged_lines=True,
        ignore_errors=True,
        quote_char=None
    )
    .filter(vin_in_any_column)
    .collect()
)

df.shape[0] > 0  # True if VIN exists

True

In [10]:
# Display the current `df`; in the saved output this is the vehicle-register
# row(s) returned by the VIN lookup.
df

Datum 1 registrace,Datum 1 registrace v ČR,ZTP,ES EU,Druh vozidla,Druh vozidla 2 ř,Kategorie vozidla,Tovární značka,Typ,Varianta,Verze,VIN,Obchodní označení,Výrobce vozidla,Výrobce motoru,Typ motoru,Max výkon kW min⁻¹,Palivo,Zdvihový objem cm³,Plně elektrické vozidlo,Hybridní vozidlo,Třída hybridního vozidla,Emisní limit EHKOSN EHSES,Stupeň plnění emisní úrovně,Korigovaný součinitel absorbce m⁻¹,CO2 město mimo město kombinované g km-1,Specifické CO2,Snížení emisí – NEDC,Snížení emisí – WLTP,Spotřeba předpis,Spotřeba město mimo město kombinovaná l 100km⁻¹,Spotřeba při rychlosti l 100 km⁻¹,Spotřeba el mobil Wh km – Z,Dojezd ZR km,Výrobce karoserie,Druh (typ),Výrobní číslo karoserie,…,Faktor verifikace - Vf,Účel vozidla,Další záznamy,Alternativní provedení,Číslo TP,Číslo ORV,Druh RZ,Zařazení vozidla,Status,PČV,ABS,Airbag,ASR,Brzdy nouzová,Brzdy odlehčovací,Brzdy parkovací,Brzdy provozní,Doplňkový text na TP,Hmotnosti provozní Do,Hmotnosti zatížení SZ,Hmotnosti zatížení SZ typ,Hydropohon,Objem cisterny,Zatížení střechy,Číslo motoru,Nejvyšší rychlost omezení,Ovládání brzd SZ,Ovládání brzd SZ druh,Retarder,Rok výroby,Délka do,Ložná délka,Ložná šířka,Výška do,Typ kód,RM zániku,Stupeň autonomity vozidla
str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,…,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""11.04.2001""","""25.10.2012""","""3483-137-00""","""e1*98/14*0157*""","""OSOBNÍ AUTOMOBIL""","""OSOBNÍ KOMBI""","""M1""","""VW""","""""/ 3BG""""",,,"""WVWZZZ3BZ1E253013""","""PASSAT VARIANT""","""""""""","""""VOLKSWAGEN""",""" AG""",""" WOLFSBURG""",""" SRN""""","""AVF""","""""96 / 4000""""","""NM""","""""1896""""","""NE""","""NE""",,""" / 1999/102A""",,"""""0""","""9""""",""" / / """,,,,"""""EU 1999/100""""","""""8.6 / 5.5 / 6.6""""","""""0 /""""",,…,"""""195/65 R 15 91 V / 6J X 15 H2…","""""78/ 3000""""","""74""","""201""",,,,,,"""Běžný provoz""","""""Variabilní provedení vozidla:…",""" ve znění pozdějších předpisů.…","""| - lapači nečistot""","""| - na výrobním štítku označen…","""|hmotností nebrzděného přívěsu…",,"""UE572454""","""UAS198182""","""STD. SILNIČNÍ""","""RSV""","""PROVOZOVANÉ""","""12347465""","""True""",,"""False""","""False""","""False""","""True""","""True""","""""DOVOZ SRN Č.POJ.:CZ/0008/8797…",,"""75""",,"""False""",,"""100""",
