In [76]:
import os
import pandas as pd
import polars as pl
import time


script_dir = os.getcwd()
root_dir = os.path.dirname(script_dir)
data_dir = os.path.join(root_dir, "data")

In [77]:
# データの読み込み
ondotori_hkb = f"{data_dir}\\AirTemp_odt_hkb_2cm.csv"


# pandas
df_pd = pd.read_csv(ondotori_hkb, encoding="shift-jis")

df_pd

Unnamed: 0,Date/Time,Date/Time.1,No.1
0,Date/Time,Date/Time,OFFICE 1
1,,,°C
2,2021/8/15 15:00,44423.625,20.5
3,2021/8/15 16:00,44423.66667,21.3
4,2021/8/15 17:00,44423.70833,19.8
...,...,...,...
27401,,,
27402,,,
27403,,,
27404,,,


In [78]:
# polars
df = pl.read_csv(ondotori_hkb, encoding="utf8-lossy", columns=["Date/Time", "No.1"])


# カラム名の変更と余計な行の削除
df = df.rename({
    "Date/Time": 'TIMESTAMP',
    "No.1": "Temp",
}).slice(2).filter(~pl.col("TIMESTAMP").is_null())

# 単位系
unit_dict = {
    "TIMESTAMP": "TS",
    "Temp": "DegC",
}


print(df.columns)
print(unit_dict)
df

['TIMESTAMP', 'Temp']
{'TIMESTAMP': 'TS', 'Temp': 'DegC'}


TIMESTAMP,Temp
str,str
"""2021/8/15 15:00""","""20.5"""
"""2021/8/15 16:00""","""21.3"""
"""2021/8/15 17:00""","""19.8"""
"""2021/8/15 18:00""","""21.4"""
"""2021/8/15 19:00""","""11.2"""
…,…
"""2024/7/13 7:00""","""10.2"""
"""2024/7/13 8:00""","""12.1"""
"""2024/7/13 9:00""","""16.3"""
"""2024/7/13 10:00""","""17.1"""


In [None]:
# scan

df_lazy = (
    pl.scan_csv(ondotori_hkb, encoding="utf8-lossy")
    .select(["Date/Time", "No.1"])
    .rename({"Date/Time": 'TIMESTAMP',"No.1": "Temp",})
    .slice(2)
    .filter(~pl.col("TIMESTAMP").is_null())
)

# 単位系の取得
unit_dict = {
    "TIMESTAMP": "TS",
    "Temp": "DegC",
}
print(unit_dict)

# TIMESTAMPがnullの行をログ出力
df_null_timestamp = df_lazy.filter(pl.col("TIMESTAMP").is_null()).collect()
print(df_null_timestamp)

# TIMESTAMPをUNIXTIMEに変換
df_lazy = df_lazy.with_columns(
    pl.col("TIMESTAMP").str.to_datetime("%Y/%m/%d %H:%M", strict=False).dt.epoch()
    )

# UNIXTIMEに変換出来なかった行をログ出力
df_null_unixtime = df_lazy.filter(pl.col("TIMESTAMP").is_null()).collect()
print(df_null_unixtime)


df = df_lazy.collect()

df

{'TIMESTAMP': 'TS', 'Temp': 'DegC'}
shape: (0, 2)
┌───────────┬──────┐
│ TIMESTAMP ┆ Temp │
│ ---       ┆ ---  │
│ str       ┆ str  │
╞═══════════╪══════╡
└───────────┴──────┘
shape: (0, 2)
┌───────────┬──────┐
│ TIMESTAMP ┆ Temp │
│ ---       ┆ ---  │
│ i64       ┆ str  │
╞═══════════╪══════╡
└───────────┴──────┘


TIMESTAMP,Temp
i64,str
1629039600000000,"""20.5"""
1629043200000000,"""21.3"""
1629046800000000,"""19.8"""
1629050400000000,"""21.4"""
1629054000000000,"""11.2"""
…,…
1720854000000000,"""10.2"""
1720857600000000,"""12.1"""
1720861200000000,"""16.3"""
1720864800000000,"""17.1"""
