In [132]:
import csv
import re

from array import array
from copy import deepcopy
from pathlib import Path
from typing import Iterable

In [135]:
# Scalar value types a single parsed CSV cell can take (see cast_type).
type DType = bool | int | float | str
# A "data frame": maps each column name to an iterable holding that column's values.
type DFrame = dict[str, Iterable]

In [None]:
def cast_type(x: str) -> DType:
    """Convert one raw CSV cell to the narrowest matching Python scalar.

    The text is tested against three patterns, in this order:

    1. ``true`` / ``false`` in any letter case -> ``bool``
    2. an optionally signed run of digits -> ``int``
    3. a decimal or exponent-notation number -> ``float``

    Text that matches none of them is returned as a ``str``.

    Args:
        x: Cell text exactly as read from the file (it is not stripped).

    Returns:
        The converted ``bool``, ``int``, ``float`` or ``str`` value.
    """
    # Booleans are checked first so they never fall through to the string case.
    if re.fullmatch(r"(?i)true|false", x):
        return x.lower() == "true"

    if re.fullmatch(r"[+-]?\d+", x):
        return int(x)

    if re.fullmatch(r"[+-]?(\d+\.\d*|\.\d+)([eE][+-]?\d+)?|[+-]?\d+[eE][+-]?\d+", x):
        return float(x)

    return str(x)


def _to_typed_column(values: list) -> Iterable:
    """Pack one parsed column into the most compact container that fits it.

    Returns ``array('b')`` for all-bool columns (including an empty column),
    ``array('i')`` or ``array('q')`` for all-int columns, ``array('d')`` for
    columns of ints and floats, and the original list for everything else.
    """
    bool_flags = [isinstance(v, bool) for v in values]
    if all(bool_flags):
        return array("b", values)

    # bool is a subclass of int, so a column mixing booleans with numbers
    # would otherwise pass the int/float checks below and have True/False
    # silently stored as 1/0. Keep such columns as plain lists instead.
    if any(bool_flags):
        return values

    if all(isinstance(v, int) for v in values):
        # "i" is a C int and raises OverflowError for larger values; widen to
        # the 64-bit "q" typecode before falling back to a plain list.
        for typecode in ("i", "q"):
            try:
                return array(typecode, values)
            except OverflowError:
                continue
        return values

    if all(isinstance(v, (int, float)) for v in values):
        try:
            return array("d", (float(v) for v in values))
        except OverflowError:
            # An int too large to be represented as a float: keep the list.
            return values

    return values


def parse_csv(
    path: str | Path, has_header: bool = True, encoding: str = "utf8", **kwargs
) -> DFrame:
    """Read a CSV file into a column-oriented frame with inferred types.

    Every cell is converted with ``cast_type``. Each column is then packed
    into an ``array`` when all of its values share a numeric kind, and left
    as a plain list otherwise (strings or mixed kinds).

    Rows are matched to columns with ``zip``: cells beyond the number of
    columns are dropped, and a short row leaves the trailing columns without
    a value for that row.

    Args:
        path: Location of the CSV file.
        has_header: When true the first row supplies the column names;
            otherwise columns are named ``col_0``, ``col_1``, ... and the
            first row is treated as data.
        encoding: Text encoding used to open the file.
        **kwargs: Forwarded unchanged to ``csv.reader`` (for example
            ``delimiter`` or ``quotechar``).

    Returns:
        A dict mapping column name to ``array('b')``, ``array('i')`` /
        ``array('q')``, ``array('d')`` or ``list``.
    """
    d = {}

    # newline="" lets the csv module do its own newline handling, which is
    # required for quoted fields that contain line breaks.
    with open(path, mode="r", encoding=encoding, newline="") as f:
        for idx, row in enumerate(csv.reader(f, **kwargs)):
            if idx == 0:
                if has_header:
                    for header in row:
                        d[header] = []
                else:
                    for i, v in enumerate(row):
                        d[f"col_{i}"] = [cast_type(v)]
            else:
                for header, val in zip(d.keys(), row):
                    d[header].append(cast_type(val))

    return {k: _to_typed_column(xs) for k, xs in d.items()}

In [120]:
# Load iris.csv (path is relative to the kernel's working directory) with inferred column types.
iris = parse_csv("iris.csv")
# NOTE(review): a bare `iris` renders every value of every column; a short preview would keep the output readable.
iris

{'sepal.length': array('d', [5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.8, 4.8, 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5.0, 5.0, 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5.0, 5.5, 4.9, 4.4, 5.1, 5.0, 4.5, 4.4, 5.0, 5.1, 4.8, 5.1, 4.6, 5.3, 5.0, 7.0, 6.4, 6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5.0, 5.9, 6.0, 6.1, 5.6, 6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7, 6.0, 5.7, 5.5, 5.5, 5.8, 6.0, 5.4, 6.0, 6.7, 6.3, 5.6, 5.5, 5.5, 6.1, 5.8, 5.0, 5.6, 5.7, 5.7, 6.2, 5.1, 5.7, 6.3, 5.8, 7.1, 6.3, 6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5, 7.7, 7.7, 6.0, 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2, 7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6.0, 6.9, 6.7, 6.9, 5.8, 6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9]),
 'sepal.width': array('d', [3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.4, 3.0, 3.0, 4.0, 4.4, 3.9, 3.5, 3.8, 3.8, 3.4, 3.7, 3.6, 3.3, 3.4, 3.0, 3.4, 3.5, 3.4, 3.2, 3.1, 3.4, 4.1, 4.2, 3.1, 3.2, 3.5, 3.6, 3

In [None]:
def columns(df: DFrame) -> set[str]:
    """Return the column names of ``df`` as a newly built set.

    Args:
        df: Frame whose keys are the column names.

    Returns:
        A set containing every key of ``df``; mutating it does not affect ``df``.
    """
    return {name for name in df.keys()}


def has_column(df: DFrame, col: str) -> bool:
    """Report whether ``df`` contains a column named ``col``.

    Membership is tested directly on the dict, which is a constant-time
    lookup, rather than copying all column names into a temporary set first.

    Args:
        df: Frame to inspect.
        col: Column name to look for.

    Returns:
        ``True`` if ``col`` is a key of ``df``, ``False`` otherwise.
    """
    return col in df


# NOTE(review): only the last expression of a cell is rendered, so the result of
# columns(iris) is computed and discarded here; only the has_column result is shown.
columns(iris)
has_column(iris, "variety")

True

In [136]:
def add_column(frame: DFrame, col_name: str, data: Iterable) -> DFrame:
    """Return a copy of ``frame`` with ``data`` stored under ``col_name``.

    The input frame is deep-copied, so it is never modified. The new column
    is also materialised into its own container, so later changes to ``data``
    do not leak into the returned frame and a one-shot iterator (such as a
    generator) is stored as reusable values.

    Args:
        frame: Source frame; left untouched.
        col_name: Name of the column to add. An existing column with the same
            name is replaced in the returned copy.
        data: Values for the new column. An ``array`` is copied as an
            ``array`` of the same typecode; any other iterable becomes a list.

    Returns:
        A new frame containing every column of ``frame`` plus ``col_name``.
    """
    new_frame = deepcopy(frame)
    if isinstance(data, array):
        # Keep the compact typed representation, but as an independent copy.
        new_frame[col_name] = array(data.typecode, data)
    else:
        new_frame[col_name] = list(data)
    return new_frame

In [137]:
# Load allTypes.csv; the output below shows it yields a bool, an int, a float and a string column.
df_all = parse_csv("allTypes.csv")
df_all

{'is_active': array('b', [1, 0, 1, 0]),
 'age': array('i', [28, 35, 22, 30]),
 'weight': array('d', [72.5, 81.0, 65.2, 70.0]),
 'name': ['Alice', 'Bob', 'Charlie', 'Dana']}

In [None]:
# Row-wise product of the "age" and "weight" columns, used as sample data for add_column.
ageTimesWeight = [
    age * weight for age, weight in zip(df_all["age"], df_all["weight"])
]

# add_column returns a new frame, so df_all itself is expected to stay unchanged.
df_new = add_column(df_all, "testCol", ageTimesWeight)

In [141]:
# Display the frame returned by add_column; it carries the extra "testCol" column.
df_new

{'is_active': array('b', [1, 0, 1, 0]),
 'age': array('i', [28, 35, 22, 30]),
 'weight': array('d', [72.5, 81.0, 65.2, 70.0]),
 'name': ['Alice', 'Bob', 'Charlie', 'Dana'],
 'testCol': [2030.0, 2835.0, 1434.4, 2100.0]}

In [142]:
# Re-display the source frame to confirm add_column did not add "testCol" to it.
df_all

{'is_active': array('b', [1, 0, 1, 0]),
 'age': array('i', [28, 35, 22, 30]),
 'weight': array('d', [72.5, 81.0, 65.2, 70.0]),
 'name': ['Alice', 'Bob', 'Charlie', 'Dana']}