In [22]:
import json, sys, datetime
from cassandra.cluster import Cluster
from cassandra.concurrent import execute_concurrent_with_args

In [17]:
# Connect to the Cassandra node at 127.0.0.1; `session` is the handle the
# later cells use to run statements.
cluster = Cluster(contact_points=['127.0.0.1'])
session = cluster.connect()

In [18]:
# Switch the session to the 'data_stock' keyspace so the table names used in
# the following cells do not need a keyspace prefix.
session.set_keyspace('data_stock')

In [19]:
# Create the smartset_finan_data table if it does not already exist.
# The primary key is (symbol, date): rows are partitioned by symbol and
# clustered by date in descending order. Price columns are declared as
# `decimal`, volume/value/ratio columns as `double`.
session.execute("""
     CREATE TABLE IF NOT EXISTS smartset_finan_data (
                    symbol text,
                    date date,
                    securityType text,
                    adjustedPriceFlag text,
                    prior decimal,
                    open decimal,
                    high decimal,
                    low decimal,
                    close decimal,
                    average decimal,
                    aomVolume double,
                    aomValue double,
                    trVolume double,
                    trValue double,
                    totalVolume double,
                    totalValue double,
                    pe double,
                    pbv double,
                    bvps double,
                    dividendYield double,
                    marketCap double,
                    volumeTurnover double,
                    PRIMARY KEY (symbol, date)
                ) WITH CLUSTERING ORDER BY (date DESC);
""")


<cassandra.cluster.ResultSet at 0x25267f2ad10>

In [21]:
# Prepare the INSERT once so it can be bound repeatedly. The statement has
# 22 positional placeholders; bind tuples must supply values in exactly the
# column order listed here.
ps = session.prepare("""
INSERT INTO smartset_finan_data (
    symbol, date, securityType, adjustedPriceFlag,
    prior, open, high, low, close, average,
    aomVolume, aomValue, trVolume, trValue,
    totalVolume, totalValue, pe, pbv, bvps,
    dividendYield, marketCap, volumeTurnover
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""")

In [25]:
import json, sys, datetime
from decimal import Decimal, InvalidOperation
from cassandra.concurrent import execute_concurrent_with_args

def parse_date(s):
    """Parse a ``YYYY-MM-DD`` string into a ``datetime.date``.

    Args:
        s: Date string in ``%Y-%m-%d`` format, or ``None``.

    Returns:
        The corresponding ``datetime.date``, or ``None`` when ``s`` is ``None``.

    Raises:
        ValueError: If ``s`` does not match the ``%Y-%m-%d`` format.
    """
    parsed = None
    if s is not None:
        moment = datetime.datetime.strptime(s, "%Y-%m-%d")
        parsed = moment.date()
    return parsed

def to_float(x):
    """Convert a raw JSON value to ``float`` for a ``double`` column.

    Mirrors ``to_decimal``: a value that cannot be converted yields ``None``
    instead of raising, so a single malformed field (for example ``""`` or
    ``"n/a"``) does not abort the whole import.

    Args:
        x: Number, numeric string, or ``None``.

    Returns:
        ``float(x)``, or ``None`` when ``x`` is ``None`` or not convertible.
    """
    if x is None:
        return None
    try:
        return float(x)
    except (ValueError, TypeError, OverflowError):
        # ValueError: non-numeric string; TypeError: unsupported type such as
        # a list or dict; OverflowError: integer too large for a float.
        return None

def to_decimal(x):
    """Convert a raw JSON value to ``Decimal`` for a ``decimal`` column.

    Args:
        x: Number, numeric string, or ``None``.

    Returns:
        A finite ``Decimal``, or ``None`` when ``x`` is ``None``, cannot be
        parsed, or parses to NaN / Infinity.
    """
    if x is None:
        return None
    # Go through str() to avoid float representation problems
    # (Decimal(0.1) would carry the binary rounding error, Decimal("0.1") does not).
    try:
        value = Decimal(str(x))
    except (InvalidOperation, ValueError, TypeError):
        return None
    # "nan" / "inf" parse successfully as Decimal but are not storable numeric
    # values for a CQL decimal column, so treat them like any other invalid input.
    if not value.is_finite():
        return None
    return value

def row_to_params(d):
    """Build the positional bind tuple for the prepared INSERT from one record.

    Args:
        d: Mapping for a single record, read with ``.get`` so absent keys
            become ``None``.

    Returns:
        A 22-element tuple ordered as: symbol, date, the two text fields,
        the six decimal fields, then the twelve double fields. Returns
        ``None`` when the symbol is missing/empty or the date is ``None``,
        so the caller can filter the record out.
    """
    # Resolve both primary-key parts first; the date is parsed before the
    # check so a malformed date still raises regardless of the symbol.
    symbol = d.get("symbol")
    trade_date = parse_date(d.get("date"))

    if symbol and trade_date is not None:
        text_keys = ("securityType", "adjustedPriceFlag")
        # decimal columns are sent as Decimal
        decimal_keys = ("prior", "open", "high", "low", "close", "average")
        # double columns are sent as float
        double_keys = (
            "aomVolume", "aomValue", "trVolume", "trValue",
            "totalVolume", "totalValue", "pe", "pbv", "bvps",
            "dividendYield", "marketCap", "volumeTurnover",
        )
        return (
            (symbol, trade_date)
            + tuple(d.get(key) for key in text_keys)
            + tuple(to_decimal(d.get(key)) for key in decimal_keys)
            + tuple(to_float(d.get(key)) for key in double_keys)
        )

    # Missing primary-key component: signal the caller to drop this record.
    return None

# 4) Load the JSON file (supports a JSON array or NDJSON, one object per line)
records = []
# "utf-8-sig" reads plain UTF-8 unchanged but strips a leading byte-order
# mark, which would otherwise hide the '[' and make json parsing fail.
with open("sets_eod_price.json", "r", encoding="utf-8-sig") as f:
    # Peek at the first non-whitespace character so a file that begins with
    # blank lines or indentation is still recognised as a JSON array.
    first = f.read(1)
    while first and first.isspace():
        first = f.read(1)
    f.seek(0)
    if first == '[':
        records = json.load(f)
    else:
        for line in f:
            line = line.strip()
            if line:
                records.append(json.loads(line))

# Map records -> bind parameters and drop records whose primary key is missing
params = [row_to_params(r) for r in records]
args = [p for p in params if p is not None]

# Make dropped records visible instead of discarding them silently.
if len(args) != len(params):
    print(
        f"Skipped {len(params) - len(args)} record(s) with missing symbol/date",
        file=sys.stderr,
    )

# 5) Write to Cassandra (concurrently, for speed)
# raise_on_first_error=False is required for the summary below to work: with
# the driver default (True) the first failed statement raises immediately and
# no (False, exception) entries are ever returned.
results = execute_concurrent_with_args(
    session, ps, args, concurrency=64, raise_on_first_error=False
)

# Summary: each result is a (success, result_or_exception) pair
ok = sum(1 for (success, _) in results if success)
fail = len(results) - ok
print(f"Inserted: {ok}, Failed: {fail}")
if fail:
    for success, err in results:
        if not success:
            print("Error:", err, file=sys.stderr)


Inserted: 3038, Failed: 0
