In [1]:
import numpy as np
import pandas as pd

import torch, torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    f1_score, accuracy_score, balanced_accuracy_score, matthews_corrcoef,
    classification_report, confusion_matrix, log_loss
)
import plotly.graph_objects as go
from cassandra.cluster import Cluster
from datetime import datetime

In [2]:
# Open a session on the local Cassandra node with `data_stock` as its default keyspace.
cluster = Cluster(['127.0.0.1'])
session = cluster.connect('data_stock')

In [6]:
# Read every column of the fundamentals table into a DataFrame.
rows_fin = session.execute("""
    SELECT * FROM financal_data_fromsettradeAPI ALLOW FILTERING
""")
df_financial = pd.DataFrame(rows_fin)
# Preview the first ten rows.
df_financial.head(10)

Unnamed: 0,symbol,aumsize,average,change,eps,exchange,exerciseprice,exerciseratio,high,impliedvolatility,...,securitytype,status,theoretical,tolasttrade,totalbuyvolume,totalnosidevolume,totalsellvolume,totalvolume,underlying,underlyingprice
0,PPPM,,,,0.02,,,,,,...,CS,CB,,,0,0,0,0,,
1,TPCH,,,,0.24112,,,,,,...,CS,,,,0,0,0,0,,
2,KPNREIT,,,,,,,,,,...,CS,,,,0,0,0,0,,
3,POLY,,,,0.27632,,,,,,...,CS,,,,0,0,0,0,,
4,QHBREIT,,,,,,,,,,...,CS,,,,0,0,0,0,,
5,VCOM,,,,0.21428,,,,,,...,CS,,,,0,0,0,0,,
6,KDH,,,,4.12186,,,,,,...,CS,,,,0,0,0,0,,
7,NVD,,,,-0.00942,,,,,,...,CS,,,,0,0,0,0,,
8,JDF,,,,0.04224,,,,,,...,CS,,,,0,0,0,0,,
9,SVR,,,,-0.03402,,,,,,...,CS,,,,0,0,0,0,,


In [8]:
# Load the full candlestick table, parse `time` into datetimes and order the
# rows by symbol, then chronologically within each symbol.
rows_price = session.execute("""
    SELECT * FROM candlestick_data ALLOW FILTERING
""")
df_price = (
    pd.DataFrame(rows_price)
    .assign(time=lambda frame: pd.to_datetime(frame['time']))
    .sort_values(['symbol', 'time'])
)

In [9]:
# Preview the first rows of the sorted price frame.
df_price.head()

Unnamed: 0,symbol,time,close_price,high_price,low_price,open_price,value,volume
362761,24CS,2022-10-03,10.2,10.2,7.1,7.1,0.0,559474246
362762,24CS,2022-10-04,7.15,11.1,7.15,10.7,0.0,330716796
362763,24CS,2022-10-05,5.15,6.45,5.05,5.85,0.0,361105489
362764,24CS,2022-10-06,5.2,5.45,4.7,5.4,0.0,232684182
362765,24CS,2022-10-07,5.0,5.15,4.76,5.1,0.0,131780847


In [62]:
def compute_emas(df, spans=(5, 15, 35, 89, 200)):
    """Add one exponential moving average column per span, computed per symbol.

    For every value in ``spans`` a column named ``EMA<span>`` is written,
    holding the recursive (``adjust=False``) EMA of ``close_price`` evaluated
    separately inside each ``symbol`` group.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``symbol`` and ``close_price`` columns. Rows are used in
        their current order, so the caller is responsible for sorting by time.
    spans : iterable of int
        EMA spans to compute. The default is an immutable tuple so it cannot
        be altered between calls.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, modified in place (no copy is made).
    """
    # Group once; the grouping does not depend on the span being computed.
    close_by_symbol = df['close_price'].groupby(df['symbol'])
    for span in spans:
        df[f'EMA{span}'] = close_by_symbol.transform(
            lambda prices, span=span: prices.ewm(span=span, adjust=False).mean()
        )
    return df

# compute_emas writes the EMA columns into df_price itself and returns that same
# object, so df_price2 is a second name for df_price rather than a copy.
df_price2 = compute_emas(df_price)


In [63]:
def compute_rsi_ema(df, window=14):
    """Add an ``RSI`` column computed per symbol from ``close_price``.

    Gains and losses are the positive and negative parts of the price change
    between consecutive rows. Each is smoothed with an exponentially weighted
    mean using ``alpha = 1 / window``, and RSI is ``100 - 100 / (1 + RS)``
    where ``RS`` is smoothed gain divided by smoothed loss. Values stay NaN
    until ``window`` price changes are available for the symbol.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``symbol`` and ``close_price`` columns; rows are used in
        their current order.
    window : int
        Smoothing window; also the minimum number of observations.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with the ``RSI`` column written in place.
    """
    smoothing = 1 / window

    def rsi_calc(prices):
        change = prices.diff()
        ups = change.clip(lower=0)
        downs = -change.clip(upper=0)

        # NOTE(review): `adjust` is left at the pandas default here, whereas
        # compute_emas passes adjust=False — confirm the difference is intended.
        mean_up = ups.ewm(alpha=smoothing, min_periods=window).mean()
        mean_down = downs.ewm(alpha=smoothing, min_periods=window).mean()
        relative_strength = mean_up / mean_down
        return 100 - (100 / (1 + relative_strength))

    closes_by_symbol = df.groupby('symbol')['close_price']
    df['RSI'] = closes_by_symbol.transform(rsi_calc)
    return df

# Add RSI to the frame that already carries the EMA columns. Passing df_price2
# (not df_price) keeps the lineage explicit instead of relying on both names
# pointing at the same object.
df_price2 = compute_rsi_ema(df_price2)


In [64]:
import pandas as pd
import numpy as np

# Maximum relative spread between EMA5/EMA15/EMA35/EMA89 for a row to count
# as a sideways zone.
band_pct = 0.015  # 1.5%

# Expected input: rows of a DataFrame that has the columns
# close_price, RSI, EMA5, EMA15, EMA35, EMA89, EMA200.

def classify_trend(row):
    """Return the trend label for one price row.

    ``row`` is any mapping / Series exposing ``close_price``, ``RSI``,
    ``EMA5``, ``EMA15``, ``EMA35``, ``EMA89`` and ``EMA200``. Rules are tried
    in this order and the first match wins:

    * ``'a'`` overbought: close >= EMA5 and RSI >= 70.
    * ``'b'`` clear uptrend: close >= EMA35 and EMA35 >= EMA89.
    * ``'c'`` sideways above EMA89: close >= EMA89 and the spread of
      EMA5..EMA89 relative to their mean is at most ``band_pct``.
    * ``'e'`` crash: close < EMA5 < EMA15 < EMA35 < EMA89 < EMA200 and
      RSI <= 30.
    * ``'d'`` downtrend: close below EMA89 and EMA200, with EMA89 < EMA200.
    * ``'unclassified'`` otherwise.

    Comparisons against NaN are false, so a row with a missing RSI can never
    be labelled ``'a'`` or ``'e'``.
    """
    close = row['close_price']
    rsi = row['RSI']
    ema5, ema15, ema35 = row['EMA5'], row['EMA15'], row['EMA35']
    ema89, ema200 = row['EMA89'], row['EMA200']

    # A: overbought / very strong
    if close >= ema5 and rsi >= 70:
        return 'a'

    # B: clear uptrend
    if close >= ema35 and ema35 >= ema89:
        return 'b'

    # C: sideways above EMA89 — only when the short/medium EMAs are bunched together
    if close >= ema89:
        emas = [ema5, ema15, ema35, ema89]
        band = (max(emas) - min(emas)) / np.mean(emas)
        return 'c' if band <= band_pct else 'unclassified'

    # E: crash / heavy sell-off. Must be tested before D: every row that meets
    # this condition also meets the downtrend condition, so checking D first
    # would make 'e' unreachable.
    if close < ema5 < ema15 < ema35 < ema89 < ema200 and rsi <= 30:
        return 'e'

    # D: downtrend
    if close < ema89 and close < ema200 and ema89 < ema200:
        return 'd'

    # Default: no rule matched
    return 'unclassified'

# Apply classify_trend to every row (axis=1) and store the label in `trend_group`.
df_price2['trend_group'] = df_price2.apply(classify_trend, axis=1)


In [65]:
# Preview the price frame with its EMA, RSI and trend_group columns.
df_price2.head()

Unnamed: 0,symbol,time,close_price,volume,EMA5,EMA15,EMA35,EMA89,EMA200,RSI,trend_group
0,PPPM,2022-06-13,2.31114,18958630,2.31114,2.31114,2.31114,2.31114,2.31114,,b
1,PPPM,2022-06-14,2.13336,36479858,2.25188,2.288918,2.301263,2.307189,2.309371,,d
2,PPPM,2022-06-15,2.13336,35826032,2.212373,2.269473,2.291935,2.303327,2.30762,,d
3,PPPM,2022-06-16,2.31114,28035706,2.245296,2.274681,2.293002,2.3035,2.307655,,unclassified
4,PPPM,2022-06-17,2.31114,21671802,2.267244,2.279239,2.29401,2.30367,2.307689,,unclassified


Fundamental fields of interest: eps, pe, pbv, percentYield, marketcap

In [19]:
# Fetch the fundamental columns of interest from Cassandra
rows_fin2 = session.execute("""
    SELECT symbol, eps, pe, pbv, percentYield
    FROM financal_data_fromsettradeAPI
    ALLOW FILTERING
""")

# Convert to a DataFrame, labelling the columns in the order they were selected
df_financial2 = pd.DataFrame(rows_fin2, columns=["symbol", "eps", "pe", "pbv", "percentYield"])

# Show the first 5 rows
df_financial2.head()


Unnamed: 0,symbol,eps,pe,pbv,percentYield
0,PPPM,0.02,0.0,0.7,0.0
1,TPCH,0.24112,7.9,0.34,13.91
2,KPNREIT,,,0.23,0.0
3,POLY,0.27632,12.89,2.42,6.38
4,QHBREIT,,,0.41,0.0


In [49]:
# Fundamental table (eps, pe, pbv, percentYield per symbol)
rows_fin = session.execute("""
    SELECT symbol, eps, pe, pbv,percentYield FROM financal_data_fromsettradeAPI ALLOW FILTERING
""")
df_fin = pd.DataFrame(rows_fin, columns=["symbol", "eps", "pe", "pbv","percentYield"])

# Candlestick table (used below to find each symbol's latest close_price)
# NOTE(review): this rebinds rows_price and df_price to a fresh four-column
# frame; `time` is not passed through pd.to_datetime here, unlike the earlier
# load of this table.
rows_price = session.execute("""
    SELECT symbol, time, close_price,volume FROM candlestick_data ALLOW FILTERING
""")
df_price = pd.DataFrame(rows_price, columns=["symbol", "time", "close_price","volume"])

In [50]:
# 2. Latest close per symbol: keep the chronologically last *row* of each symbol.
# GroupBy.last() would instead take the last non-null value column by column,
# which can combine a close_price and a volume from different dates when a row
# has gaps.
df_price_latest = (
    df_price.dropna(subset=["symbol"])
    .sort_values(["symbol", "time"])
    .drop_duplicates(subset="symbol", keep="last")
    .reset_index(drop=True)
)

In [51]:
# 3. Join the fundamentals with each symbol's latest close and volume
#    (inner join: only symbols present in both frames survive).
latest_cols = df_price_latest[["symbol", "close_price", "volume"]]
df_merged = df_fin.merge(latest_cols, on="symbol", how="inner")


In [52]:
# Vectorised volume x close. Anything non-numeric is coerced to NaN, so the
# column is always float and can go straight into the imputer, rather than
# ending up as an object column holding None.
# NOTE(review): this is the latest row's volume times its close price, not
# shares outstanding times price — confirm that is what `marketcap` should mean.
df_merged["marketcap"] = (
    pd.to_numeric(df_merged["volume"], errors="coerce")
    * pd.to_numeric(df_merged["close_price"], errors="coerce")
)

In [53]:
# Preview the merged fundamentals + latest price frame.
df_merged.head()

Unnamed: 0,symbol,eps,pe,pbv,percentYield,close_price,volume,marketcap
0,PPPM,0.02,0.0,0.7,0.0,0.49,313650,153688.502991
1,TPCH,0.24112,7.9,0.34,13.91,3.08,52708,162340.635979
2,KPNREIT,,,0.23,0.0,2.56,100,255.999994
3,POLY,0.27632,12.89,2.42,6.38,6.95,5100,35444.999027
4,QHBREIT,,,0.41,0.0,3.44,140301,482635.448028


In [59]:
# 6. Build the feature matrix for KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.cluster import KMeans

X = df_merged.loc[:, ["eps", "pe", "pbv", "marketcap"]]

# 7. Replace missing values with the column mean
imputer = SimpleImputer(strategy="mean")
X_imputed = imputer.fit_transform(X)

# 8. Standardise every feature
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_imputed)

# 9. KMeans clustering into five groups (fixed seed)
kmeans = KMeans(n_clusters=5, random_state=42, n_init='auto')
kmeans.fit(X_scaled)
df_merged['group_num'] = kmeans.labels_

# 10. Relabel cluster ids 0-4 as the letters A-E
group_map = dict(enumerate("ABCDE"))
df_merged['group'] = df_merged['group_num'].map(group_map)

# Show the final result
df_merged[["symbol", "eps", "pe", "pbv", "close_price", "volume",  "marketcap", "group"]].head()

Unnamed: 0,symbol,eps,pe,pbv,close_price,volume,marketcap,group
0,PPPM,0.02,0.0,0.7,0.49,313650,153688.502991,A
1,TPCH,0.24112,7.9,0.34,3.08,52708,162340.635979,A
2,KPNREIT,,,0.23,2.56,100,255.999994,A
3,POLY,0.27632,12.89,2.42,6.95,5100,35444.999027,A
4,QHBREIT,,,0.41,3.44,140301,482635.448028,A


In [None]:
# Inputs: df_price2 is the candlestick frame carrying `trend_group`;
#         df_merged is the fundamentals frame carrying the KMeans `group` (A-E).
# df_price (reloaded with four columns) has no `trend_group` and df_financial
# has no `group`, so neither can be used here.

# Select the required columns
df1 = df_price2[['symbol', 'time', 'trend_group']]
# One fundamental group per symbol; dropping duplicates keeps this cell safe to
# re-run after df_merged has been replaced by the per-row frame below.
df2 = df_merged[['symbol', 'group']].drop_duplicates('symbol')

# Combine the two groups into a single quadrant label (e.g. 'A' + 'b' -> 'Ab')
df_merged = pd.merge(df1, df2, on='symbol', how='inner')
df_merged['quadrant'] = df_merged['group'] + df_merged['trend_group']
