In [1]:
import pandas as pd
import numpy as np
import os
from cassandra.cluster import Cluster
from datetime import datetime
import plotly.graph_objects as go


In [2]:
# Open a driver session against the Cassandra node listening on localhost.
cluster = Cluster(contact_points=['127.0.0.1'])
session = cluster.connect()

In [3]:
# Select the working keyspace once so later queries can use bare table names.
keyspace_name = "data_stock"
session.set_keyspace(keyspace_name)

# The identifier must not carry surrounding whitespace: it is interpolated
# verbatim into the CQL text below.
table_name = "stock_fun_data"
rows = session.execute(f"SELECT * FROM {table_name}")

# Materialise the result set as a DataFrame and report (rows, columns).
df = pd.DataFrame(rows)
df.shape


(917, 29)

In [5]:
# Fetch the record of a single ticker. The ticker is bound as a query
# parameter (driver-side %s placeholder) instead of being spliced into the
# CQL text, so the same statement can be reused for any symbol.
target_symbol = '24CS'
rows_symbol = session.execute(
    "SELECT * FROM data_stock.stock_fun_data WHERE symbol = %s",
    (target_symbol,),
)
df_symbol = pd.DataFrame(rows_symbol)
df_symbol.head()

Unnamed: 0,symbol,aumsize,average,change,eps,exchange,exerciseprice,exerciseratio,high,impliedvolatility,...,pbv,pe,percentchange,percentyield,securitytype,theoretical,tolasttrade,totalvolume,underlying,underlyingprice
0,24CS,,,,-0.57629,,,,,,...,2.59,0.0,,0.0,CS,,,0,,


In [6]:
# Take the first record and keep only the fields that actually hold a value.
row = df_symbol.iloc[0]
non_null = row.dropna()
print(non_null)

symbol               24CS
eps              -0.57629
instrumenttype      STOCK
marketstatus        Close
pbv                  2.59
pe                    0.0
percentyield          0.0
securitytype           CS
totalvolume             0
Name: 0, dtype: object


In [7]:
# Pull only the columns of interest from Cassandra.
select_columns_cql = """
    SELECT symbol, eps, instrumenttype, marketstatus, pbv, pe, percentyield, securitytype, totalvolume
    FROM data_stock.stock_fun_data
    """
rows_symbol_nonnull = session.execute(select_columns_cql)

# Turn the result set into a DataFrame and preview its first rows.
df_symbol_nonnull = pd.DataFrame(data=rows_symbol_nonnull)
df_symbol_nonnull.head()

Unnamed: 0,symbol,eps,instrumenttype,marketstatus,pbv,pe,percentyield,securitytype,totalvolume
0,PPPM,-0.18924,STOCK,Close,0.67,0.0,0.0,CS,0
1,TPCH,0.63348,STOCK,Close,0.5,4.57,9.76,CS,0
2,KPNREIT,,STOCK,Close,,,0.0,CS,0
3,POLY,0.33433,STOCK,Close,3.05,15.31,4.55,CS,0
4,QHBREIT,,STOCK,Close,,,0.0,CS,0


In [9]:
# Summarise dtypes and non-null counts of the reduced column selection.
df_symbol_nonnull.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 917 entries, 0 to 916
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   symbol          917 non-null    object 
 1   eps             851 non-null    float64
 2   instrumenttype  917 non-null    object 
 3   marketstatus    917 non-null    object 
 4   pbv             897 non-null    float64
 5   pe              843 non-null    float64
 6   percentyield    907 non-null    float64
 7   securitytype    917 non-null    object 
 8   totalvolume     917 non-null    int64  
dtypes: float64(4), int64(1), object(4)
memory usage: 64.6+ KB


In [11]:
# Summarise dtypes and non-null counts of the full table loaded earlier.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 917 entries, 0 to 916
Data columns (total 29 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   symbol             917 non-null    object 
 1   aumsize            0 non-null      object 
 2   average            0 non-null      object 
 3   change             0 non-null      object 
 4   eps                851 non-null    float64
 5   exchange           0 non-null      object 
 6   exerciseprice      0 non-null      object 
 7   exerciseratio      0 non-null      object 
 8   high               0 non-null      object 
 9   impliedvolatility  0 non-null      object 
 10  inav               0 non-null      object 
 11  instrumenttype     917 non-null    object 
 12  intrinsicvalue     0 non-null      object 
 13  last               0 non-null      object 
 14  lasttradingdate    0 non-null      object 
 15  low                0 non-null      object 
 16  marketstatus       917 non

In [19]:
# Count missing values per column of the full table.
df.isnull().sum()

symbol                 0
aumsize              917
average              917
change               917
eps                   66
exchange             917
exerciseprice        917
exerciseratio        917
high                 917
impliedvolatility    917
inav                 917
instrumenttype         0
intrinsicvalue       917
last                 917
lasttradingdate      917
low                  917
marketstatus           0
maturitydate         917
moneyness            917
pbv                   20
pe                    74
percentchange        917
percentyield          10
securitytype           0
theoretical          917
tolasttrade          917
totalvolume            0
underlying           917
underlyingprice      917
dtype: int64

In [20]:
def classify_basic_financial(df, pbv_threshold=2):
    """Grade each stock from its EPS and PBV.

    Rows missing either ``eps`` or ``pbv`` are dropped. Every remaining row
    earns one point when ``eps > 0`` and one point when
    ``pbv < pbv_threshold``; the point total maps to a letter:
    2 -> 'A', 1 -> 'C', 0 -> 'E'.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``symbol``, ``eps`` and ``pbv``.
        The input frame is not modified.
    pbv_threshold : float, default 2
        PBV strictly below this value is treated as "not yet expensive".

    Returns
    -------
    pandas.DataFrame
        Columns ``symbol``, ``eps``, ``pbv``, ``financial_grade``, keeping the
        original index labels of the rows that survived the null filter.
        An empty frame with those four columns is returned when no row has
        both values present.
    """
    valid = df.dropna(subset=['eps', 'pbv']).copy()

    # Vectorised scoring: each boolean test contributes 0 or 1 point.
    # Unlike a row-wise apply, this also works when `valid` has no rows.
    points = (valid['eps'] > 0).astype(int) + (valid['pbv'] < pbv_threshold).astype(int)

    valid['financial_grade'] = points.map({2: 'A', 1: 'C', 0: 'E'})
    return valid[['symbol', 'eps', 'pbv', 'financial_grade']]


In [23]:
# Grade every stock that has both eps and pbv, then display the result.
classified_df = classify_basic_financial(df_symbol_nonnull)
classified_df


Unnamed: 0,symbol,eps,pbv,financial_grade
0,PPPM,-0.18924,0.67,C
1,TPCH,0.63348,0.50,A
3,POLY,0.33433,3.05,C
5,VCOM,0.19740,1.40,A
6,KDH,6.04651,2.26,C
...,...,...,...,...
912,TFM,0.77000,1.51,A
913,TRITN,-0.04760,1.64,C
914,I2,0.13138,0.81,A
915,SPRC,0.47805,0.64,A


In [25]:
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import pandas as pd

# Financial features fed to the clustering step
# (extend with roe, de, etc. when those columns are available).
features = ['eps', 'pbv']
df_cluster = df_symbol_nonnull.loc[:, features].dropna().copy()

# Standardise the features before clustering.
scaler = StandardScaler()
scaler.fit(df_cluster)
X_scaled = scaler.transform(df_cluster)


In [31]:
# Number of groups to form, e.g. five groups A–E.
k = 5
kmeans = KMeans(random_state=42, n_clusters=k)
kmeans.fit(X_scaled)
clusters = kmeans.labels_

# Store the cluster id of every row next to its features and display the frame.
df_cluster['Cluster'] = clusters
df_cluster


Unnamed: 0,eps,pbv,Cluster,symbol
0,-0.18924,0.67,0,PPPM
1,0.63348,0.50,0,TPCH
3,0.33433,3.05,0,POLY
5,0.19740,1.40,0,VCOM
6,6.04651,2.26,3,KDH
...,...,...,...,...
912,0.77000,1.51,0,TFM
913,-0.04760,1.64,0,TRITN
914,0.13138,0.81,0,I2
915,0.47805,0.64,0,SPRC


In [32]:
import plotly.express as px

# Bring the ticker symbols back in, matched on the row labels that survived dropna.
df_cluster['symbol'] = df_symbol_nonnull['symbol'].loc[df_cluster.index]

# EPS vs PBV, coloured by cluster id, with the ticker shown on hover.
scatter_options = dict(
    x='eps',
    y='pbv',
    color='Cluster',
    hover_data=['symbol'],
    title='📊 Clustered Stocks by EPS & PBV',
    color_continuous_scale='Viridis',
)
fig = px.scatter(df_cluster, **scatter_options)
fig.show()


In [33]:
# Features used as autoencoder input
# (extend with roe, de, netmargin, currentratio when available).
features = ['eps', 'pbv']
df_auto = df_symbol_nonnull.dropna(subset=features)[features].copy()

# Standardise the features.
scaler = StandardScaler()
X_scaled = scaler.fit(df_auto).transform(df_auto)


In [37]:
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import set_random_seed

# Seed the Python, NumPy and TensorFlow generators so weight initialisation
# and batch shuffling repeat across runs; without this the latent space (and
# every cluster derived from it) changes on each execution.
set_random_seed(42)

# Input width and size of the latent space
input_dim = X_scaled.shape[1]
latent_dim = 2  # two dimensions so the latent space can be plotted directly

# Encoder
inputs = Input(shape=(input_dim,))
encoded = Dense(8, activation='relu')(inputs)
latent = Dense(latent_dim, activation='linear')(encoded)

# Decoder
decoded = Dense(8, activation='relu')(latent)
outputs = Dense(input_dim, activation='linear')(decoded)

# Autoencoder model (input -> reconstruction) and the encoder half on its own
autoencoder = Model(inputs, outputs)
encoder = Model(inputs, latent)  # used later to extract the latent features

# Compile & train: the network learns to reconstruct its own scaled input
autoencoder.compile(optimizer=Adam(learning_rate=0.01), loss='mse')
autoencoder.fit(X_scaled, X_scaled, epochs=100, batch_size=16, verbose=0)


<keras.src.callbacks.history.History at 0x2457e01d000>

In [38]:
# Project the scaled features through the encoder to obtain the latent features.
latent_features = encoder.predict(X_scaled)


[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step


In [39]:
# Cluster the stocks in latent space.
kmeans = KMeans(random_state=42, n_clusters=5)
kmeans.fit(latent_features)
cluster_labels = kmeans.labels_

# Attach the labels and the ticker symbols back onto the feature frame.
df_auto['Cluster'] = cluster_labels
df_auto['symbol'] = df_symbol_nonnull['symbol'].loc[df_auto.index]


In [40]:
# Latent coordinates together with each stock's cluster id and ticker.
latent_df = pd.DataFrame(latent_features, columns=['z1', 'z2']).assign(
    Cluster=cluster_labels,
    symbol=df_auto['symbol'].values,
)

# Scatter of the latent space, coloured by cluster, ticker shown on hover.
latent_plot_options = dict(
    x='z1',
    y='z2',
    color='Cluster',
    hover_data=['symbol'],
    title='📊 Autoencoder + KMeans: Stock Clustering (Latent Space)',
)
fig = px.scatter(latent_df, **latent_plot_options)
fig.show()


In [42]:
# -- Cluster in latent space --
latent_features = encoder.predict(X_scaled)
clusters = KMeans(n_clusters=5, random_state=42).fit_predict(latent_features)

# `clusters` has one entry per row of df_auto (rows with both eps and pbv),
# which is fewer rows than `df`. Assigning the bare array to `df` raises a
# length-mismatch ValueError, so the labels are aligned by index instead;
# stocks that could not be clustered get NaN.
cluster_by_row = pd.Series(clusters, index=df_auto.index)
df['Cluster'] = cluster_by_row

# NOTE(review): KMeans label ids carry no ordering, so the letter a cluster
# receives here is not a quality ranking — confirm the intended mapping.
df['financial_grade'] = cluster_by_row.map({0: 'A', 1: 'B', 2: 'C', 3: 'D', 4: 'E'})

# NOTE(review): trend_level is drawn at random — presumably a placeholder until
# a real trend signal is available. Seeded so the table is reproducible.
trend_rng = np.random.default_rng(42)
df['trend_level'] = trend_rng.choice(['a', 'b', 'c', 'd', 'e'], size=len(df))

# Two-letter quadrant code, e.g. 'Ab'; stays NaN for unclustered stocks, which
# a later groupby on this column leaves out by default.
df['Quadrant'] = df['financial_grade'] + df['trend_level']

[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


In [43]:
# -- Build the 5x5 Plotly quadrant table --
# Ticker symbols grouped by their two-letter quadrant code (grade + trend).
quad_groups = df.groupby('Quadrant')['symbol'].apply(list).to_dict()
levels_f = ['A', 'B', 'C', 'D', 'E']  # financial grades: one table row each
levels_t = ['a', 'b', 'c', 'd', 'e']  # trend levels: one table column each
grade_colors = {'A': '#2ecc71', 'B': '#58d68d', 'C': '#f4d03f', 'D': '#f39c12', 'E': '#e74c3c'}

# go.Table is column-major: each inner list of `cells.values` (and of
# `fill_color`) is one COLUMN. The header names trend levels, so every inner
# list must hold the five grades of a single trend level; building one list
# per grade would put a whole grade under each trend header.
table_text = []
for t in levels_t:
    column = []
    for f in levels_f:
        code = f + t
        stocks = quad_groups.get(code, [])
        if stocks:
            cell = f"<b>{code}</b><br>" + "<br>".join(stocks)
        else:
            cell = f"<b>{code}</b><br> -"
        column.append(cell)
    table_text.append(column)

# Same column-major layout: within each column the colour follows the grade (row).
colors = [[grade_colors[f] for f in levels_f] for _ in levels_t]

# NOTE(review): header order assumes trend levels 'a'..'e' correspond to
# "Super Bullish".."Crash" — confirm.
fig = go.Figure(data=go.Table(
    header=dict(values=["Super Bullish", "Up Trend", "Sideway", "Down Trend", "Crash"],
                align="center", fill_color="#dcdcdc", font=dict(color="black", size=14)),
    cells=dict(values=table_text,
               fill_color=colors,
               align="center",
               height=80,
               font=dict(color="white", size=12))
))
fig.update_layout(title="📊 Autoencoder + KMeans: Stock Quadrant Classification")
fig.show()