In [100]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Load the table that provides the 'sub_id' column used by the following cells
sub_id_csv = './sub_id3.csv'
df = pd.read_csv(sub_id_csv)

In [101]:
# Map every distinct sub_id to an integer code and store it next to the raw ID
label_encoder = LabelEncoder()
encoded_ids = label_encoder.fit_transform(df['sub_id'])
df['ID_encoded'] = encoded_ids

In [102]:
# Create the embedding layer

import tensorflow as tf
from tensorflow.keras.layers import Embedding
print(tf.__version__)

# Seed Python, NumPy and TensorFlow so the randomly initialised weights
# (and therefore every tensor shown below) are reproducible across runs.
tf.keras.utils.set_random_seed(42)

n_unique_ids = df['ID_encoded'].nunique()

# Embedding width heuristic: half the number of unique IDs, capped at 50.
# Clamped to at least 1 because `n_unique_ids // 2` is 0 when there is only
# one unique ID, and an embedding needs a positive output dimension.
embedding_dim = max(1, min(n_unique_ids // 2, 50))

embedding_layer = Embedding(input_dim=n_unique_ids,
                            output_dim=embedding_dim,
                            input_length=1,  # one ID per row
                            name='ID_embedding')

2.10.0


In [103]:
# Inspect the frame, which now also carries the ID_encoded column
df

Unnamed: 0,sub_id,ID_encoded
0,1,0
1,1,0
2,1,0
3,1,0
4,1,0
...,...,...
229,78,38
230,78,38
231,78,38
232,78,38


In [111]:
# Run the encoded IDs through the embedding layer, then flatten the result

from tensorflow.keras.layers import Flatten

# Shape the encoded IDs as a column vector: one ID per row
input_data = df['ID_encoded'].to_numpy().reshape(-1, 1)

# Look up the embedding vector for every row
embedded_data = embedding_layer(input_data)

# Flatten each row's embedding into a single vector so it can be concatenated
# with other features later
flatten_layer = Flatten()
sub_id_input = flatten_layer(embedded_data)

In [112]:
# Inspect the flattened embedding tensor
sub_id_input

<tf.Tensor: shape=(234, 19), dtype=float32, numpy=
array([[-0.01108814,  0.02162075, -0.00984674, ...,  0.04328972,
         0.04844919, -0.02203841],
       [-0.01108814,  0.02162075, -0.00984674, ...,  0.04328972,
         0.04844919, -0.02203841],
       [-0.01108814,  0.02162075, -0.00984674, ...,  0.04328972,
         0.04844919, -0.02203841],
       ...,
       [-0.02805481,  0.04661782, -0.02734996, ...,  0.03892246,
         0.02535753,  0.04679164],
       [-0.02805481,  0.04661782, -0.02734996, ...,  0.03892246,
         0.02535753,  0.04679164],
       [-0.02805481,  0.04661782, -0.02734996, ...,  0.03892246,
         0.02535753,  0.04679164]], dtype=float32)>

In [113]:
# Push the whole embedded table through a small dense network as ONE sample.
# NOTE(review): no compile/fit step is visible for this model, so `output`
# appears to come from freshly initialised weights — confirm that is intended
# before using it as a feature.

# Add the batch axis only once. Previously every re-run of this cell stacked
# another leading dimension onto `sub_id_input`.
if len(sub_id_input.shape) == 2:
    sub_id_input = tf.expand_dims(sub_id_input, axis=0)

# Take the sizes from the tensor instead of hard-coding 234 rows x 19 dims.
n_rows, n_features = sub_id_input.shape[-2], sub_id_input.shape[-1]

model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(n_rows, n_features)),
    tf.keras.layers.Dense(n_rows, activation='relu'),  # one unit per input row
    tf.keras.layers.Flatten(),  # Dense output is already 2-D, so this changes nothing
])
output = model(sub_id_input)
output

<tf.Tensor: shape=(1, 234), dtype=float32, numpy=
array([[0.        , 0.        , 0.04311332, 0.        , 0.0620982 ,
        0.00656068, 0.        , 0.        , 0.04098495, 0.02249741,
        0.        , 0.        , 0.00425776, 0.        , 0.07535933,
        0.04187788, 0.        , 0.        , 0.01800788, 0.01006536,
        0.        , 0.00188536, 0.0346393 , 0.00679357, 0.        ,
        0.0220927 , 0.04153457, 0.01289266, 0.00682935, 0.        ,
        0.06127568, 0.        , 0.        , 0.07817505, 0.        ,
        0.02133018, 0.0013867 , 0.        , 0.05107975, 0.0297545 ,
        0.        , 0.0557247 , 0.        , 0.03222594, 0.01993659,
        0.02660815, 0.        , 0.00136963, 0.        , 0.        ,
        0.01962035, 0.05717619, 0.02984809, 0.        , 0.        ,
        0.06396039, 0.00223971, 0.01237554, 0.        , 0.03128192,
        0.02723566, 0.01407567, 0.07141528, 0.03515057, 0.00953469,
        0.02121177, 0.02629533, 0.01069603, 0.        , 0.01388218

In [115]:
# Turn the (1, n) model output into a single-column frame; -1 lets reshape
# infer the row count instead of hard-coding 234.
pd.DataFrame(tf.reshape(output, [-1, 1]).numpy(), columns=['sub_id'])

Unnamed: 0,sub_id
0,0.000000
1,0.000000
2,0.043113
3,0.000000
4,0.062098
...,...
229,0.000000
230,0.000000
231,0.025994
232,0.026959


In [72]:
# Load the data
# NOTE: this rebinds `df`, replacing the sub_id frame loaded earlier.
import pandas as pd

video_id_csv = './video_id.csv'
df = pd.read_csv(video_id_csv)
df

Unnamed: 0,video_id
0,neutral
1,disgust
2,fear
3,sad
4,happy
...,...
229,disgust
230,anger
231,sad
232,happy


In [89]:
# One-hot encode the video_id labels
from sklearn.preprocessing import OneHotEncoder

one_hot_encoder = OneHotEncoder()

# Pass the labels as a single 2-D column, one label per row
video_labels = df['video_id'].to_numpy().reshape(-1, 1)
one_hot_encoded = one_hot_encoder.fit_transform(video_labels)

# Convert the encoder result into a dense array that Keras can consume
input_data = one_hot_encoded.toarray()

# Sanity check: (rows, categories)
input_data.shape

(234, 6)

In [92]:
# Add a leading batch axis so the whole (rows, categories) table is one sample.
# Guarded so that re-running this cell does not stack a second axis.
if len(input_data.shape) == 2:
    input_data = tf.expand_dims(input_data, axis=0)

In [93]:
# Dense block for the one-hot features, sized from `input_data` rather than
# hard-coding 234 rows x 6 categories. Indexing from the end works whether or
# not the batch axis has already been added.
n_rows, n_features = input_data.shape[-2], input_data.shape[-1]

model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(n_rows, n_features)),
    tf.keras.layers.Dense(n_rows, activation='relu'),  # one unit per input row
    tf.keras.layers.Flatten(),  # Dense output is already 2-D, so this changes nothing
])

In [94]:
# Print the layer-by-layer summary (output shapes and parameter counts)
model.summary()

Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_15 (Flatten)        (None, 1404)              0         
                                                                 
 dense_21 (Dense)            (None, 234)               328770    
                                                                 
 flatten_16 (Flatten)        (None, 234)               0         
                                                                 
Total params: 328,770
Trainable params: 328,770
Non-trainable params: 0
_________________________________________________________________


In [95]:
# Call the model directly on the batched one-hot tensor (forward pass only)
output = model(input_data)

In [77]:
# Inspect the raw model output.
# NOTE(review): this cell's execution count (77) is older than the cell that
# computes `output` (95), so the values displayed below may be stale — re-run
# the notebook top to bottom to confirm.
output

<tf.Tensor: shape=(1, 234), dtype=float32, numpy=
array([[3.1379070e+00, 3.2347145e+00, 2.6562896e+00, 1.7469971e+00,
        0.0000000e+00, 0.0000000e+00, 2.5904496e+00, 1.6534838e-01,
        2.8007145e+00, 3.2431390e-02, 0.0000000e+00, 0.0000000e+00,
        2.3693488e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        1.7011636e+00, 0.0000000e+00, 1.4164349e-01, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 1.1440498e+00, 0.0000000e+00,
        0.0000000e+00, 1.1055026e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 5.3963012e-01, 1.4307210e+00,
        0.0000000e+00, 1.6683235e+00, 4.7304443e-01, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 5.1353461e-01,
        0.0000000e+00, 1.7246397e+00, 8.3070719e-01, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 1.4753659e-01, 4.7397611e-01,
        4.6141970e-01, 1.5112751e+00, 0.0000000e+00, 0.0000000e+00,
        2.8712749e-01, 0.0000000e+00, 1.7582875e+00, 1.3748217e-01

In [96]:
# Check the shape of the model output
output.shape

TensorShape([1, 234])

In [97]:
# Turn the (1, n) model output into a single-column frame; -1 lets reshape
# infer the row count instead of hard-coding 234.
pd.DataFrame(tf.reshape(output, [-1, 1]).numpy(), columns=['one-hot'])

Unnamed: 0,one-hot
0,0.090374
1,0.000000
2,0.000000
3,0.348055
4,0.359027
...,...
229,0.373528
230,0.000000
231,0.125142
232,0.000000
