In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
def _one_hot_rows(indices, depth):
    """Return a float32 ``(len(indices), depth)`` matrix with a single 1 per row."""
    positions = np.asarray(indices, dtype=np.int64)
    return np.eye(depth, dtype=np.float32)[positions]


def _category_indices(values, categories, column):
    """Map every value to its position in *categories*.

    Raises:
        ValueError: If a value is not listed in *categories*.
    """
    lookup = {name: position for position, name in enumerate(categories)}
    try:
        return [lookup[value] for value in values]
    except (KeyError, TypeError) as err:
        raise ValueError(
            f"column {column!r} contains an unknown category: {err}"
        ) from err


def one_hot_encode(data):
    """One-hot encode the demographic columns of *data* into one matrix.

    The columns ``gender``, ``zodiac``, ``state`` and ``education`` are
    encoded against the fixed category lists below. ``birth_year`` is first
    bucketed into ``num_classes`` equal-width bins between the smallest and
    largest year present in *data*, then encoded like the others.

    Args:
        data: Table (e.g. a pandas DataFrame) indexable by the column names
            ``gender``, ``birth_year``, ``zodiac``, ``state``, ``education``.

    Returns:
        A float32 numpy array with one row per input row and
        2 + 10 + 12 + 50 + 5 columns, in the order gender, birth-year bin,
        zodiac, state, education. The matrix and its shape are also printed.

    Raises:
        ValueError: If a categorical column holds a value that is not in its
            category list.
    """
    # Number of birth-year bins.
    num_classes = 10

    birth_years = np.asarray(data['birth_year'])
    if birth_years.size:
        min_birth_year = birth_years.min()
        # A span narrower than num_classes would give a zero-width bin and a
        # division by zero, so the bin width is never allowed below 1.
        interval = max(1, (birth_years.max() - min_birth_year) // num_classes)
        # Integer division can place the largest years in bin num_classes or
        # beyond; fold those into the last bin so every row gets exactly one
        # active birth-year column.
        birth_year_categories = np.minimum(
            (birth_years - min_birth_year) // interval, num_classes - 1
        )
    else:
        birth_year_categories = birth_years

    gender_categories = ['male', 'female']
    zodiac_categories = ['Aquarius', 'Pisces', 'Aries', 'Taurus', 'Gemini', 'Cancer', 'Leo', 'Virgo', 'Libra', 'Scorpio', 'Sagittarius', 'Capricorn']
    state_categories = ['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado', 'Connecticut', 'Delaware', 'Florida', 'Georgia', 'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Maine', 'Maryland', 'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey', 'New Mexico', 'New York', 'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania', 'Rhode Island', 'South Carolina', 'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming']
    education_categories = ['Elementary school', 'Middle school', 'High school', 'University', 'Graduate School']

    def encode(column, categories):
        # One-hot encode a single categorical column.
        indices = _category_indices(data[column], categories, column)
        return _one_hot_rows(indices, len(categories))

    # Concatenate every encoded feature side by side; the column order is
    # part of the contract because downstream code relies on it.
    all_features = np.concatenate(
        [
            encode('gender', gender_categories),
            _one_hot_rows(birth_year_categories, num_classes),
            encode('zodiac', zodiac_categories),
            encode('state', state_categories),
            encode('education', education_categories),
        ],
        axis=1,
    )

    print(all_features)
    print(all_features.shape)

    # Hand the matrix back so callers can actually use the encoding.
    return all_features

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2

class ProfileBasedTopicPredictionModel:
    """Dense softmax classifier over a user's demographic and Big Five profile.

    Training rows are built by one-hot encoding the columns in
    ``self.features`` and appending the standardized columns in
    ``self.bigfive``. The scaler fitted during training is kept on the
    instance so that prediction applies the same transformation.
    """

    def __init__(self, input_dim, output_dim):
        """Record the network dimensions; no network is created yet.

        Args:
            input_dim: Width of the input vector given to the first layer.
            output_dim: Number of units in the softmax output layer.
        """
        # Columns handed to one_hot_encode in train_model.
        self.features = ['gender', 'birth_year', 'zodiac', 'state', 'education']
        # Numeric columns that are standardized instead of one-hot encoded.
        self.bigfive = ['openness', 'conscientiousness', 'extraversion', 'agreeableness', 'neuroticism']
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.model = None
        # Set by train_model; predict_topics reuses it for the bigfive columns.
        self.scaler = None

    def build_model(self):
        """Create and compile the network, storing it in ``self.model``.

        Two L2-regularized ReLU layers (128 and 64 units), each followed by
        dropout, feed a softmax layer of ``self.output_dim`` units.
        """
        self.model = Sequential()
        self.model.add(Dense(128, activation='relu', input_dim=self.input_dim, kernel_initializer='he_uniform', kernel_regularizer=l2(0.01)))
        self.model.add(Dropout(0.5))
        self.model.add(Dense(64, activation='relu', kernel_initializer='he_uniform', kernel_regularizer=l2(0.01)))
        self.model.add(Dropout(0.3))
        self.model.add(Dense(self.output_dim, activation='softmax'))
        # ``learning_rate`` is the supported keyword; ``lr`` is a deprecated
        # alias that newer Keras releases no longer accept.
        self.model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])

    def train_model(self, df, batch_size, epochs, y=None):
        """Fit the network on the profiles in *df*.

        Args:
            df: DataFrame containing every column named in ``self.features``
                and ``self.bigfive``.
            batch_size: Forwarded to ``fit``.
            epochs: Forwarded to ``fit``.
            y: Training targets forwarded to ``fit``. When omitted, the
                module-level name ``y_train`` is looked up instead, which is
                what this method did before the parameter existed.

        Raises:
            NameError: If *y* is omitted and no global ``y_train`` exists.
        """
        if self.model is None:
            self.build_model()

        # NOTE(review): ``y_train`` is not defined in this class or in the
        # visible part of the file; prefer passing ``y`` explicitly.
        labels = y_train if y is None else y  # noqa: F821

        # One-hot encode the demographic columns. The concatenation below
        # needs one_hot_encode to return a 2-D array with one row per sample.
        encoded_features = one_hot_encode(df[self.features])

        # Standardize the Big Five columns and keep the fitted scaler only
        # once fitting succeeded, so prediction reuses these statistics.
        scaler = StandardScaler()
        scaled_bigfive = scaler.fit_transform(df[self.bigfive].to_numpy(dtype=float))
        self.scaler = scaler

        # Training row layout: [one-hot features | standardized bigfive].
        X_train = np.concatenate([encoded_features, scaled_bigfive], axis=1)

        self.model.fit(X_train, labels, batch_size=batch_size, epochs=epochs, verbose=1)

    def predict_topics(self, user_profile):
        """Return the model output for a single user profile.

        Args:
            user_profile: Flat numeric sequence for one user.
                NOTE(review): assumed to follow the training row layout
                (encoded features first, then the raw Big Five values in the
                last ``len(self.bigfive)`` positions) - confirm with callers.

        Returns:
            The first (only) row of ``self.model.predict``.

        Raises:
            RuntimeError: If no model or no fitted scaler is available.
            ValueError: If the profile is shorter than ``len(self.bigfive)``.
        """
        if self.model is None or self.scaler is None:
            raise RuntimeError(
                "train_model must be called before predict_topics"
            )

        profile = np.array(user_profile, dtype=float).reshape(1, -1)
        n_scaled = len(self.bigfive)
        if profile.shape[1] < n_scaled:
            raise ValueError(
                f"user_profile needs at least {n_scaled} values, "
                f"got {profile.shape[1]}"
            )

        # Fitting a new scaler on one row would map every value to 0, so the
        # training-time scaler is applied, and only to the Big Five columns,
        # mirroring what train_model does.
        profile[:, -n_scaled:] = self.scaler.transform(profile[:, -n_scaled:])

        prediction = self.model.predict(profile)
        return prediction[0]
