In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping
from keras.models import load_model
import os

# 📂 Load the dataset
file_path = r"C:\Users\sagni\Downloads\New folder\mbti_1.csv"
data = pd.read_csv(file_path)

# ✂️ Drop missing values
data.dropna(inplace=True)

# 🧠 Preprocess text to get word counts (simplified)
data['text'] = data['posts'].str.replace(r'\|\|\|', ' ', regex=True).str.lower()

# 🔠 Encode target
le = LabelEncoder()
data['target'] = le.fit_transform(data['type'])

# ✂️ Input and output
X = data['text']
y = to_categorical(data['target'])

# ✏️ Convert text to simple TF-IDF features
from sklearn.feature_extraction.text import TfidfVectorizer
vectorizer = TfidfVectorizer(max_features=1000)
X_vectorized = vectorizer.fit_transform(X).toarray()

# 🔪 Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_vectorized, y, test_size=0.2, random_state=42)

# 🧠 Build the model
model = Sequential()
model.add(Dense(512, input_dim=1000, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(16, activation='softmax'))  # 16 MBTI types

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# 🛑 Early stopping
early_stop = EarlyStopping(monitor='val_loss', patience=3)

# 🚆 Train the model
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=32, callbacks=[early_stop])

# ✅ Save the model in .h5 format
output_model_h5 = r"C:\Users\sagni\Downloads\New folder\personality_estimator.h5"
model.save(output_model_h5)
print(f"✅ Model saved to .h5: {output_model_h5}")

# ✅ Export the model as TensorFlow SavedModel (alternative)
output_model_dir = r"C:\Users\sagni\Downloads\New folder\personality_estimator_savedmodel"
model.export(output_model_dir)  # Use export() instead of save()
print(f"✅ Model exported to directory: {output_model_dir}")


Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m217/217[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.2517 - loss: 2.2706 - val_accuracy: 0.4957 - val_loss: 1.7054
Epoch 2/10
[1m217/217[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5504 - loss: 1.5169 - val_accuracy: 0.5816 - val_loss: 1.3882
Epoch 3/10
[1m217/217[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.6473 - loss: 1.2102 - val_accuracy: 0.6271 - val_loss: 1.2860
Epoch 4/10
[1m217/217[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.7157 - loss: 0.9884 - val_accuracy: 0.6086 - val_loss: 1.2957
Epoch 5/10
[1m217/217[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.7416 - loss: 0.8829 - val_accuracy: 0.6265 - val_loss: 1.2751
Epoch 6/10
[1m217/217[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.7877 - loss: 0.7483 - val_accuracy: 0.6398 - val_loss: 1.2512
Epoch 7/10
[1m217/217[0m [32m━━━━━━━



✅ Model saved to .h5: C:\Users\sagni\Downloads\New folder\personality_estimator.h5
INFO:tensorflow:Assets written to: C:\Users\sagni\Downloads\New folder\personality_estimator_savedmodel\assets


INFO:tensorflow:Assets written to: C:\Users\sagni\Downloads\New folder\personality_estimator_savedmodel\assets


Saved artifact at 'C:\Users\sagni\Downloads\New folder\personality_estimator_savedmodel'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 1000), dtype=tf.float32, name='keras_tensor_12')
Output Type:
  TensorSpec(shape=(None, 16), dtype=tf.float32, name=None)
Captures:
  2897358895248: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2897358895824: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2899911639696: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2899911639504: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2899941917520: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2899941919056: TensorSpec(shape=(), dtype=tf.resource, name=None)
✅ Model exported to directory: C:\Users\sagni\Downloads\New folder\personality_estimator_savedmodel
