In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import joblib

# ---------------------------------------------------------------------------
# Train a BiLSTM text classifier that maps a user's posts to an MBTI type,
# and persist the label encoder, tokenizer, and trained model to disk.
# ---------------------------------------------------------------------------

# Configuration: every tunable value lives here so the tokenizer, the padding
# step, and the network cannot drift out of sync with each other.
SEED = 42
DATA_DIR = r"C:\Users\sagni\Downloads\New folder"
VOCAB_SIZE = 10000   # shared by Tokenizer(num_words=...) and Embedding(input_dim=...)
MAX_LEN = 300        # shared by pad_sequences(maxlen=...) and the model input shape
TEST_SIZE = 0.2

# Seed Python, NumPy, and TensorFlow so weight init, shuffling, and dropout
# are repeatable across Restart & Run All.
tf.keras.utils.set_random_seed(SEED)

# Load dataset (the columns read below are 'posts' and 'type').
df = pd.read_csv(DATA_DIR + r"\mbti_1.csv")

# Preprocess text: lowercase, then delete every character outside [a-z ].
# NOTE(review): characters are deleted rather than replaced with a space, so
# words separated only by punctuation are fused together. Any inference code
# must apply exactly the same cleaning before tokenizing.
df['clean_text'] = df['posts'].str.lower().str.replace(r'[^a-z ]', '', regex=True)

# Encode MBTI types as integer class ids and persist the encoder so predicted
# ids can be mapped back to type names later.
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['type'])
joblib.dump(label_encoder, DATA_DIR + r"\label_encoder.joblib")

# Derive the class count from the data instead of hard-coding it, so the
# one-hot width and the softmax width always agree with the encoder.
num_classes = len(label_encoder.classes_)

# Tokenize: fit the vocabulary, convert each text to an id sequence, and
# pad/truncate every sequence to MAX_LEN.
tokenizer = Tokenizer(num_words=VOCAB_SIZE, oov_token='<OOV>')
tokenizer.fit_on_texts(df['clean_text'])
sequences = tokenizer.texts_to_sequences(df['clean_text'])
X = pad_sequences(sequences, maxlen=MAX_LEN)

# Save tokenizer
joblib.dump(tokenizer, DATA_DIR + r"\tokenizer.joblib")

# One-hot targets for categorical cross-entropy.
y = tf.keras.utils.to_categorical(df['label'], num_classes=num_classes)

# Split: a fixed random_state makes the split reproducible, and stratifying on
# the integer label keeps each class's proportion the same in train and test.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_SIZE,
    random_state=SEED,
    stratify=df['label'],
)

# Build model. An explicit Input layer replaces Embedding(input_length=...),
# which newer Keras releases deprecate; it also means the model is built
# before summary() is called.
model = Sequential([
    tf.keras.Input(shape=(MAX_LEN,), dtype="int32"),
    Embedding(input_dim=VOCAB_SIZE, output_dim=128),
    Bidirectional(LSTM(64, return_sequences=True)),
    Dropout(0.5),
    LSTM(64),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dense(num_classes, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Train. The held-out split is passed as validation data for monitoring only.
model.fit(X_train, y_train, epochs=5, batch_size=64, validation_data=(X_test, y_test))

# Save model
model.save(DATA_DIR + r"\personality_estimator_lstm.h5")




Epoch 1/5
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 350ms/step - accuracy: 0.1743 - loss: 2.5019 - val_accuracy: 0.2196 - val_loss: 2.2997
Epoch 2/5
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 355ms/step - accuracy: 0.2158 - loss: 2.2859 - val_accuracy: 0.2196 - val_loss: 2.2974
Epoch 3/5
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 356ms/step - accuracy: 0.2109 - loss: 2.1679 - val_accuracy: 0.2110 - val_loss: 2.3546
Epoch 4/5
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 358ms/step - accuracy: 0.2711 - loss: 1.9696 - val_accuracy: 0.1769 - val_loss: 2.4557
Epoch 5/5
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 358ms/step - accuracy: 0.3726 - loss: 1.7225 - val_accuracy: 0.1746 - val_loss: 2.7479




In [2]:
from tensorflow.keras.models import load_model
import joblib
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences

# ---------------------------------------------------------------------------
# Interactive inference: load the saved model, tokenizer, and label encoder,
# then repeatedly read a line of text and print the predicted MBTI type.
# ---------------------------------------------------------------------------
import re

# Must equal the maxlen passed to pad_sequences when the model was trained.
MAX_LEN = 300

# Same character filter the training cell applies after lowercasing:
# everything outside [a-z ] is deleted.
_NON_LETTERS = re.compile(r'[^a-z ]')

# Load model and tokenizer.
# NOTE: joblib.load unpickles arbitrary objects; only load artifacts that were
# produced by the training cell, never files from an untrusted source.
model = load_model(r"C:\Users\sagni\Downloads\New folder\personality_estimator_lstm.h5")
tokenizer = joblib.load(r"C:\Users\sagni\Downloads\New folder\tokenizer.joblib")
label_encoder = joblib.load(r"C:\Users\sagni\Downloads\New folder\label_encoder.joblib")

# Predict loop
while True:
    text = input("\n📝 Enter text for personality prediction (or type 'exit' to quit):\n> ")
    # Ignore surrounding whitespace so " exit " also terminates the loop.
    if text.strip().lower() == 'exit':
        break

    # Apply the training-time cleaning; without it apostrophes and digits
    # would produce tokens the tokenizer was never fitted on.
    cleaned = _NON_LETTERS.sub('', text.lower())
    sequence = tokenizer.texts_to_sequences([cleaned])

    # An empty token list would be padded to all zeros and still yield a
    # "prediction"; ask for real input instead.
    if not sequence[0]:
        print("\n⚠️ No recognizable words in input; please enter some text.")
        continue

    padded = pad_sequences(sequence, maxlen=MAX_LEN)
    prediction = model.predict(padded)[0]   # class probabilities for the single input
    mbti_index = np.argmax(prediction)
    confidence = prediction[mbti_index]
    mbti_type = label_encoder.inverse_transform([mbti_index])[0]
    print(f"\n🔍 Predicted MBTI Type: {mbti_type} (Confidence: {confidence:.2f})")





📝 Enter text for personality prediction (or type 'exit' to quit):
>  i am ver angry at my cook now


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 741ms/step

🔍 Predicted MBTI Type: INFP (Confidence: 0.69)



📝 Enter text for personality prediction (or type 'exit' to quit):
>  i am very sad for my pet


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step

🔍 Predicted MBTI Type: INFP (Confidence: 0.70)



📝 Enter text for personality prediction (or type 'exit' to quit):
>  i am okay


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step

🔍 Predicted MBTI Type: INFP (Confidence: 0.72)



📝 Enter text for personality prediction (or type 'exit' to quit):
>  I love to spend time thinking about the mysteries of life and enjoy deep conversations.


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step

🔍 Predicted MBTI Type: INFP (Confidence: 0.41)



📝 Enter text for personality prediction (or type 'exit' to quit):
>  exit
