In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Suppress every warning for the rest of the session (this also hides
# deprecation notices raised by the libraries used below).
warnings.filterwarnings('ignore')

In [None]:
# Load the training split: ';'-separated "sentence;label" records, no header row.
df = pd.read_csv(
    "train.txt",
    sep=';',
    names=['sentence', 'label'],
)
df.head()

In [None]:
# Count how many rows carry each label to inspect class balance.
df.value_counts('label')

In [None]:
from sklearn.preprocessing import LabelEncoder

# Encode the string labels as integer ids, overwriting df['label'] in place.
# The guard keeps this cell idempotent: re-running it once the column is already
# numeric would re-fit the encoder on integers and lose the id -> class-name
# mapping that later cells need through `le.inverse_transform`.
if not pd.api.types.is_numeric_dtype(df['label']):
    le = LabelEncoder()
    df['label'] = le.fit_transform(df['label'])
df.head()

In [None]:
# Per-class loss weights (sklearn's 'balanced' mode) to counter label imbalance.
from sklearn.utils import class_weight

class_weights = class_weight.compute_class_weight(class_weight='balanced', classes=np.unique(df['label']), y=df['label'])

# Key each weight by its position in the sorted unique-label array.
class_weights_dict = {position: weight for position, weight in enumerate(class_weights)}
print(class_weights_dict)


In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical

In [None]:
# Fit the word-level tokenizer on the training sentences (num_words=10000 caps the
# ids it emits), then encode those same sentences as integer sequences.
tokanizer = Tokenizer(num_words=10000)
tokanizer.fit_on_texts(df['sentence'])
sequences = tokanizer.texts_to_sequences(df['sentence'])


In [None]:
# Number of distinct words the tokenizer counted in the training sentences.
len(tokanizer.word_counts)

In [None]:
# Bring every training sequence to a fixed length of 100 ids (padding goes at the end).
tr_df = pad_sequences(sequences, maxlen=100, padding='post')

# One-hot encode the integer label ids for the softmax output.
tr_y = to_categorical(df['label'])

In [None]:
# Model hyper-parameters.
# The tokenizer only emits ids below its `num_words` cap, so the embedding table
# does not need one row per entry of `word_index`; cap it so no rows go unused.
full_vocab_size = len(tokanizer.word_index) + 1  # +1: word ids start at 1, id 0 is the padding value
vocab_size = min(tokanizer.num_words or full_vocab_size, full_vocab_size)
embedding_dim = 128
max_length = 100
# Derive the output width from the fitted encoder instead of hard-coding it.
num_classes = len(le.classes_)

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Dropout,Bidirectional,GlobalAveragePooling1D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# Baseline variant: two stacked bidirectional LSTMs where the second one returns only
# its final state. That output is already 2-D (batch, features), so it feeds the dense
# head directly. GlobalAveragePooling1D expects a 3-D sequence input and therefore
# cannot follow an LSTM that has return_sequences=False, so it is not used here.
# NOTE: the next cell rebuilds `model` as a pooled variant, which is the one trained below.
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_length))
model.add(Bidirectional(LSTM(128, return_sequences=True)))
model.add(Dropout(0.2))
model.add(Bidirectional(LSTM(64)))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(num_classes, activation='softmax'))

In [None]:
# Bi-LSTM classifier: embedding -> two bidirectional LSTM layers -> average pooling
# over time steps -> dense head with a softmax over the classes.
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_length),
    Bidirectional(LSTM(128, return_sequences=True)),
    Dropout(0.2),
    # return_sequences=True keeps the time axis so the pooling layer receives 3-D input.
    Bidirectional(LSTM(64, return_sequences=True)),
    GlobalAveragePooling1D(),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(num_classes, activation='softmax'),
])

In [None]:
# Compile the model and train it with per-class loss weighting.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Stop when val_loss has not improved for 3 epochs and restore the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

history = model.fit(
    tr_df,
    tr_y,
    epochs=15,
    batch_size=32,
    validation_split=0.1,
    class_weight=class_weights_dict,
    callbacks=[early_stopping],
)


In [None]:
# Evaluate on test.txt, reusing the tokenizer and label encoder fitted on the training data.
from sklearn.metrics import accuracy_score

df_test = pd.read_csv("test.txt", delimiter=';', names=['sentence', 'label'])
df_test['label'] = le.transform(df_test['label'])
test_sequences = tokanizer.texts_to_sequences(df_test['sentence'])
test_df = pad_sequences(test_sequences, maxlen=100, padding='post')
# Pin the one-hot width to the model's output size. Without num_classes,
# to_categorical infers the width from the largest label present, so a file that
# lacks the highest class id would produce targets of the wrong shape.
test_y = to_categorical(df_test['label'], num_classes=num_classes)

# Accuracy as reported by Keras.
loss, accuracy = model.evaluate(test_df, test_y)
print(f"Test Accuracy: {accuracy}")

# Same metric recomputed from the arg-max predictions with scikit-learn.
y_pred = model.predict(test_df)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(test_y, axis=1)

accuracy = accuracy_score(y_true_classes, y_pred_classes)
print(f"Test Accuracy: {accuracy}")


In [None]:
# Evaluate on val.txt, reusing the tokenizer and label encoder fitted on the training data.
# NOTE: the variable names (df_test, test_df, test_y, ...) are shared with the test-set
# cell above, so running this cell overwrites those values with the validation data.
from sklearn.metrics import accuracy_score

df_test = pd.read_csv("val.txt", delimiter=';', names=['sentence', 'label'])
df_test['label'] = le.transform(df_test['label'])
test_sequences = tokanizer.texts_to_sequences(df_test['sentence'])
test_df = pad_sequences(test_sequences, maxlen=100, padding='post')
# Pin the one-hot width to the model's output size. Without num_classes,
# to_categorical infers the width from the largest label present, so a file that
# lacks the highest class id would produce targets of the wrong shape.
test_y = to_categorical(df_test['label'], num_classes=num_classes)

# Accuracy as reported by Keras. The message names the split actually evaluated
# (this cell reads val.txt, not test.txt).
loss, accuracy = model.evaluate(test_df, test_y)
print(f"Validation Accuracy: {accuracy}")

# Same metric recomputed from the arg-max predictions with scikit-learn.
y_pred = model.predict(test_df)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(test_y, axis=1)

accuracy = accuracy_score(y_true_classes, y_pred_classes)
print(f"Validation Accuracy: {accuracy}")


In [None]:
import pickle

# Persist the fitted tokenizer so inference code can reuse the same vocabulary.
with open('tokenizer.pkl', 'wb') as tokenizer_file:
    pickle.dump(tokanizer, tokenizer_file)


In [None]:
import pickle

# Persist the trained `model` with Keras' own `model.save` (HDF5 file 'model.h5');
# pickle is imported above but is not used for the model itself.
model.save('model.h5')

In [None]:
# Display the training frame; by now `label` holds the integer ids written by the
# LabelEncoder cell.
df

In [None]:
# Round-trip check: reload the saved tokenizer and model from disk, classify one
# sentence, and map the predicted class id back to its label name.
import pickle

import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Restore the artifacts written by the earlier save cells.
with open('tokenizer.pkl', 'rb') as tokenizer_file:
    loaded_tokenizer = pickle.load(tokenizer_file)
loaded_model = load_model('model.h5')

# Sentence to classify.
input_sentence = "i didnt feel humiliated	"

# Same preprocessing as training: word ids, then a fixed length of 100 with padding at the end.
sequence = loaded_tokenizer.texts_to_sequences([input_sentence])
padded_sequence = pad_sequences(sequence, maxlen=100, padding='post')

# Class probabilities for the single input row; arg-max picks the winning class id.
prediction = loaded_model.predict(padded_sequence)
predicted_class_index = np.argmax(prediction)

# Decode the id with the in-memory label encoder `le` (it is not reloaded from disk here).
predicted_label = le.inverse_transform([predicted_class_index])[0]

print(f"The predicted feeling is: {predicted_label}")


In [None]:
!pip install streamlit
!pip install pyngrok


In [None]:
import os
from getpass import getpass

from pyngrok import ngrok

# Terminate any open tunnels in case they're already running.
ngrok.kill()

# Never store the authtoken in the notebook: read it from the NGROK_AUTHTOKEN
# environment variable, or prompt for it without echoing when the variable is unset.
# NOTE(review): the token that used to be hard-coded here has been exposed and
# should be revoked in the ngrok dashboard.
ngrok_authtoken = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
ngrok.set_auth_token(ngrok_authtoken)

# Open an HTTP tunnel to port 8501 (default Streamlit port).
public_url = ngrok.connect(8501, proto="http")
public_url

In [None]:
# Print this machine's public IPv4 address.
# NOTE(review): presumably used as the localtunnel access password -- confirm.
!wget -q -O- ipv4.icanhazip.com


In [None]:
# Mapping from encoded label id back to the original class name.
# LabelEncoder assigns each id by position in `le.classes_`, so enumerating that
# array gives the complete mapping without inverse-transforming every row of `df`.
label_to_delabel = dict(enumerate(le.classes_))
print(label_to_delabel)

In [None]:
!streamlit run app.py & npx localtunnel --port 8500

In [None]:
# Build a small frame holding two reproducibly sampled rows for every label.
# Each label is sampled independently with the same seed (42), in order of first
# appearance of the label in `df`.
samples_per_label = [
    df[df['label'] == label].sample(n=2, random_state=42)
    for label in df['label'].unique()
]

# Stack the per-label samples into a single DataFrame and display it.
new_df = pd.concat(samples_per_label)

new_df

In [None]:
# Sentence text (column 0) of the row at position 11433.
df.iat[11433, 0]

In [None]:
# Sentence text (column 0) of the row at position 15428.
df.iat[15428, 0]


In [None]:
# Sentence text (column 0) of the row at position 4989.
df.iat[4989, 0]

In [None]:
# Sentence text (column 0) of the row at position 14343.
df.iat[14343, 0]

In [None]:
# Sentence text (column 0) of the row at position 14133.
df.iat[14133, 0]

In [None]:
# Sentence text (column 0) of the row at position 4996.
df.iat[4996, 0]

In [None]:
# Sentence text (column 0) of the row at position 10462.
df.iat[10462, 0]