<a href="https://colab.research.google.com/github/leman-cap13/my_projects/blob/main/Twitter_Sentiment_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Open Colab's upload dialog. The file expected here is the Kaggle API token
# (kaggle.json), which the following cell copies into ~/.kaggle.
from google.colab import files
files.upload()

In [None]:
# Install the uploaded kaggle.json under ~/.kaggle for the kaggle CLI call below.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
# Restrict the credentials file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the "twitter-entity-sentiment-analysis" dataset archive with the kaggle CLI.
!kaggle datasets download jp797498e/twitter-entity-sentiment-analysis

In [None]:
import zipfile

# Unpack the downloaded dataset archive into the current working directory.
archive_path = '/content/twitter-entity-sentiment-analysis.zip'
with zipfile.ZipFile(archive_path, mode='r') as archive:
    archive.extractall()

In [None]:
import pandas as pd

In [None]:
# The CSV is read without a header row, so column names are supplied explicitly.
columns = ["ID", "Entity", "Label", "Text"]
df = pd.read_csv(
    '/content/twitter_training.csv',
    header=None,
    names=columns,
)
df

In [None]:
# Bar chart of how many rows carry each label; the trailing ';' hides the Axes repr.
label_counts = df['Label'].value_counts()
label_counts.plot.bar();

In [None]:
# Number of rows per value of the Entity column.
df['Entity'].value_counts()

In [None]:
# Count of missing values in each column.
df.isna().sum()

In [None]:
# Number of rows that are exact duplicates of an earlier row.
df.duplicated().sum()

In [None]:
# Remove duplicated rows in place; the index is NOT reset, so index labels now have gaps.
df.drop_duplicates(inplace=True)

In [None]:
# Remove, in place, every row that has a missing value in any column.
df.dropna(inplace=True)

In [None]:
import re

def clean_text(text):
    """Normalize a raw tweet into lowercase text without Twitter markup.

    Steps, in order: drop @mentions, drop '#' characters (the hashtag word is
    kept), drop the retweet marker "RT", drop http/https URLs, drop every
    character that is neither a word character nor whitespace, and lowercase.

    Args:
        text: The raw tweet as a ``str``.

    Returns:
        The cleaned, lowercased string. Whitespace is not collapsed or
        stripped, so leading/trailing/double spaces may remain.
    """
    text = re.sub(r'@[A-Za-z0-9_]+', '', text)
    text = re.sub(r'#', '', text)
    # \b anchors "RT" to the start of a word; without it the pattern also ate
    # the tail of ordinary words ending in "RT" (e.g. "START now" -> "STAnow").
    text = re.sub(r'\bRT\s+', '', text)
    text = re.sub(r'https?://\S+', '', text)
    text = re.sub(r'[^\w\s]', '', text)
    return text.lower()

# Run clean_text over every raw tweet and store the result in a new column.
df['clean_text'] = df['Text'].map(clean_text)


In [None]:
# Display the frame, now including the clean_text column.
df

In [None]:
# The raw tweet column is no longer needed once clean_text exists; drop it in place.
df.drop(columns=['Text'], inplace=True)

In [None]:
# Preview the first rows after removing the Text column.
df.head()

In [None]:
import nltk
# Download the WordNet corpus used by WordNetLemmatizer.
nltk.download('wordnet')
from nltk.stem import WordNetLemmatizer
# One shared lemmatizer instance, used by lemmatize_text below.
lemmatizer=WordNetLemmatizer()

def lemmatize_text(text):
    """Lemmatize every whitespace-separated token of `text` as a verb.

    The text is split with ``str.split()``, each token is passed to the
    module-level ``lemmatizer`` with ``pos="v"``, and the results are joined
    back together with single spaces.
    """
    return ' '.join(
        lemmatizer.lemmatize(token, pos="v") for token in text.split()
    )

In [None]:
# Replace each cleaned tweet with its lemmatized form.
df['clean_text'] = df['clean_text'].map(lemmatize_text)

In [None]:
# Peek at the second remaining row. Positional access is used because rows
# were dropped earlier without resetting the index, so the index *label* 1 is
# not guaranteed to exist and `df['clean_text'][1]` could raise KeyError.
df['clean_text'].iloc[1]

In [None]:
# Integer id for each sentiment class. The id order must match the
# `class_labels` list used when decoding predictions.
label_to_id = {'Neutral':0,'Positive':1,'Negative':2,'Irrelevant':3}

# Encode only while the column still holds the string labels: mapping the
# already-encoded integers a second time (cell re-run) would turn every value
# into NaN because no integer is a key of the dictionary.
if not pd.api.types.is_numeric_dtype(df['Label']):
    df['Label'] = df['Label'].map(label_to_id)

In [None]:
# Preview the first rows after the Label column was mapped to integer ids.
df.head()

In [None]:
import tensorflow as tf

In [None]:
# Seed TensorFlow's global random generator.
tf.random.set_seed(42)

# Text -> integer-id layer: vocabulary limited to 10,000 tokens, and every
# output sequence has a fixed length of 50.
text_vector_layer = tf.keras.layers.TextVectorization(
    output_sequence_length=50,
    max_tokens=10000,
)

# Build the vocabulary from the cleaned, lemmatized tweets.
text_vector_layer.adapt(df['clean_text'])

In [None]:
# Preview only the first entries: the full vocabulary can hold up to 10,000
# tokens (max_tokens) and would flood the cell output.
text_vector_layer.get_vocabulary()[:20]

In [None]:
# Model inputs (cleaned text) and targets (integer class ids) taken from the frame.
texts=df['clean_text'].values
labels=df['Label'].values

In [None]:
# Display the label values extracted from the Label column.
labels

In [None]:
# Run the vectorizer on all texts to inspect its output. The model below
# contains text_vector_layer as its first layer, so it vectorizes raw strings itself.
vectorized_text=text_vector_layer(texts)
vectorized_text

In [None]:
# dataset=tf.data.Dataset.from_tensor_slices((vectorized_text,labels))

In [None]:
# Size of the learned vocabulary; used as the Embedding layer's input_dim.
vocabulary = text_vector_layer.get_vocabulary()
vocab_size = len(vocabulary)
vocab_size

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# One-hot encode into a NEW name. Overwriting `labels` made this cell
# non-idempotent: on a re-run the already one-hot array was encoded again,
# producing targets with the wrong shape.
labels_onehot = tf.keras.utils.to_categorical(labels, num_classes=4)

# Hold out 20% of the rows; stratify on the integer class ids so both splits
# keep the same class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    texts,
    labels_onehot,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)

# Datasets of (raw text, one-hot label) pairs; vectorization happens inside the model.
train_data = tf.data.Dataset.from_tensor_slices((X_train, y_train))
test_data = tf.data.Dataset.from_tensor_slices((X_test, y_test))

# Only the training stream is shuffled; both are batched by 32 and prefetched.
train = train_data.shuffle(1000, seed=13).batch(32).prefetch(1)
test = test_data.batch(32).prefetch(1)

In [None]:
# Text classifier: raw string -> token ids -> embeddings -> LSTM -> 4-way softmax.
model = tf.keras.models.Sequential()
model.add(text_vector_layer)
# mask_zero=True: id 0 is treated as padding and masked for the LSTM.
model.add(
    tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=64, mask_zero=True)
)
# Only the final LSTM state is passed on (return_sequences=False).
model.add(tf.keras.layers.LSTM(128, return_sequences=False))
model.add(tf.keras.layers.Dense(4, activation='softmax'))

# Targets are one-hot vectors, hence categorical cross-entropy.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 10 epochs; the held-out `test` dataset doubles as validation data.
model.fit(train, validation_data=test, epochs=10)


In [None]:
example_sentence='Now the President is slapping Americans in the face that he really did commit an unlawful act'

# The model was trained on text that went through clean_text and
# lemmatize_text, so the same preprocessing is applied here; feeding the raw
# sentence would present tokens in a form the model never saw during training.
example_clean = lemmatize_text(clean_text(example_sentence))
example_tensor = tf.constant([example_clean])

# Class probabilities, one row per input sentence.
prediction = model.predict(example_tensor)

# Index of the most probable class for each input sentence.
predicted_class = prediction.argmax(axis=-1)

In [None]:
# Display the predicted class index array for the first example.
predicted_class

In [None]:
# Label names ordered by the integer ids assigned when the Label column was
# encoded (Neutral=0, Positive=1, Negative=2, Irrelevant=3).
class_labels = ['Neutral', 'Positive', 'Negative', 'Irrelevant']

predicted_label = class_labels[predicted_class[0]]
print(f"Predicted Class: {predicted_label}")

In [None]:
example_second_sentence='how beautiful application we have'
# Apply the same clean_text + lemmatize_text preprocessing the training text
# received, so the model sees input in the form it was trained on.
example_tensor=tf.constant([lemmatize_text(clean_text(example_second_sentence))])
prediction=model.predict(example_tensor)
# Index of the most probable class for the sentence.
pred=prediction.argmax(axis=-1)

In [None]:
# Display the predicted class index array for the second example.
pred

In [None]:
# Translate the predicted class index into its label name.
class_labels[pred[0]]

In [None]:
example_sentence_3='@Microsoft Why do I pay for WORD when it functions so poorly on my @SamsungUS Chromebook? 🙄'

# Apply the training-time preprocessing (mentions, punctuation and emoji are
# removed by clean_text, then tokens are lemmatized) before predicting.
exm_tensor=tf.constant([lemmatize_text(clean_text(example_sentence_3))])
# The probabilities get their own name instead of reusing `pred`, which the
# earlier cells use for a class-index array.
pred_probs=model.predict(exm_tensor)
pred_1=pred_probs.argmax(axis=-1)


In [None]:
# Display the predicted class index array for the third example.
pred_1

In [None]:
# Translate the predicted class index into its label name.
class_labels[pred_1[0]]