In [1]:
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Embedding, Flatten, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
import language_tool_python
import tensorflow as tf

In [2]:
# Clear the TensorFlow session
# Resets Keras' global state so a re-run of the notebook starts from a clean slate.
tf.keras.backend.clear_session()

In [3]:
# Load only the text and label columns from the spreadsheet export
csv_path = 'Data for AI Assignment - Sheet1.csv'
wanted_columns = ['Text', 'Classification']
df = pd.read_csv(csv_path, usecols=wanted_columns)

In [4]:
# Preview the first rows to confirm the expected columns were loaded
df.head()

Unnamed: 0,Text,Classification
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger


In [5]:
# Drop duplicates
# Rows are compared on every loaded column; the frame is modified in place.
df.drop_duplicates(inplace=True)

In [6]:
# List the rows whose text contains any of the listed words.
# The match is a case-insensitive substring search, so e.g. 'shit' also hits 'shitty'.

profanity_list = ['fuck', 'Bastard', 'shit', 'sex']
profanity_pattern = '|'.join(profanity_list)
has_profanity = df['Text'].str.contains(profanity_pattern, case=False)
texts_with_profanities = df[has_profanity]
print("Texts with Profanities:", texts_with_profanities[['Text', 'Classification']], sep="\n")

Texts with Profanities:
                                                    Text Classification
34     i can t imagine a real life scenario where i w...            joy
58     i have this feeling that if i have anymore vig...            joy
72     i don t necessarily think f bombs and sex are ...            joy
95     i feel like throwing away the shitty piece of ...        sadness
206    i legs would feel shitty for a few miles but w...        sadness
...                                                  ...            ...
17703  i feel really shitty and it s seriously like t...        sadness
17753  i feel like everything that i hope to become a...        sadness
17791  i took care of myself by avoiding family event...        sadness
17796                      i feel like a moronic bastard        sadness
17979  i dont want to always be judgmental of particu...        sadness

[221 rows x 2 columns]


In [7]:
# Remove profanities
# Keep only the rows whose text matches none of the words in profanity_list
# (case-insensitive substring match). The trailing .copy() gives the filtered
# frame its own data, so the column assignments made in later cells write to an
# independent DataFrame instead of a slice (avoids pandas' SettingWithCopyWarning).
df = df[~df['Text'].str.contains('|'.join(profanity_list), case=False)].copy()

In [8]:
# Encode each emotion label as a numeric code in a new 'Classification_code' column

emotion_labels = ['sadness', 'anger', 'love', 'surprise', 'fear', 'joy']
emotion_codes = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5]
class_mapping = dict(zip(emotion_labels, emotion_codes))
df['Classification_code'] = df['Classification'].map(class_mapping)

In [9]:
# Inspect the frame after adding the numeric class codes (no grammar check runs in this cell)

df

Unnamed: 0,Text,Classification,Classification_code
0,i didnt feel humiliated,sadness,0.0
1,i can go from feeling so hopeless to so damned...,sadness,0.0
2,im grabbing a minute to post i feel greedy wrong,anger,0.1
3,i am ever feeling nostalgic about the fireplac...,love,0.2
4,i am feeling grouchy,anger,0.1
...,...,...,...
17996,i just keep feeling like someone is being unki...,anger,0.1
17997,im feeling a little cranky negative after this...,anger,0.1
17998,i feel that i am useful to my people and that ...,joy,0.5
17999,im feeling more comfortable with derby i feel ...,joy,0.5


In [10]:
# Build one LanguageTool checker for US English ('en-US'); has_grammar_errors below reuses it for every text
from language_tool_python import LanguageTool
tool = LanguageTool('en-US')

# Function to check for grammar errors
def has_grammar_errors(text):
    """Return True when LanguageTool reports at least one issue for ``text``.

    Parameters
    ----------
    text : str
        Sentence to check with the module-level ``tool`` instance.

    Returns
    -------
    bool
        True if ``tool.check`` returns one or more matches. Missing values
        (``None``/``NaN``), other non-string inputs and blank strings yield
        False without calling the checker.
    """
    # A DataFrame text column can hold NaN/None for missing cells; there is
    # nothing to check in that case, so do not pass it on to the checker.
    if not isinstance(text, str) or not text.strip():
        return False
    # NOTE(review): in the recorded run every one of the 17778 rows was flagged.
    # The texts are all lower-case, so capitalisation rules presumably fire on
    # each row -- confirm, and consider ignoring those rules if the flag should
    # discriminate between rows.
    matches = tool.check(text)
    return len(matches) > 0

In [11]:
# Run the grammar check on every text and store the boolean result in 'Grammar_errors'
grammar_flags = df['Text'].map(has_grammar_errors)
df['Grammar_errors'] = grammar_flags

In [12]:
# Show only the rows flagged by the grammar check
flagged_rows = df['Grammar_errors']
texts_with_grammar_errors = df[flagged_rows]
print("\nTexts with Grammar Errors:", texts_with_grammar_errors[['Text', 'Classification']], sep="\n")


Texts with Grammar Errors:
                                                    Text Classification
0                                i didnt feel humiliated        sadness
1      i can go from feeling so hopeless to so damned...        sadness
2       im grabbing a minute to post i feel greedy wrong          anger
3      i am ever feeling nostalgic about the fireplac...           love
4                                   i am feeling grouchy          anger
...                                                  ...            ...
17996  i just keep feeling like someone is being unki...          anger
17997  im feeling a little cranky negative after this...          anger
17998  i feel that i am useful to my people and that ...            joy
17999  im feeling more comfortable with derby i feel ...            joy
18000  i feel all weird when i have to meet w people ...           fear

[17778 rows x 2 columns]


In [13]:
# Turn the boolean grammar flag into a readable 'Correct' / 'Incorrect' label in 'Grammar_mark'
grammar_labels = {False: 'Correct', True: 'Incorrect'}
df['Grammar_mark'] = df['Grammar_errors'].map(grammar_labels)

In [14]:
# Print the frame, now including the grammar columns
print("\nDataFrame after marking grammar errors:", df, sep="\n")


DataFrame after marking grammar errors:
                                                    Text Classification  \
0                                i didnt feel humiliated        sadness   
1      i can go from feeling so hopeless to so damned...        sadness   
2       im grabbing a minute to post i feel greedy wrong          anger   
3      i am ever feeling nostalgic about the fireplac...           love   
4                                   i am feeling grouchy          anger   
...                                                  ...            ...   
17996  i just keep feeling like someone is being unki...          anger   
17997  im feeling a little cranky negative after this...          anger   
17998  i feel that i am useful to my people and that ...            joy   
17999  im feeling more comfortable with derby i feel ...            joy   
18000  i feel all weird when i have to meet w people ...           fear   

       Classification_code  Grammar_errors Grammar_mark  


In [15]:
# Vocabulary size limit passed to the tokenizer (name kept as-is: the model cell references it)
max_fatures = 2000
texts = df['Text'].values

# Fit the word index on the corpus, then convert each text to a padded integer sequence
tokenizer = Tokenizer(num_words=max_fatures, split=' ')
tokenizer.fit_on_texts(texts)
X = pad_sequences(tokenizer.texts_to_sequences(texts))

In [16]:
from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D

# Hyper-parameters
embed_dim = 128
lstm_out = 196
batch_size = 32

# One-hot encode the labels first so the output layer can be sized from the data
# instead of a hard-coded 6. dtype is set explicitly because recent pandas
# versions return boolean dummy columns by default.
Y = pd.get_dummies(df['Classification_code'], dtype='float32').values
num_classes = Y.shape[1]

# Embedding -> spatial dropout -> LSTM -> softmax over the emotion classes
model = Sequential()
model.add(Embedding(max_fatures, embed_dim,input_length = X.shape[1]))
model.add(SpatialDropout1D(0.4))
model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(num_classes,activation='softmax'))
model.compile(loss = 'categorical_crossentropy', optimizer='adam',metrics = ['accuracy'])
# summary() prints the table itself and returns None, so it is not wrapped in print()
model.summary()

# Hold out 33% of the rows for testing; random_state fixes the split
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size = 0.33, random_state = 42)
print(X_train.shape,Y_train.shape)
print(X_test.shape,Y_test.shape)

model.fit(X_train, Y_train, epochs = 25, batch_size=batch_size, verbose = 1)

# Score the trained model on the held-out split; evaluate() returns the loss
# followed by the metrics given to compile() (here: accuracy)
test_loss, test_accuracy = model.evaluate(X_test, Y_test, batch_size=batch_size, verbose=0)
print(f"Test loss: {test_loss:.4f} - test accuracy: {test_accuracy:.4f}")

model.save('/content/sen.h5')

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 61, 128)           256000    
                                                                 
 spatial_dropout1d (Spatial  (None, 61, 128)           0         
 Dropout1D)                                                      
                                                                 
 lstm (LSTM)                 (None, 196)               254800    
                                                                 
 dense (Dense)               (None, 6)                 1182      
                                                                 
Total params: 511982 (1.95 MB)
Trainable params: 511982 (1.95 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
(11911, 61) (11911, 6)
(5867, 61) (5867, 6)
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoc

  saving_api.save_model(
