In [1]:
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense

# Load the dataset
file_path = 'combined_data.xlsx' # Update with the actual path
data = pd.read_excel(file_path)

# Binarise every label column: -1 becomes 0 (positive) and every other value
# becomes 1 (negative). Anything that is not exactly -1 -- including a missing
# cell -- therefore ends up as 1.
# A vectorised comparison is used instead of DataFrame.applymap, which pandas
# deprecated in 2.1; the resulting values are the same.
label_columns = ['label'] + [col for col in data.columns if col.startswith('label_')]
data[label_columns] = data[label_columns].ne(-1).astype('int64')

# Determine the overall sentiment: a row is negative (1) as soon as any of its
# label columns is negative, hence the row-wise maximum.
data['overall_sentiment'] = data[label_columns].max(axis=1)

# Tokenize the sentences
# The tokenizer is fitted on every sentence in the dataset (before the
# train/test split below) and then used to turn each sentence into a list of
# integer word indices.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(data['sentence'])
sequences = tokenizer.texts_to_sequences(data['sentence'])

# Padding sequences to ensure uniform input size
# max_length is the length of the longest tokenised sentence; shorter ones are
# padded at the end ('post'). predict_sentiment reuses max_length later on.
max_length = max(len(seq) for seq in sequences)
X_pad = pad_sequences(sequences, maxlen=max_length, padding='post')

# Prepare target variable
y = data['overall_sentiment'].values

# Splitting dataset into training and testing set
# 20% of the rows are held out; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(X_pad, y, test_size=0.2, random_state=42)

# Vocabulary size
# NOTE(review): the +1 presumably leaves room for index 0, the padding value --
# confirm against the Keras Tokenizer documentation.
vocab_size = len(tokenizer.word_index) + 1

# CNN model creation
# Layer stack: word embedding -> 1D convolution -> global max pooling ->
# small dense layer -> single sigmoid unit.
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=50, input_length=max_length),
    Conv1D(filters=128, kernel_size=5, activation='relu'),
    GlobalMaxPooling1D(),
    Dense(10, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Binary cross-entropy pairs with the single sigmoid output and the 0/1 target.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Model training
# NOTE(review): the test split is also passed as validation data, so there is
# no separate validation set in this notebook.
model.fit(X_train, y_train, epochs=20, validation_data=(X_test, y_test))

# Model summary for reference
model.summary()


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 396, 50)           339550    
                                                                 
 conv1d (Conv1D)             (None, 392, 128)          32128     
                                                                 
 global_max_pooling1d (Glob  (None, 128)               0         
 alMaxPooling1D)                                                 
                                                                 
 dense (Dense)               (None, 10)                1290      
                                                                 
 dense_1 (Dense)       

# **Calculate Accuracy**

In [2]:
# Evaluate the model's performance
# X_test / y_test are the same arrays that were passed to model.fit as
# validation_data. verbose=0 keeps evaluate() from printing its own progress.
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)

# accuracy is the 'accuracy' metric requested in model.compile, shown as a percentage.
print(f"Test Accuracy: {accuracy*100:.2f}%")

Test Accuracy: 94.16%


# **ANALYZING NEW SENTENCES**

In [3]:
def predict_sentiment(text):
    """Classify a single piece of text as 'Negative' or 'Positive'.

    The text is encoded with the notebook-level ``tokenizer``, post-padded to
    ``max_length`` and passed to ``model``.

    Args:
        text: The sentence to classify.

    Returns:
        A ``(label, score)`` pair. ``score`` is the raw model output for the
        text; ``label`` is 'Negative' when the score is strictly greater than
        0.5 and 'Positive' otherwise. Note that the score is therefore the
        model's output for the negative class, not a confidence in whichever
        label was chosen.
    """
    encoded = tokenizer.texts_to_sequences([text])
    model_input = pad_sequences(encoded, maxlen=max_length, padding='post')
    # One input row and one output unit, so the score sits at [0][0].
    score = model.predict(model_input)[0][0]
    if score > 0.5:
        return 'Negative', score
    return 'Positive', score

# Example usage: classify a few terms-of-service style sentences one by one.
example_sentences = [
    "We cannot and will not be liable for any loss or damage arising from your failure to comply with the above.",
    "We may suspend or terminate your account or cease providing you with all or part of the Services",
    "You are responsible for your use of the Services and for any Content you provide, including compliance with applicable laws, rules, and regulations.",
]

for sentence in example_sentences:
    # Only the label is printed; the score is unpacked but not shown.
    sentiment, confidence = predict_sentiment(sentence)
    print(f"Predicted Sentiment: {sentiment}")


Predicted Sentiment: Negative
Predicted Sentiment: Negative
Predicted Sentiment: Positive


In [4]:
import nltk
from nltk.tokenize import sent_tokenize

# Ensure you've downloaded the 'punkt' tokenizer models
# (sent_tokenize, used by print_negative_sentences below, needs NLTK tokenizer data).
# NOTE(review): newer NLTK releases may look for the 'punkt_tab' resource
# instead -- confirm against the installed NLTK version.
nltk.download('punkt')

def print_negative_sentences(paragraph):
    """Print every sentence of ``paragraph`` that is classified as negative.

    The paragraph is split with ``sent_tokenize`` and each sentence is scored
    with ``predict_sentiment``. For every sentence labelled 'Negative' two
    lines are printed: the sentence with its score (two decimals) and the
    running number of negative sentences found so far.

    Args:
        paragraph: Text containing one or more sentences.

    Returns:
        None. All results are written to standard output.
    """
    def _negative_hits():
        # Lazily yield (sentence, score) for the sentences labelled negative,
        # so scoring and printing stay interleaved sentence by sentence.
        for candidate in sent_tokenize(paragraph):
            label, score = predict_sentiment(candidate)
            if label == 'Negative':
                yield candidate, score

    for count, (sentence, confidence) in enumerate(_negative_hits(), start=1):
        print(f"Negative Sentence: {sentence} (Confidence: {confidence:.2f})")
        print(f"Count : {count}")

# Example usage
# A multi-sentence terms-of-service excerpt: print_negative_sentences splits it
# into sentences and prints only the ones the model labels 'Negative'.
paragraph = "We may suspend or terminate your account or cease providing you with all or part of the Services at any time if we reasonably believe: (i) you have violated these Terms or our Rules and Policies, (ii) you create risk or possible legal exposure for us; (iii) your account should be removed due to unlawful conduct; (iv) your account should be removed due to prolonged inactivity; or (v) our provision of the Services to you is no longer commercially viable. We will make reasonable efforts to notify you by the email address associated with your account or the next time you attempt to access your account, depending on the circumstances. To the extent permitted by law, we may also terminate your account or cease providing you with all or part of the Services for any other reason or no reason at our convenience. In all such cases, the Terms shall terminate, including, without limitation, your license to use the Services, except that the following sections shall continue to apply: 2, 3, 5, 6, and the misuse provisions of Section 4 (“Misuse of the Services”). If you believe your account was terminated in error you can file an appeal following the steps found in our Help Center (https://help.x.com/forms/account-access/appeals). For the avoidance of doubt, these Terms survive the deactivation or termination of your account."


print_negative_sentences(paragraph)




[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Negative Sentence: We may suspend or terminate your account or cease providing you with all or part of the Services at any time if we reasonably believe: (i) you have violated these Terms or our Rules and Policies, (ii) you create risk or possible legal exposure for us; (iii) your account should be removed due to unlawful conduct; (iv) your account should be removed due to prolonged inactivity; or (v) our provision of the Services to you is no longer commercially viable. (Confidence: 1.00)
Count : 1
Negative Sentence: To the extent permitted by law, we may also terminate your account or cease providing you with all or part of the Services for any other reason or no reason at our convenience. (Confidence: 0.98)
Count : 2


In [5]:
# Read 'Spotify.txt' (UTF-8) in full and print the sentences the model labels
# as negative. Note that file_path rebinds the name used earlier for the
# training spreadsheet.
file_path = 'Spotify.txt'
with open(file_path, 'r', encoding='utf-8') as file:
    text = file.read()

print_negative_sentences(text)

Negative Sentence: you acknowledge that you have read and understood the agreements , accept these agreements , and agree to be bound by them . (Confidence: 1.00)
Count : 1
Negative Sentence: occasionally we may , in our discretion , make changes to the agreements . (Confidence: 0.98)
Count : 2
Negative Sentence: in some cases , we will notify you in advance , and your continued use of the service after the changes have been made will constitute your acceptance of the changes . (Confidence: 1.00)
Count : 3
Negative Sentence: spotify reserves the right , in its absolute discretion , to determine your eligibility for a trial , and , subject to applicable laws , to withdraw or to modify a trial at any time without prior notice and with no liability , to the greatest extent permitted under the law . (Confidence: 1.00)
Count : 4
Negative Sentence: in all cases , spotify reserves the right to remove or disable access to any user content for any or no reason , including but not limited to , u

In [6]:
# Read 'amazon.txt' (UTF-8) in full and print the sentences the model labels
# as negative. file_path and text are rebound, replacing the values from the
# previous cell.
file_path = 'amazon.txt'
with open(file_path, 'r', encoding='utf-8') as file:
    text = file.read()

print_negative_sentences(text)

Negative Sentence: We use your personal information to display interest-based ads for features , products , and services that might be of interest to you . (Confidence: 0.63)
Count : 1
Negative Sentence: Protection of Amazon Europe and Others : We release account and other personal information when we believe release is appropriate to comply with the law ; enforce or apply our Conditions of Use and other agreements ; or protect the rights , property or safety of Amazon Europe , our users or others . (Confidence: 1.00)
Count : 2
Negative Sentence: You can add or update certain information on pages such as those referenced in the Information You Can Access ? (Confidence: 1.00)
Count : 3
Negative Sentence: Sellers can add or update certain information in Seller Central , update their account information by accessing their Seller Account Information , and adjust the email or other communications they receive from us by updating their Notification Preferences . (Confidence: 0.96)
Count : 4


# **SAVING THE MODEL**

In [7]:
# Save model architecture as JSON
# The JSON string is written to 'model_architecture.json'; the weights go to a
# separate file below.
model_json = model.to_json()
with open('model_architecture.json', 'w') as json_file:
    json_file.write(model_json)

# Save model weights
model.save_weights('my_model_weights.h5')

In [8]:
from tensorflow.keras.models import load_model
import pickle

# Assuming `model` is your trained Keras model and `tokenizer` is your Keras tokenizer

# Save the model
# 'sentiment_model.h5' is the file the Flask cell reloads with load_model.
model.save('sentiment_model.h5')

# Save the tokenizer
# Pickled so the Flask cell can restore the same fitted tokenizer object.
with open('tokenizer.pickle', 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)

  saving_api.save_model(


# **RUNNING THE FLASK WEB SERVER**

In [12]:
# Install required packages
!pip install flask

# Import necessary libraries
from flask import Flask, request, render_template_string
import pickle
from tensorflow.keras.models import load_model
import nltk
from keras.preprocessing.sequence import pad_sequences

# Download the required NLTK data
# (analyze_sentiment below calls nltk.tokenize.sent_tokenize)
nltk.download('punkt')

# Load the saved model and tokenizer
# These rebind the notebook-level `model` and `tokenizer` names to the objects
# restored from disk.
# NOTE(review): pickle.load can execute arbitrary code -- only load a
# tokenizer.pickle that this notebook produced itself.
model = load_model('sentiment_model.h5')
with open('tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)

# Create Flask app
app = Flask(__name__)

# Main page: shows the input form and, after a submission, the analysis result.
@app.route('/', methods=['GET', 'POST'])
def index():
    """Serve the form on GET; on POST analyse the submitted text and render the result.

    The POST handler reads the ``text`` form field, runs ``analyze_sentiment``
    on it and passes the resulting sentence list and count to the template.
    """
    if request.method != 'POST':
        # Plain page load: render the empty form only.
        return render_template_string(html_template)

    submitted_text = request.form['text']
    flagged, total = analyze_sentiment(submitted_text)
    return render_template_string(html_template, negative_sentences=flagged, count=total)

# Function to analyze sentiment
def analyze_sentiment(paragraph):
    """Find the sentences of ``paragraph`` that the model classifies as negative.

    The paragraph is split into sentences with NLTK; each sentence is encoded
    with the loaded ``tokenizer``, padded, and scored by the loaded ``model``.
    A sentence counts as negative when its score is at least 0.5.

    Args:
        paragraph: Raw text submitted by the user.

    Returns:
        A ``(negative_sentences, count)`` tuple. ``negative_sentences`` is a
        list of HTML snippets, one red ``<span>`` per negative sentence with
        the sentence text HTML-escaped; ``count`` is the length of that list.
    """
    # Local import so this cell does not depend on an edit to the import lines.
    from html import escape

    # Take the sequence length from the loaded model rather than from the
    # notebook global ``max_length``, which only exists if the training cell
    # was run in the same kernel session.
    sequence_length = model.input_shape[1]

    negative_sentences = []
    for sentence in nltk.tokenize.sent_tokenize(paragraph):
        sequence = tokenizer.texts_to_sequences([sentence])
        # The training data was padded with padding='post'; the default
        # ('pre') would feed the model inputs laid out differently from
        # what it was trained on.
        padded_sequence = pad_sequences(sequence, maxlen=sequence_length, padding='post')
        # verbose=0: no progress bar per sentence in the server log.
        prediction = model.predict(padded_sequence, verbose=0)[0][0]
        if prediction >= 0.5:
            # The template renders these snippets with |safe, so the
            # user-supplied text must be escaped here to prevent HTML/script
            # injection.
            negative_sentences.append(f'<span style="color:red;">{escape(sentence)}</span>')
    return negative_sentences, len(negative_sentences)

# HTML template (Jinja2) for the single page of the app.
# - The result box is shown whenever `count` was passed to the template, so a
#   submission with zero negative sentences still gets visible feedback.
# - Each entry of `negative_sentences` is rendered with |safe, i.e. inserted
#   as raw HTML; whoever builds that list must HTML-escape any user text.
# - The textarea carries id="text" so that <label for="text"> is actually
#   associated with it.
html_template = """
<!DOCTYPE html>
<html>
  <head>
    <title>Sentiment Analysis</title>
    <style>
      body {
        font-family: Arial, sans-serif;
        background-color: #f5f5f5;
        padding: 20px;
      }
      h1 {
        color: #333;
        text-align: center;
      }
      form {
        max-width: 600px;
        margin: 0 auto;
        background-color: #fff;
        padding: 20px;
        border-radius: 5px;
        box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
      }
      textarea {
        width: 100%;
        height: 150px;
        padding: 10px;
        font-size: 16px;
        border: 1px solid #ccc;
        border-radius: 4px;
        box-sizing: border-box;
      }
      input[type=submit] {
        background-color: #4CAF50;
        color: white;
        padding: 10px 20px;
        border: none;
        border-radius: 4px;
        cursor: pointer;
        font-size: 16px;
      }
      input[type=submit]:hover {
        background-color: #45a049;
      }
      .result {
        max-width: 600px;
        margin: 20px auto 0;
        background-color: #fff;
        padding: 20px;
        border-radius: 5px;
        box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
      }
    </style>
  </head>
  <body>
    <h1>Sentiment Analysis</h1>
    <form method="post">
      <label for="text">Enter text:</label><br>
      <textarea id="text" name="text" rows="5" cols="50"></textarea><br>
      <input type="submit" value="Analyze">
    </form>
    {% if count is defined %}
    <div class="result">
      <p><strong>Total number of negative sentences: {{ count }}</strong></p>
      {% if negative_sentences %}
      <p>Negative sentences:</p>
      {% for sentence in negative_sentences %}
      <p>{{ sentence|safe }}</p>
      {% endfor %}
      {% endif %}
    </div>
    {% endif %}
  </body>
</html>
"""

# Run the Flask app
if __name__ == '__main__':
    # One port value is used for both the Colab proxy URL and the server, so
    # the printed link always points at the port Flask actually listens on.
    flask_port = 5000

    try:
        from google.colab.output import eval_js
    except ImportError:
        # Not running inside Google Colab: there is no proxy URL to print.
        # Flask logs its own local address when it starts.
        pass
    else:
        # Inside Colab the server is only reachable through the kernel proxy;
        # print that public URL before the blocking app.run() call.
        print(eval_js(f"google.colab.kernel.proxyPort({flask_port})"))

    app.run(port=flask_port)



[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


https://anqa9bgtgmm-496ff2e9c6d22116-5000-colab.googleusercontent.com/
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [02/Apr/2024 07:06:48] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [02/Apr/2024 07:06:49] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -




INFO:werkzeug:127.0.0.1 - - [02/Apr/2024 07:07:10] "POST / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [02/Apr/2024 07:07:10] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
