<center>
    
# Twitter Data Sentiment Analysis

**Programmer: Emmanuel Ndaliro**

    
</center>

### Importing necessary libraries

In [15]:
import pandas as pd
import pymongo
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

import gradio as gr

### Load and Inspect Data

In [16]:
# Read the tweet dataset from the notebook's working directory.
DATASET_PATH = 'Twitter_Data.csv'
df = pd.read_csv(DATASET_PATH)

# Preview the first rows to confirm the columns loaded as expected.
df.head()

Unnamed: 0,clean_text,category
0,when modi promised “minimum government maximum...,-1.0
1,talk all the nonsense and continue all the dra...,0.0
2,what did just say vote for modi welcome bjp t...,1.0
3,asking his supporters prefix chowkidar their n...,1.0
4,answer who among these the most powerful world...,1.0


### Checking for Missing Values

In [17]:
# Count the missing entries in each column before deciding how to clean them.
df.isna().sum()

clean_text    4
category      7
dtype: int64

### Removing Missing Values

In [18]:
# Drop every row that has a missing text or label.
# Rebinding the name (rather than inplace=True) leaves the frame object that
# earlier cells displayed untouched and keeps this cell safe to re-run.
df = df.dropna()

### Text Preprocessing

In [19]:
# Normalise the tweet text to lower case so matching is case-insensitive.
df = df.assign(clean_text=lambda frame: frame['clean_text'].str.lower())

### Data Splitting

In [20]:
# Features are the tweet text, targets are the sentiment category codes.
tweet_text = df['clean_text']
tweet_category = df['category']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    tweet_text,
    tweet_category,
    test_size=0.2,
    random_state=42,
)

### Feature Extraction

In [21]:
vectorizer = TfidfVectorizer()

# Fit the vocabulary and IDF weights on the training split only, then reuse the
# fitted vectorizer on the test split so no test information leaks into training.
# The tuple is evaluated left to right, so fitting happens before the test transform.
X_train_tfidf, X_test_tfidf = (
    vectorizer.fit_transform(X_train),
    vectorizer.transform(X_test),
)

### Training the model

In [22]:
# Train a multinomial Naive Bayes classifier on the TF-IDF features.
clf = MultinomialNB().fit(X_train_tfidf, y_train)

# Echo the fitted estimator as the cell output.
clf

### Model Evaluation

In [23]:
# Predict a category for every held-out tweet.
predictions = clf.predict(X_test_tfidf)

# Fraction of held-out tweets whose predicted category matches the true one.
accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
print(f'Model Accuracy: {accuracy}')


Model Accuracy: 0.5674970853531325


### MongoDB Connection

In [24]:
# Connection settings for the local MongoDB instance used by this notebook.
MONGO_URI = "mongodb://localhost:27017/"
MONGO_DB_NAME = "TwitterAnalysisDB"

client = pymongo.MongoClient(MONGO_URI)
db = client[MONGO_DB_NAME]

### Storing Raw Data in MongoDB

In [25]:
# Turn every cleaned row into a dict and bulk-insert it into the RawData collection.
# NOTE: nothing here clears the collection first, so re-running this cell
# appends the same rows again.
raw_records = df.to_dict(orient='records')
db["RawData"].insert_many(raw_records)

<pymongo.results.InsertManyResult at 0x2b60ea8e7a0>

### Storing Predictions in MongoDB

In [26]:
# Pair each held-out tweet with the category the model predicted for it.
output_data = X_test.to_frame(name='Text').assign(Predicted_Sentiment=predictions)

# Bulk-insert the predictions into the PredictedData collection.
prediction_records = output_data.to_dict(orient='records')
db["PredictedData"].insert_many(prediction_records)

<pymongo.results.InsertManyResult at 0x2b6728cf790>

### Save the results to a CSV file

In [27]:
# Human-readable names for the numeric sentiment categories.
sentiment_labels = {1: 'Positive', 0: 'Neutral', -1: 'Negative'}

# Add the readable label next to each numeric prediction.
output_data = output_data.assign(
    Sentiment_Label=output_data['Predicted_Sentiment'].map(sentiment_labels)
)

# Write the texts, predictions and labels to disk without the index column.
output_data.to_csv('sentiment_analysis_results.csv', index=False)

### Creating the Gradio Interface

In [28]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Load the saved predictions under their own name so the raw tweet frame `df`
# from the earlier cells is not overwritten when this cell runs.
results_df = pd.read_csv('sentiment_analysis_results.csv')

# Map each stored text to its label once, instead of scanning the whole frame
# on every request. keep='first' preserves the label of the first matching row.
_label_by_text = (
    results_df.drop_duplicates(subset='Text', keep='first')
    .set_index('Text')['Sentiment_Label']
    .to_dict()
)

# Build the VADER analyzer a single time rather than on every request.
_vader_analyzer = SentimentIntensityAnalyzer()

# Define the input and output components of the Gradio interface
text_input = "textbox"  # Use a string for input, not gr.inputs.Textbox()
sentiment_output = "label"  # Use a string for output, not gr.outputs.Label()


def calculate_sentiment(text):
    """Return the sentiment label for a piece of text.

    The text is first looked up among the texts saved in
    'sentiment_analysis_results.csv'. Those texts were lower-cased before
    being saved, so the lookup tries the input as typed and then its
    lower-cased forms. When no stored text matches, the label is derived
    from VADER's compound score instead.

    Args:
        text: A string containing the text to be analyzed. None is treated
            as empty input.

    Returns:
        The stored label for a matching text, otherwise "Positive",
        "Negative" or "Neutral" from the VADER compound score.
    """
    # Nothing to analyze; without this guard None would crash in the VADER fallback.
    if text is None:
        return "Neutral"

    # Exact text first, then case-insensitive variants.
    for candidate in (text, text.lower(), text.strip().lower()):
        if candidate in _label_by_text:
            return _label_by_text[candidate]

    # Blank input carries no sentiment; skip the analyzer.
    if not text.strip():
        return "Neutral"

    # Fall back to VADER on the text as typed and bucket the compound score
    # with a +/-0.05 neutral band.
    compound = _vader_analyzer.polarity_scores(text)['compound']
    if compound >= 0.05:
        return "Positive"
    if compound <= -0.05:
        return "Negative"
    return "Neutral"


# Create the Gradio interface with the 'calculate_sentiment' function as 'fn'
interface = gr.Interface(fn=calculate_sentiment, inputs=text_input, outputs=sentiment_output)

# Launch the Gradio interface.
# NOTE: share=True also publishes a temporary public URL for this app.
interface.launch(share=True)





Running on local URL:  http://127.0.0.1:7861
Running on public URL: https://ae2ec99c6472545b21.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


