# Part 3 - Sentiment Analysis 

## 1. Importing Libraries
### 1.1 Libraries

In [1]:
#Library to Access Amazon
import boto3

# Library to provide data structures(dataframes)
import pandas as pd
import numpy as np

### 1.2 Configuration

In [2]:
# Credentials are read from the project's config file
from configs import configs

# Amazon Comprehend client, authenticated with the keys held in the config file
comprehend = boto3.client(
    service_name="comprehend",
    region_name="us-east-2",
    aws_access_key_id=configs["amazon_credentials"]["AWS_SERVER_PUBLIC_KEY"],
    aws_secret_access_key=configs["amazon_credentials"]["AWS_SERVER_SECRET_KEY"],
)

# Input: CSV file written by the pre-processing step
processed_tweet_df_path = "../Data/processed_tweet_data.csv"

# Output: CSV file that will hold the same data plus the sentiment columns
processed_tweet_df_path_with_sentiment = "../Data/processed_tweet_df_path_with_sentiment.csv"

### 1.3 Loading Dataframe

In [3]:
# Read the pre-processed tweets into a dataframe (no CSV column is used as the index)
tweet_df = pd.read_csv(filepath_or_buffer=processed_tweet_df_path, index_col=False)

In [1]:
# Drop the rows whose "tweet" value is missing.
# Needs `tweet_df` from the loading cell, so that cell has to be run first.
tweet_df = tweet_df.dropna(subset=["tweet"])

NameError: name 'tweet_df' is not defined

## 2. Performing Sentiment Analysis
### 2.1 Defining Functions

In [4]:
def apply_comprehend(tweet):

    """
    This function uses amazon comprehend to predict the sentiment behind the tweet.

    Parameter:
    tweet: Each tweet (a string).

    Return "result": A dictionary that contains the overall sentiment of the tweet under "Sentiment",
    followed by the sentiment scores under "SentimentScore_Positive", "SentimentScore_Negative",
    "SentimentScore_Neutral" and "SentimentScore_Mixed".
    Every value is np.nan when the tweet is not a string, is an empty string, or when the request fails.
    """

    # Score categories read from the response; each becomes a "SentimentScore_<category>" key.
    score_categories = ("Positive", "Negative", "Neutral", "Mixed")

    # Fallback result (all nan), used for unusable input and for failed requests.
    result = {"Sentiment": np.nan}
    for category in score_categories:
        result[f"SentimentScore_{category}"] = np.nan

    # Nothing to analyse: a non-string value (e.g. nan for a missing tweet) or an empty string
    # would only be rejected by the request, so return the fallback without issuing it.
    if not isinstance(tweet, str) or not tweet:
        return result

    try:
        out = comprehend.detect_sentiment(
            Text=tweet,
            LanguageCode="en"
        )
        scores = out["SentimentScore"]

        # Built in full before being assigned, so a partly filled dictionary is never returned.
        result = {
            "Sentiment": out["Sentiment"],
            **{f"SentimentScore_{category}": scores[category] for category in score_categories}
        }

    except Exception as e:
        # Best effort on purpose: one failing tweet must not stop the whole run.
        # The all-nan fallback is returned and the error is reported.
        print(f"Exception: {e}")

    return result

### 2.2 Performing Sentiment Analysis

In [6]:
#Time
from time import time
start_time = time()

# Columns produced by apply_comprehend, in the order they are appended to the dataframe.
sentiment_columns = [
    "Sentiment",
    "SentimentScore_Positive",
    "SentimentScore_Negative",
    "SentimentScore_Neutral",
    "SentimentScore_Mixed",
]

# Apply apply_comprehend to every tweet. Each call returns a dictionary, so the list of
# dictionaries is converted into a dataframe in one step (one column per key) that shares
# the index of tweet_df. Naming the columns keeps the structure stable even with no rows.
sentiment_df = pd.DataFrame(
    tweet_df["tweet"].map(apply_comprehend).tolist(),
    index=tweet_df.index,
    columns=sentiment_columns,
)

# Storing the results into the dataframe under new columns (rows are matched on the index).
tweet_df_with_sentiment = tweet_df.merge(
    sentiment_df,
    left_index=True,
    right_index=True
)

#Time taken
print(f"\nTotal time = {time() - start_time} s")


Total time = 718.1327052116394 s


In [7]:
# Label the index of the dataframe "Index_number"
tweet_df_with_sentiment.index.name = "Index_number"

In [8]:
# Verifying the structure: (number of rows, number of columns) of the merged dataframe
tweet_df_with_sentiment.shape

(4223, 14)

### 2.3 Viewing Results

In [9]:
# Display five randomly chosen rows together with their sentiment columns
tweet_df_with_sentiment.sample(n=5)

Unnamed: 0_level_0,tweet,date,author,hashtags,followers_count,friends_count,coordinates,retweet_count,favorite_count,Sentiment,SentimentScore_Positive,SentimentScore_Negative,SentimentScore_Neutral,SentimentScore_Mixed
Index_number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1301,attent everyon mani peopl n't know space inves...,2022-02-22 11:40:28+00:00,tu_spark,[],48,508,,0,1,NEUTRAL,0.001148,0.002327,0.996476,4.9e-05
3580,help old ladi freez death enrich peopl destroy...,2022-02-22 10:52:29+00:00,BernhardSteinb4,[],134,26,,0,0,MIXED,0.008521,0.130838,0.384766,0.475875
2889,moon via /r/bitcoin,2022-02-22 11:05:35+00:00,btc_fan,[],261,5,,0,0,NEUTRAL,0.015457,0.014115,0.907606,0.062822
957,chri record old silk road bitcoin rap remix ho...,2022-02-22 11:48:17+00:00,EBTURKfx,['HODLGANG'],46,39,,0,0,NEUTRAL,0.000629,0.000374,0.998975,2.2e-05
254,stockmarket babydog bitcoin,2022-02-22 12:02:08+00:00,deris_bo,"['BabyDogeCoin', 'BabyDogeArmy', 'stockmarkets...",447,1074,,1,3,NEUTRAL,0.000304,0.000531,0.999125,4e-05


## 3. Saving Dataframe

In [1]:
# Write the dataframe to the "processed_tweet_df_path_with_sentiment" path; the index is not written.
# Needs `tweet_df_with_sentiment` from section 2.2, so that cell has to be run first.
tweet_df_with_sentiment.to_csv(processed_tweet_df_path_with_sentiment, index=False)

NameError: name 'tweet_df_with_sentiment' is not defined