# ASPECT-BASED SENTIMENT ANALYSIS

### Python script that demonstrates aspect-based sentiment analysis, using the NLTK library for sentiment scoring and pandas for data manipulation



In [None]:
#Importing necessary packages and libraries
import pandas as pd
import re
import string
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
#Downloading the NLTK resources this script relies on:
#  punkt / punkt_tab - tokenizer data for word_tokenize ('punkt_tab' is what
#                      newer NLTK releases load; 'punkt' is kept for older ones)
#  stopwords         - English stop-word list
#  wordnet           - dictionary used by WordNetLemmatizer
#  vader_lexicon     - lexicon used by SentimentIntensityAnalyzer
for resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet', 'vader_lexicon'):
    nltk.download(resource)

####PREPROCESSING

In [None]:
#Resources shared by every preprocess_text call, built once instead of per review
#`http\S+` already covers https links, so no separate https alternative is needed
_URL_PATTERN = re.compile(r"http\S+|www\S+")
_PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)
_STOP_WORDS = None  #filled on first use
_LEMMATIZER = None  #filled on first use


def preprocess_text(text):
    """Normalise one piece of text into a space-joined string of cleaned tokens.

    Steps, in order: lowercase, strip URLs, strip punctuation, tokenize,
    drop English stop words, lemmatize each remaining token.

    Args:
        text: The raw text to clean. Anything that is not a ``str`` (for
            example ``None`` or a float NaN standing in for a missing value)
            is treated as having no text.

    Returns:
        The cleaned tokens joined by single spaces; ``''`` when ``text`` is
        not a string or nothing survives the cleaning.
    """
    global _STOP_WORDS, _LEMMATIZER

    #Nothing to clean: avoids AttributeError on .lower() for None/NaN values
    if not isinstance(text, str):
        return ''

    #The stop-word list and the lemmatizer do not change between calls,
    #so they are created on the first call and reused afterwards
    if _STOP_WORDS is None:
        _STOP_WORDS = frozenset(stopwords.words('english'))
    if _LEMMATIZER is None:
        _LEMMATIZER = WordNetLemmatizer()

    #Converting text to lowercase
    text = text.lower()

    #Removing URLs
    text = _URL_PATTERN.sub("", text)

    #Removing punctuation
    text = text.translate(_PUNCTUATION_TABLE)

    #Tokenizing
    tokens = word_tokenize(text)

    #Removing stop words
    #NOTE(review): NLTK's English stop-word list includes negations such as
    #"not" and "no"; dropping them can change the polarity a sentiment scorer
    #sees downstream - confirm this is intended.
    tokens = [word for word in tokens if word not in _STOP_WORDS]

    #Lemmatizing the tokens
    tokens = [_LEMMATIZER.lemmatize(word) for word in tokens]

    #Joining the tokens back into a single string
    return ' '.join(tokens)


In [None]:
#Loading the CSV data into a DataFrame (the path below is a placeholder to be replaced)
DATA_FILE = 'DATA FILE NAME'
df = pd.read_csv(DATA_FILE)

In [None]:
#Previewing the first five rows of the data
df.head(n=5)

In [None]:
#Running every 'review-en' entry through preprocess_text and storing the result as 'clean_text'
cleaned_reviews = df['review-en'].apply(preprocess_text)
df['clean_text'] = cleaned_reviews


In [None]:
#Sentiment Intensity Analyzer
sia = SentimentIntensityAnalyzer()

#Sentiment analysis on reviews: keep only the 'compound' value of each polarity result
df['sentiment_score'] = df['clean_text'].apply(lambda x: sia.polarity_scores(x)['compound'])

#Labels for a five-step rating scale (not necessary for the aspect analysis)
score_sentiment_mapping = {
    1: 'Very Negative',
    2: 'Negative',
    3: 'Neutral',
    4: 'Positive',
    5: 'Very Positive'
}


def _compound_to_rating(compound):
    """Convert a compound sentiment score into a 1-5 key of score_sentiment_mapping.

    The compound score is documented by VADER as normalised between -1 and 1.
    The +/-0.05 cut-offs follow the neutral band commonly used with VADER;
    the +/-0.5 cut-offs for the "very" classes are a judgement call.
    NOTE(review): tune the thresholds to the data if the labels are used.

    Args:
        compound: A compound sentiment score (float).

    Returns:
        An int from 1 (most negative) to 5 (most positive).
    """
    if compound <= -0.5:
        return 1
    if compound <= -0.05:
        return 2
    if compound < 0.05:
        return 3
    if compound < 0.5:
        return 4
    return 5


#The compound score is a float, not one of the integer keys 1-5, so mapping it
#directly would give NaN for virtually every row; bucket it first, then label it
df['sentiment_label'] = df['sentiment_score'].apply(_compound_to_rating).map(score_sentiment_mapping)


In [None]:
#Defining aspects and corresponding keywords
aspects = {
    'Application': ['application', 'registration', 'account', 'lagging', 'features', 'support', 'update', 'improvement'],
    'Price': ['price','cost','expensive','affordable','fee','money','payment','prices','payments', 'cheap'],
    'Tutors': ['tutor', 'tutoring', 'teacher', 'helpful', 'knowledgeable','solution','answer','instructor','mentor','performance','attention','correct','wrong', 'response', 'accuracy','accurate','clear'],
    'Helpfullness': ['helpfulness','assistance','helpful', 'quality', 'exam', 'reliable', 'coach']
}

aspect_sentiments = {}  #dict to store sentiment scores

#clean_text holds lemmatized tokens (see preprocess_text), so the keywords are
#lemmatized the same way; otherwise a plural keyword such as 'features' could
#never match the 'feature' that appears in the cleaned text
keyword_lemmatizer = WordNetLemmatizer()

#Calculating average sentiment scores
for aspect, keywords in aspects.items():
    #dict.fromkeys removes duplicates (e.g. 'price'/'prices') while keeping order
    terms = list(dict.fromkeys(keyword_lemmatizer.lemmatize(keyword.lower()) for keyword in keywords))

    #Escaping keeps every keyword a literal even if one ever contains a regex character
    pattern = '|'.join(re.escape(term) for term in terms)

    #NOTE(review): this is substring matching, so 'fee' also matches inside
    #'feel' or 'feedback' and 'exam' inside 'example'; switch to whole-word
    #matching (r'\b(?:...)\b') if that over-matching is not wanted.
    #na=False treats missing text as "no match" instead of propagating NaN into the mask
    matches_aspect = df['clean_text'].str.contains(pattern, case=False, regex=True, na=False)

    #An aspect with no matching reviews gets NaN (mean of an empty selection)
    aspect_sentiments[aspect] = df.loc[matches_aspect, 'sentiment_score'].mean()

#Creating df from the aspect sentiments
aspect_sentiment_df = pd.DataFrame.from_dict(aspect_sentiments, orient='index', columns=['Average Sentiment Score'])
aspect_sentiment_df.index.name = 'Aspect'

print(aspect_sentiment_df)



              Average Sentiment Score
Aspect                               
Application                  0.405978
Price                        0.290372
Tutors                       0.317885
Helpfullness                 0.381162
