In [None]:
import psycopg2
from sshtunnel import SSHTunnelForwarder
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from dotenv import load_dotenv
import os

In [None]:
# Load environment variables from .env file
load_dotenv()


def _env_port(name, default):
    """Read a TCP port number from the environment.

    Args:
        name: Name of the environment variable to read.
        default: Port returned when the variable is unset or blank.

    Returns:
        The port as an int.

    Raises:
        ValueError: If the variable is set to something that is not an integer.
    """
    raw = os.getenv(name)
    if raw is None or not raw.strip():
        # int(None) would raise an opaque TypeError; fall back instead.
        return default
    try:
        return int(raw)
    except ValueError:
        raise ValueError(f"{name} must be an integer port, got {raw!r}") from None


# SSH settings used to open the tunnel (port falls back to the standard 22)
SSH_HOST = os.getenv('SSH_HOST')
SSH_PORT = _env_port('SSH_PORT', 22)
SSH_USER = os.getenv('SSH_USER')
SSH_PASSWORD = os.getenv('SSH_PASSWORD')

# PostgreSQL settings used through the tunnel (port falls back to the standard 5432)
POSTGRES_HOST = os.getenv('POSTGRES_HOST')
POSTGRES_USER = os.getenv('POSTGRES_USER')
POSTGRES_PASSWORD = os.getenv('POSTGRES_PASSWORD')
POSTGRES_DB = os.getenv('POSTGRES_DB')
POSTGRES_PORT = _env_port('POSTGRES_PORT', 5432)


In [None]:
# Open an SSH tunnel to the database host, then connect to PostgreSQL through
# the tunnel's local end.
tunnel = SSHTunnelForwarder(
    (SSH_HOST, SSH_PORT),
    ssh_username=SSH_USER,
    ssh_password=SSH_PASSWORD,
    remote_bind_address=(POSTGRES_HOST, POSTGRES_PORT),
    local_bind_address=('127.0.0.1', 5433)
)
tunnel.start()

try:
    connection = psycopg2.connect(
        database=POSTGRES_DB,
        user=POSTGRES_USER,
        password=POSTGRES_PASSWORD,
        host='127.0.0.1',
        port=tunnel.local_bind_port
    )
except Exception:
    # Do not leave the forwarder running (and local port 5433 bound) when the
    # database connection cannot be established; otherwise re-running this
    # cell fails because the port is still in use.
    tunnel.stop()
    raise
cursor = connection.cursor()


In [None]:
# Pull every row that has a comment, as (id, comment) tuples
comments_query = "SELECT id, comment FROM comments_fs WHERE comment IS NOT NULL"
cursor.execute(comments_query)
comments = cursor.fetchall()

In [None]:
import pandas as pd
import nltk

# VADER sentiment analyser
# Download the 'vader_lexicon' NLTK resource, then build the analyser whose
# polarity_scores() is used to score each comment.
nltk.download('vader_lexicon')
sia = SentimentIntensityAnalyzer()

In [None]:
# Function to classify sentiment
def classify_sentiment(score, threshold=0.05):
    """Map a polarity-score dict to a discrete sentiment label.

    Args:
        score: Mapping with a numeric 'compound' entry, such as the dict
            returned by ``sia.polarity_scores``.
        threshold: Cut-off applied symmetrically around zero; expected to be
            non-negative. Defaults to 0.05.

    Returns:
        1 if compound >= threshold, -1 if compound <= -threshold, otherwise 0.

    Raises:
        KeyError: If ``score`` has no 'compound' entry.
    """
    compound = score['compound']
    if compound >= threshold:
        return 1
    if compound <= -threshold:
        return -1
    return 0

In [None]:
# Add the integer column that will hold each comment's sentiment label.
sql = """
ALTER TABLE comments_fs ADD COLUMN sentiment INTEGER;
"""

try:
    cursor.execute(sql)
    connection.commit()
except Exception as e:
    # Best effort: the statement fails when the column already exists (e.g. on
    # a re-run). A failed statement leaves the transaction aborted, so roll it
    # back; otherwise every later statement on this connection is rejected.
    connection.rollback()
    print(e)

In [None]:
# Score every fetched comment and write its label back to the matching row.
# Nothing is committed here; the commit happens when the connection is closed out.
update_sql = "UPDATE comments_fs SET sentiment = %s WHERE id = %s"
for comment_id, comment in comments:
    sentiment = classify_sentiment(sia.polarity_scores(comment))
    cursor.execute(update_sql, (sentiment, comment_id))


In [None]:
# Commit changes and close connection.
# The nested try/finally blocks make sure the database handles and the SSH
# tunnel are released even if the commit (or closing the handles) raises.
try:
    connection.commit()
finally:
    try:
        cursor.close()
        connection.close()
    finally:
        tunnel.close()

In [1]:
import pandas as pd
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [2]:
# Load the comments export from CSV into a DataFrame.
df = pd.read_csv('comments_fs.csv')

# Show (rows, columns) as a quick sanity check on the load.
df.shape

(18161, 7)

In [3]:
# VADER sentiment analyser
# Download the 'vader_lexicon' NLTK resource (reported as already up-to-date
# when it is present locally), then build the analyser used to score comments.
nltk.download('vader_lexicon')
sia = SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/muireannoconnor/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [4]:
# Classify sentiment
def classify_sentiment(score, threshold=0.05):
    """Map a polarity-score dict to a discrete sentiment label.

    Args:
        score: Mapping with a numeric 'compound' entry, such as the dict
            returned by ``sia.polarity_scores``.
        threshold: Cut-off applied symmetrically around zero; expected to be
            non-negative. Defaults to 0.05.

    Returns:
        1 if compound >= threshold, -1 if compound <= -threshold, otherwise 0.

    Raises:
        KeyError: If ``score`` has no 'compound' entry.
    """
    compound = score['compound']
    if compound >= threshold:
        return 1
    if compound <= -threshold:
        return -1
    return 0

In [5]:
# Sentiment analysis for each comment.
# read_csv turns empty comment fields into NaN, and the analyser expects text,
# so missing comments are skipped (na_action='ignore') and left as <NA>.
# The nullable 'Int64' dtype keeps the labels as integers even when some are missing.
df['sentiment'] = (
    df['comment']
    .map(lambda comment: classify_sentiment(sia.polarity_scores(comment)), na_action='ignore')
    .astype('Int64')
)


In [8]:
# Preview the first rows of the frame, including the new sentiment column,
# to spot-check the labels against the comment text.
df.head(20)

Unnamed: 0,id,name,address,comment,created_at,latitude,longitude,sentiment
0,1,Equinox Brookfield Place,"225 Liberty St (at West St), New York, NY 10281",Hot yoga is on point. Clear views across the H...,2019-12-31 17:12:40,40.711974,-74.016237,1
1,2,Equinox Brookfield Place,"225 Liberty St (at West St), New York, NY 10281","Gym floor is a bit uninspiring, but the locati...",2019-12-17 14:00:01,40.711974,-74.016237,1
2,3,Equinox Brookfield Place,"225 Liberty St (at West St), New York, NY 10281",This location has low ceilings and gets very c...,2019-01-14 04:06:52,40.711974,-74.016237,-1
3,4,Equinox Brookfield Place,"225 Liberty St (at West St), New York, NY 10281",Excellent! They have classes to motivate you a...,2018-09-25 10:38:14,40.711974,-74.016237,1
4,5,Equinox Brookfield Place,"225 Liberty St (at West St), New York, NY 10281","Great views and nice lounge area, but overall ...",2018-07-10 22:41:19,40.711974,-74.016237,1
5,6,Asphalt Green,"212 N End Ave (Murray), New York, NY 10282","Hidden gym with basketball courts, pools, weig...",2017-03-21 03:00:56,40.711974,-74.016237,0
6,7,Asphalt Green,"212 N End Ave (Murray), New York, NY 10282","The equipment seemed solid, and I came for the...",2015-09-05 21:08:31,40.711974,-74.016237,1
7,8,Asphalt Green,"212 N End Ave (Murray), New York, NY 10282",It's really expensive and the camp was really ...,2015-01-25 03:30:28,40.711974,-74.016237,-1
8,9,Asphalt Green,"212 N End Ave (Murray), New York, NY 10282",The camp isn't good and it is OK i mean it is ...,2015-01-25 03:18:23,40.711974,-74.016237,1
9,10,Asphalt Green,"212 N End Ave (Murray), New York, NY 10282","Really great gym option if you swim, have a fa...",2014-08-27 01:03:33,40.711974,-74.016237,1


In [7]:
# Write the frame, including the sentiment column, to a new CSV file.
# index=False keeps the DataFrame index out of the output.
df.to_csv('sentiment_scores2.csv', index=False)