In [None]:
import psycopg2
from sshtunnel import SSHTunnelForwarder
from transformers import pipeline
from dotenv import load_dotenv
import os


In [None]:
# Load connection settings from the environment (populated from a local .env
# file by load_dotenv) so that no credentials are hardcoded in the notebook.
load_dotenv()

SSH_HOST = os.getenv('SSH_HOST')
# Fall back to the standard SSH port when SSH_PORT is unset or blank;
# int(None) / int('') would otherwise fail with an unhelpful error.
SSH_PORT = int(os.getenv('SSH_PORT') or 22)
SSH_USER = os.getenv('SSH_USER')
SSH_PASSWORD = os.getenv('SSH_PASSWORD')

POSTGRES_HOST = os.getenv('POSTGRES_HOST')
POSTGRES_USER = os.getenv('POSTGRES_USER')
POSTGRES_PASSWORD = os.getenv('POSTGRES_PASSWORD')
POSTGRES_DB = os.getenv('POSTGRES_DB')
# Same fallback as above, using the standard PostgreSQL port.
POSTGRES_PORT = int(os.getenv('POSTGRES_PORT') or 5432)

In [None]:
# Forward local port 5433 through the SSH host to the PostgreSQL server,
# then connect to the database through the local end of the tunnel.
tunnel = SSHTunnelForwarder(
    (SSH_HOST, SSH_PORT),
    ssh_username=SSH_USER,
    ssh_password=SSH_PASSWORD,
    remote_bind_address=(POSTGRES_HOST, POSTGRES_PORT),
    local_bind_address=('127.0.0.1', 5433)
)

tunnel.start()

try:
    connection = psycopg2.connect(
        database=POSTGRES_DB,
        user=POSTGRES_USER,
        password=POSTGRES_PASSWORD,
        host='127.0.0.1',
        port=tunnel.local_bind_port
    )

    cursor = connection.cursor()
except Exception:
    # The tunnel is already running at this point: stop it before re-raising
    # so a failed connection attempt does not leave the local port bound
    # (which would make re-running this cell fail).
    tunnel.stop()
    raise

In [None]:
# Add the column that will hold the sentiment score for each comment.
sql = """
ALTER TABLE comments_fs ADD COLUMN sentiment INTEGER;
"""

try:
    cursor.execute(sql)
    connection.commit()
except Exception as e:
    # A failed statement (e.g. the column already exists on a re-run) leaves
    # the transaction in an aborted state; every later statement on this
    # connection would be rejected until it is rolled back.
    connection.rollback()
    print(e)

In [None]:
# Read (id, comment) pairs for every row that actually has a comment.
cursor.execute(
    "SELECT id, comment FROM comments_fs WHERE comment IS NOT NULL"
)
comments = cursor.fetchall()

In [None]:
# sentiment analysis pipeline
# Name the checkpoint explicitly instead of relying on the library's implicit
# default for the task, so results do not change if that default changes.
# This is the same checkpoint the CSV-based cell later in this notebook uses.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

In [None]:
# scores into db
# Map the classifier's label to an integer score; any other label counts as 0.
label_to_score = {'POSITIVE': 1, 'NEGATIVE': -1}

for comment_id, comment in comments:
    # truncation=True clips comments longer than the model's maximum sequence
    # length; without it a single over-long comment aborts the whole loop.
    sentiment = sentiment_pipeline(comment, truncation=True)[0]
    sentiment_score = label_to_score.get(sentiment['label'], 0)
    # Parameterized query: values are bound by the driver, not interpolated.
    cursor.execute("UPDATE comments_fs SET sentiment = %s WHERE id = %s", (sentiment_score, comment_id))


In [None]:
# Persist the sentiment updates, then release the cursor, the database
# connection and the SSH tunnel. The nested try/finally guarantees that the
# connection and tunnel are torn down even if the commit (or a close) raises.
try:
    connection.commit()
finally:
    try:
        cursor.close()
        connection.close()
    finally:
        tunnel.close()

In [1]:
#import torch
import pandas as pd
from transformers import pipeline

  from .autonotebook import tqdm as notebook_tqdm
None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


In [2]:
# Load the comments table from its CSV export.
df = pd.read_csv(filepath_or_buffer='comments_fs.csv')

# Show (rows, columns) as the cell output.
df.shape

(18161, 7)

In [5]:
# Build the sentiment classifier from an explicitly named checkpoint.
sentiment_pipeline = pipeline(
    task="sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

RuntimeError: At least one of TensorFlow 2.0 or PyTorch should be installed. To install TensorFlow 2.0, read the instructions at https://www.tensorflow.org/install/ To install PyTorch, read the instructions at https://pytorch.org/.

In [None]:
# Function to classify sentiment
def classify_sentiment(comment):
    """Return an integer sentiment score for a single comment.

    Args:
        comment: The comment text. Non-string values (such as the NaN that
            pandas uses for an empty CSV cell, or None) and blank strings
            are treated as having no sentiment.

    Returns:
        1 if the classifier labels the text 'POSITIVE', -1 if it labels it
        'NEGATIVE', and 0 for any other label or for missing/blank input.
    """
    # Guard against missing values: the classifier only accepts text, so a
    # NaN/None cell would otherwise raise and abort a whole DataFrame.apply.
    if not isinstance(comment, str) or not comment.strip():
        return 0

    # truncation=True clips text longer than the model's maximum sequence
    # length instead of letting the model raise on it.
    sentiment = sentiment_pipeline(comment, truncation=True)[0]
    sentiment_label = sentiment['label']
    return 1 if sentiment_label == 'POSITIVE' else -1 if sentiment_label == 'NEGATIVE' else 0


In [None]:
# Score every comment and store the result in a new 'sentiment' column.
df['sentiment'] = df['comment'].apply(func=classify_sentiment)

# Preview the first 20 scored rows.
df.head(n=20)

In [None]:
# Write the scored comments to a new CSV file, leaving out the index column.
df.to_csv(
    path_or_buf='comments_with_sentiment.csv',
    index=False,
)
