In [3]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import transformers
import torch
import numpy as np
import pandas as pd
import seaborn as sns
import csv
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from collections import defaultdict
from textwrap import wrap
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
%matplotlib inline
%config InlineBackend.figure_format='retina'
# Plot styling: seaborn theme, a custom six-colour palette, and the default figure size.
sns.set(style='whitegrid', palette='muted', font_scale=1.2)
HAPPY_COLORS_PALETTE = ["#01BEFE", "#FFDD00", "#FF7D00", "#FF006D", "#ADFF02", "#8F00FF"]
sns.set_palette(sns.color_palette(HAPPY_COLORS_PALETTE))
rcParams['figure.figsize'] = 12, 8
# Seed the NumPy and PyTorch random number generators with the same fixed value.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
# First CUDA device when one is available, otherwise the CPU.
# NOTE(review): the pipeline(...) calls in this notebook do not read this variable.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Only emit transformers log messages at CRITICAL level.
transformers.logging.set_verbosity(transformers.logging.CRITICAL)

## Run BERT

### Select Model

In [2]:
# Short alias of the sentiment model to load; must be a key of MODEL_CHECKPOINTS.
model_name = 'distilbert'

# Text columns that analyze_all() classifies.
cols = ['history', 'history_aave', 'sae_gen', 'aave_gen']

# Short alias -> Hugging Face Hub checkpoint id.
MODEL_CHECKPOINTS = {
    'distilbert': 'distilbert-base-uncased-finetuned-sst-2-english',
    'roberta_base': 'cardiffnlp/twitter-roberta-base-sentiment',
    'roberta_large': 'siebert/sentiment-roberta-large-english',
}

if model_name not in MODEL_CHECKPOINTS:
    raise Exception('MODEL NOT FOUND!!!')
model = MODEL_CHECKPOINTS[model_name]

print(f'using model: {model}')
# The pipeline takes an integer device index: 0 is the first GPU, -1 is the CPU.
# A hard-coded 0 fails on machines without CUDA, so fall back to the CPU there.
classifier = pipeline(
    'sentiment-analysis',
    device=0 if torch.cuda.is_available() else -1,
    model=model,
)

using model: distilbert-base-uncased-finetuned-sst-2-english


### Run Analysis

In [10]:
# Input CSV whose text columns are classified by analyze_gt() / analyze_all().
file_path = 'runs/13/dialogpt/dailydialog_all.csv'
# Output path template: analyze_gt() / analyze_all() drop the last 4 characters
# and append '_<model_name>_gt.csv' / '_<model_name>.csv'.
# NOTE(review): other cells build paths as 'runs/' + output_path + '/...', which
# only works when output_path is a run-directory name -- confirm the value those cells expect.
output_path = 'runs/13/dialogpt/sentiment/dailydialog_sentiment.csv'

In [11]:
# Text columns that analyze_all() classifies.
cols = ['history', 'history_aave', 'sae_gen', 'aave_gen']

# Short alias -> Hugging Face Hub checkpoint id, in the order the models are run.
checkpoints = {
    'distilbert': 'distilbert-base-uncased-finetuned-sst-2-english',
    'roberta_base': 'cardiffnlp/twitter-roberta-base-sentiment',
    'roberta_large': 'siebert/sentiment-roberta-large-english',
}

# The pipeline takes an integer device index: 0 is the first GPU, -1 is the CPU.
# A hard-coded 0 fails on machines without CUDA.
pipeline_device = 0 if torch.cuda.is_available() else -1

# analyze_gt() / analyze_all() read the globals `model_name` and `classifier`,
# so both must be rebound on every iteration before the helpers are called.
# NOTE: both helpers are defined in cells further down; run those cells first.
for model_name, model in checkpoints.items():
    print(f'using model: {model}')
    classifier = pipeline('sentiment-analysis', device=pipeline_device, model=model)
    analyze_gt()
    analyze_all()

using model: distilbert-base-uncased-finetuned-sst-2-english


100%|██████████| 5181/5181 [01:15<00:00, 68.83it/s]
100%|██████████| 5181/5181 [02:41<00:00, 32.12it/s]


using model: cardiffnlp/twitter-roberta-base-sentiment


100%|██████████| 5181/5181 [02:24<00:00, 35.79it/s]
100%|██████████| 5181/5181 [04:46<00:00, 18.07it/s]


using model: siebert/sentiment-roberta-large-english


100%|██████████| 5181/5181 [04:17<00:00, 20.11it/s]
100%|██████████| 5181/5181 [08:32<00:00, 10.11it/s]


#### Only GT

In [4]:
def analyze_gt():
    """Classify the ground-truth columns of the input CSV and append the labels to a CSV.

    Reads these notebook-level globals:
      * ``file_path``   -- input CSV containing ``groundtruth`` and ``groundtruth_aave``.
      * ``output_path`` -- output template; its last 4 characters are dropped and
        ``'_<model_name>_gt.csv'`` is appended.
      * ``model_name``  -- used only as a file-name suffix.
      * ``classifier``  -- callable returning a sequence whose first item has a ``'label'`` key.

    One row of labels is appended per input row. The header is written only when
    the output file is new or empty, so re-running does not repeat the header
    line in the middle of the data.
    """
    gt_cols = ['groundtruth', 'groundtruth_aave']
    df = pd.read_csv(file_path)
    out_file = output_path[:-4] + '_' + model_name + '_gt.csv'

    # `with` closes the file even if the classifier raises mid-run;
    # newline='' is what the csv module expects for files it writes to.
    with open(out_file, 'a', encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        # A file opened in append mode starts positioned at its end, so 0 means empty.
        if f.tell() == 0:
            writer.writerow(gt_cols)

        for row in tqdm(df[gt_cols].itertuples(index=False, name=None), total=len(df)):
            labels = []
            for text in row:
                # Missing cells become a single space; anything else is coerced to
                # str so slicing cannot fail on a numeric cell.
                text = " " if pd.isna(text) else str(text)
                # Cap at 510 characters (a character cap, not a token cap).
                # NOTE(review): presumably meant to stay under the model's input limit -- confirm.
                labels.append(classifier(text[:510])[0]['label'])
            writer.writerow(labels)

#### All

In [5]:
def analyze_all():
    """Classify every column listed in ``cols`` and append the labels to a CSV.

    Reads these notebook-level globals:
      * ``file_path``   -- input CSV containing the columns named in ``cols``.
      * ``cols``        -- list of column names to classify (also the output header).
      * ``output_path`` -- output template; its last 4 characters are dropped and
        ``'_<model_name>.csv'`` is appended.
      * ``model_name``  -- used only as a file-name suffix.
      * ``classifier``  -- callable returning a sequence whose first item has a ``'label'`` key.

    One row of labels is appended per input row. The header is written only when
    the output file is new or empty, so re-running does not repeat the header
    line in the middle of the data.
    """
    df = pd.read_csv(file_path)
    out_file = output_path[:-4] + '_' + model_name + '.csv'

    # `with` closes the file even if the classifier raises mid-run;
    # newline='' is what the csv module expects for files it writes to.
    with open(out_file, 'a', encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        # A file opened in append mode starts positioned at its end, so 0 means empty.
        if f.tell() == 0:
            writer.writerow(cols)

        for row in tqdm(df[list(cols)].itertuples(index=False, name=None), total=len(df)):
            labels = []
            for text in row:
                # Missing cells become a single space; anything else is coerced to
                # str so slicing cannot fail on a numeric cell.
                text = " " if pd.isna(text) else str(text)
                # Cap at 510 characters (a character cap, not a token cap).
                # NOTE(review): presumably meant to stay under the model's input limit -- confirm.
                labels.append(classifier(text[:510])[0]['label'])
            writer.writerow(labels)

### Stats

In [16]:
# Label distribution of the ground-truth columns for each model:
# printed, and written to a stats CSV as a [model, column] row followed by a row of counts.
with open('runs/13/dialogpt/stats/cornell_movie_gt_stats.csv', 'w', encoding='utf-8', newline='') as f:
    writer = csv.writer(f)
    for m in ['roberta_large', 'roberta_base', 'distilbert']:
        # The file depends only on the model, so read it once per model.
        df = pd.read_csv(f'runs/13/dialogpt/sentiment/cornell_movie_sentiment_{m}_gt.csv')
        for col in ['groundtruth', 'groundtruth_aave']:
            counts = df[col].value_counts().sort_index()
            print(f'{m}, {col}')
            print(counts)
            writer.writerow([m, col])
            # Iterating a Series yields its values, so this row holds the counts
            # only, in sorted-label order; the labels themselves are not written.
            writer.writerow(counts)

roberta_large, groundtruth
NEGATIVE    7357
POSITIVE    3264
Name: groundtruth, dtype: int64
roberta_large, groundtruth_aave
NEGATIVE    7393
POSITIVE    3228
Name: groundtruth_aave, dtype: int64
roberta_base, groundtruth
LABEL_0    2895
LABEL_1    6772
LABEL_2     954
Name: groundtruth, dtype: int64
roberta_base, groundtruth_aave
LABEL_0    3000
LABEL_1    6620
LABEL_2    1001
Name: groundtruth_aave, dtype: int64
distilbert, groundtruth
NEGATIVE    8191
POSITIVE    2430
Name: groundtruth, dtype: int64
distilbert, groundtruth_aave
NEGATIVE    8116
POSITIVE    2505
Name: groundtruth_aave, dtype: int64


In [17]:
# Label distribution of the generated-response columns for each model:
# printed, and written to a stats CSV as a [model, column] row followed by a row of counts.
# NOTE(review): results are read from runs/13/bst/... but written under
# runs/13/dialogpt/... -- confirm which run directory is intended.
with open('runs/13/dialogpt/stats/cornell_movie_stats.csv', 'w', encoding='utf-8', newline='') as f:
    writer = csv.writer(f)
    for m in ['roberta_large', 'roberta_base', 'distilbert']:
        # The file depends only on the model, so read it once per model.
        df = pd.read_csv(f'runs/13/bst/sentiment/cornell_movie_sentiment_{m}.csv')
        for d in ['aave_gen', 'sae_gen']:
            counts = df[d].value_counts().sort_index()
            print(f'{m}, {d}')
            print(counts)
            writer.writerow([m, d])
            # Iterating a Series yields its values, so this row holds the counts
            # only, in sorted-label order; the labels themselves are not written.
            writer.writerow(counts)

roberta_large, aave_gen
NEGATIVE    5448
POSITIVE    4314
Name: aave_gen, dtype: int64
roberta_large, sae_gen
NEGATIVE    5416
POSITIVE    4346
Name: sae_gen, dtype: int64
roberta_base, aave_gen
LABEL_0    2717
LABEL_1    3718
LABEL_2    3327
Name: aave_gen, dtype: int64
roberta_base, sae_gen
LABEL_0    2742
LABEL_1    3687
LABEL_2    3333
Name: sae_gen, dtype: int64
distilbert, aave_gen
NEGATIVE    4472
POSITIVE    5290
Name: aave_gen, dtype: int64
distilbert, sae_gen
NEGATIVE    4418
POSITIVE    5344
Name: sae_gen, dtype: int64


In [None]:
# Label distribution per dialect and text column for the current `model_name`:
# one row of counts per (dialect, column) is written to stats.csv and echoed.
# Relies on `dialects` and `model_name` being defined by other cells.
# NOTE: every stats cell in this notebook opens 'stats.csv' in 'w' mode, so each
# run replaces whatever the previous stats cell wrote.
with open('stats.csv', 'w', encoding='utf-8', newline='') as f:
    writer = csv.writer(f)
    for d in dialects:
        for col in ['prompt', 'truth', 'generation']:
            # header=None: these files have no header row; column 1 holds the label.
            df = pd.read_csv('runs/09 casino/' + model_name + '/' + d + '_' + col + '_sentimental_analysis.csv', header=None)
            counts = df.iloc[:, 1].value_counts().sort_index()
            # Iterating a Series yields its values: only the counts are written.
            writer.writerow(counts)
            print(f'{model_name}, {d}, {col}')
            print(counts)
            print()

In [8]:
# Classify the prompt / truth / generation text of every generated sample, for
# each temperature and dialect, with the currently loaded `classifier`.
# Relies on `temp`, `dialects`, `output_path`, `model_name` and `classifier`
# being defined by other cells; here `output_path` is used as a directory name under 'runs/'.
text_cols = ['prompt', 'truth', 'generation']
for t in temp:
    for d in dialects:
        print(f'starting analyzation on dialect: {d}, temperature: {t} using {model_name}')
        df = pd.read_csv('runs/' + output_path +'/Generation/1_feature/temp=' + str(t) + '_' + str(d) + '_gen_txt.csv')
        # newline='' is what the csv module expects for files it writes to.
        with open('runs/' + output_path + '/Sentiment Analysis/1_feature/' + model_name + '/temp=' + str(t) + '_' + str(d) + '_sentiment.csv', 'a', encoding="utf-8", newline='') as f:
            writer = csv.writer(f)
            # A file opened in append mode starts positioned at its end, so 0
            # means empty: write the header once rather than on every re-run.
            if f.tell() == 0:
                writer.writerow(text_cols)
            for i in tqdm(range(df.shape[0])):
                row = df.iloc[i]  # fetch the row once instead of once per column
                writer.writerow([classifier(row[c])[0]['label'] for c in text_cols])

starting analyzation on dialect: sae, temperature: 0.1 using roberta_base


100%|██████████| 489/489 [00:18<00:00, 26.92it/s]


starting analyzation on dialect: aave, temperature: 0.1 using roberta_base


100%|██████████| 487/487 [00:16<00:00, 29.51it/s]


starting analyzation on dialect: sae, temperature: 0.5 using roberta_base


100%|██████████| 489/489 [00:16<00:00, 29.60it/s]


starting analyzation on dialect: aave, temperature: 0.5 using roberta_base


100%|██████████| 487/487 [00:15<00:00, 30.45it/s]


starting analyzation on dialect: sae, temperature: 0.7 using roberta_base


100%|██████████| 489/489 [00:15<00:00, 31.01it/s]


starting analyzation on dialect: aave, temperature: 0.7 using roberta_base


100%|██████████| 487/487 [00:15<00:00, 30.53it/s]


starting analyzation on dialect: sae, temperature: 1.0 using roberta_base


100%|██████████| 489/489 [00:15<00:00, 30.59it/s]


starting analyzation on dialect: aave, temperature: 1.0 using roberta_base


100%|██████████| 487/487 [00:15<00:00, 30.50it/s]


starting analyzation on dialect: sae, temperature: 1.2 using roberta_base


100%|██████████| 489/489 [00:15<00:00, 30.64it/s]


starting analyzation on dialect: aave, temperature: 1.2 using roberta_base


100%|██████████| 487/487 [00:16<00:00, 29.62it/s]


starting analyzation on dialect: sae, temperature: 1.5 using roberta_base


100%|██████████| 489/489 [00:16<00:00, 29.16it/s]


starting analyzation on dialect: aave, temperature: 1.5 using roberta_base


100%|██████████| 487/487 [00:16<00:00, 29.74it/s]


starting analyzation on dialect: sae, temperature: 2.0 using roberta_base


100%|██████████| 489/489 [00:16<00:00, 30.15it/s]


starting analyzation on dialect: aave, temperature: 2.0 using roberta_base


100%|██████████| 487/487 [00:16<00:00, 29.80it/s]


## Run VADER

### VADER Helper Function

In [25]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
def _shared_vader_analyzer():
    """Return one SentimentIntensityAnalyzer per kernel, built on first use."""
    if not hasattr(_shared_vader_analyzer, "_instance"):
        _shared_vader_analyzer._instance = SentimentIntensityAnalyzer()
    return _shared_vader_analyzer._instance


def sentiment_scores(sentence, analyzer=None):
    """Return the VADER sentiment class of ``sentence``.

    Args:
        sentence: text to score.
        analyzer: optional object exposing ``polarity_scores(text) -> dict`` with
            a ``'compound'`` entry. When omitted, a shared
            ``SentimentIntensityAnalyzer`` is reused across calls instead of
            building a new one for every sentence.

    Returns:
        ``"Positive"`` when the compound score is >= 0.05, ``"Negative"`` when it
        is <= -0.05, and ``"Neutral"`` otherwise.
    """
    if analyzer is None:
        analyzer = _shared_vader_analyzer()

    # polarity_scores returns a dict; only its 'compound' entry is used here.
    compound = analyzer.polarity_scores(sentence)['compound']

    if compound >= 0.05:
        return "Positive"
    if compound <= -0.05:
        return "Negative"
    return "Neutral"

### Run Analysis on Generation

In [26]:
# Temperature values that appear in the 'temp=<t>' part of the generation/result paths.
temp = [0.1, 0.5, 0.7, 1.0, 1.2, 1.5]
# Dialect tags that prefix the generation/result file names.
dialects = ['sae', 'aave']

In [28]:
# Score the `gen` column of every generation file (one per temperature and
# dialect) with VADER and append [row index, label] rows to the matching result CSV.
# Here `output_path` is used as a directory name under 'runs/'.
# NOTE: the result file is opened in append mode, so re-running this cell adds a
# second copy of every row.
for t in temp:
    for d in dialects:
        print(f'starting analyzation on dialect: {d}, temperature: {t}')
        df = pd.read_csv('runs/' + output_path +'/Generation/temp=' + str(t) + '/' + d + '_gen_txt.csv')
        # newline='' is what the csv module expects for files it writes to
        # (and matches how the stats cells open their files).
        with open('runs/' + output_path +'/VADER/temp=' + str(t) + '/' + d + '_gen_txt_sentimental_analysis.csv', 'a', encoding="utf-8", newline='') as f:
            writer = csv.writer(f)
            for i, txt in enumerate(tqdm(df.gen)):
                results = sentiment_scores(txt)
                writer.writerow([i, results])

starting analyzation on dialect: sae, temperature: 0.1


100%|███████████████████████████████████████████████████████████████████████████████| 492/492 [00:03<00:00, 137.62it/s]


starting analyzation on dialect: aave, temperature: 0.1


100%|███████████████████████████████████████████████████████████████████████████████| 492/492 [00:03<00:00, 140.45it/s]


starting analyzation on dialect: sae, temperature: 0.5


100%|███████████████████████████████████████████████████████████████████████████████| 492/492 [00:03<00:00, 138.32it/s]


starting analyzation on dialect: aave, temperature: 0.5


100%|███████████████████████████████████████████████████████████████████████████████| 492/492 [00:03<00:00, 139.27it/s]


starting analyzation on dialect: sae, temperature: 0.7


100%|███████████████████████████████████████████████████████████████████████████████| 492/492 [00:03<00:00, 141.97it/s]


starting analyzation on dialect: aave, temperature: 0.7


100%|███████████████████████████████████████████████████████████████████████████████| 492/492 [00:03<00:00, 136.83it/s]


starting analyzation on dialect: sae, temperature: 1.0


100%|███████████████████████████████████████████████████████████████████████████████| 492/492 [00:03<00:00, 142.29it/s]


starting analyzation on dialect: aave, temperature: 1.0


100%|███████████████████████████████████████████████████████████████████████████████| 492/492 [00:03<00:00, 136.67it/s]


starting analyzation on dialect: sae, temperature: 1.2


100%|███████████████████████████████████████████████████████████████████████████████| 492/492 [00:03<00:00, 137.62it/s]


starting analyzation on dialect: aave, temperature: 1.2


100%|███████████████████████████████████████████████████████████████████████████████| 492/492 [00:03<00:00, 140.64it/s]


starting analyzation on dialect: sae, temperature: 1.5


100%|███████████████████████████████████████████████████████████████████████████████| 434/434 [00:03<00:00, 142.78it/s]


starting analyzation on dialect: aave, temperature: 1.5


100%|███████████████████████████████████████████████████████████████████████████████| 420/420 [00:03<00:00, 136.71it/s]


### Run Analysis on Original

In [31]:
# Score the original `prompt` and `prompt_cont` text of each dialect with VADER
# and append [row index, label] rows to one result CSV per (dialect, column).
# Here `output_path` is used as a directory name under 'runs/'.
for d in ['sae', 'aave']:
    # The input file depends only on the dialect, so read it once per dialect.
    df = pd.read_csv('runs/' + output_path +'/Generation/temp=0.1/' + d + '_gen_txt.csv')
    for col in ['prompt', 'prompt_cont']:
        print(f'{d}; {col}')
        # newline='' is what the csv module expects for files it writes to.
        with open('runs/' + output_path +'/VADER/' + d + '_' + col + '_sentimental_analysis.csv', 'a', encoding="utf-8", newline='') as f:
            writer = csv.writer(f)
            for i, txt in enumerate(tqdm(df[col])):
                # Drop the first and last two characters of each cell before scoring.
                # NOTE(review): presumably strips a "['...']" wrapper -- confirm against the data.
                results = sentiment_scores(txt[2:-2])
                writer.writerow([i, results])

sae; prompt


100%|███████████████████████████████████████████████████████████████████████████████| 492/492 [00:03<00:00, 137.56it/s]


sae; prompt_cont


100%|███████████████████████████████████████████████████████████████████████████████| 492/492 [00:03<00:00, 139.41it/s]


aave; prompt


100%|███████████████████████████████████████████████████████████████████████████████| 492/492 [00:03<00:00, 134.80it/s]


aave; prompt_cont


100%|███████████████████████████████████████████████████████████████████████████████| 492/492 [00:03<00:00, 127.73it/s]


In [32]:
# df = pd.read_csv("runs/03 EMNLP SAE-AAVE Pairs/sae_samples.tsv", sep="\t")
# with open('runs/03 EMNLP SAE-AAVE Pairs/VADER/sae_second_seg_sentimental_analysis.csv', 'a', encoding="utf-8") as f:
#     writer = csv.writer(f)
#     for i, txt in enumerate(tq.tqdm(df.second_seg)):
#         results = sentiment_scores(txt)
#         writer.writerow([i, results])

### Stats

In [32]:
# VADER label distribution of the original prompt / continuation per dialect:
# one row of counts per (dialect, column) is written to stats.csv and echoed.
# NOTE: every stats cell in this notebook opens 'stats.csv' in 'w' mode, so each
# run replaces whatever the previous stats cell wrote.
with open('stats.csv', 'w', encoding='utf-8', newline='') as f:
    writer = csv.writer(f)
    for d in dialects:
        for col in ['prompt', 'prompt_cont']:
            # header=None: the result files have no header row; column 1 holds the label.
            df = pd.read_csv('runs/' + output_path +'/VADER/' + d + '_' + col + '_sentimental_analysis.csv', header=None)
            counts = df.iloc[:, 1].value_counts().sort_index()
            # Iterating a Series yields its values: only the counts are written.
            writer.writerow(counts)
            print(f'VADER, {d}, {col}')
            print(counts)
            print()

VADER, sae, prompt
Negative     78
Neutral     299
Positive    115
Name: 1, dtype: int64

VADER, sae, prompt_cont
Negative    103
Neutral     294
Positive     95
Name: 1, dtype: int64

VADER, aave, prompt
Negative     77
Neutral     300
Positive    115
Name: 1, dtype: int64

VADER, aave, prompt_cont
Negative     97
Neutral     296
Positive     99
Name: 1, dtype: int64



In [33]:
# VADER label distribution of the generated text per temperature and dialect:
# one row of counts per (temperature, dialect) is written to stats.csv and echoed.
# NOTE: every stats cell in this notebook opens 'stats.csv' in 'w' mode, so each
# run replaces whatever the previous stats cell wrote.
with open('stats.csv', 'w', encoding='utf-8', newline='') as f:
    writer = csv.writer(f)
    for t in temp:
        for d in dialects:
            # header=None: the result files have no header row; column 1 holds the label.
            df = pd.read_csv('runs/' + output_path +'/VADER/temp=' + str(t) + '/' + str(d) + '_gen_txt_sentimental_analysis.csv', header=None)
            counts = df.iloc[:, 1].value_counts().sort_index()
            # Iterating a Series yields its values: only the counts are written.
            writer.writerow(counts)
            print(f'model: VADER; generation: {d}; temperature {t}')
            print(counts)
            print()

model: VADER; generation: sae; temperature 0.1
Negative     62
Neutral     341
Positive     89
Name: 1, dtype: int64

model: VADER; generation: aave; temperature 0.1
Negative     80
Neutral     326
Positive     86
Name: 1, dtype: int64

model: VADER; generation: sae; temperature 0.5
Negative     45
Neutral     358
Positive     89
Name: 1, dtype: int64

model: VADER; generation: aave; temperature 0.5
Negative     60
Neutral     319
Positive    113
Name: 1, dtype: int64

model: VADER; generation: sae; temperature 0.7
Negative     59
Neutral     335
Positive     98
Name: 1, dtype: int64

model: VADER; generation: aave; temperature 0.7
Negative     63
Neutral     322
Positive    107
Name: 1, dtype: int64

model: VADER; generation: sae; temperature 1.0
Negative     48
Neutral     337
Positive    107
Name: 1, dtype: int64

model: VADER; generation: aave; temperature 1.0
Negative     70
Neutral     327
Positive     95
Name: 1, dtype: int64

model: VADER; generation: sae; temperature 1.2
Negat

## Run Text Blob

### Text Blob Helper Function

In [34]:
from textblob import TextBlob

def textblob_score(sentence):
    """Map the TextBlob polarity of ``sentence`` to a sentiment class.

    Returns ``"Positive"`` for a polarity above zero, ``"Neutral"`` for a
    polarity of exactly zero, and ``"Negative"`` in every other case.
    """
    polarity = TextBlob(sentence).sentiment.polarity
    if polarity > 0:
        return "Positive"
    if polarity == 0:
        return "Neutral"
    return "Negative"

In [35]:
# Temperature values that appear in the 'temp=<t>' part of the generation/result paths.
temp = [0.1, 0.5, 0.7, 1.0, 1.2, 1.5]
# Dialect tags that prefix the generation/result file names.
dialects = ['sae', 'aave']

### Run Analysis on Generation

In [36]:
# Score the `gen` column of every generation file (one per temperature and
# dialect) with TextBlob and append [row index, label] rows to the matching result CSV.
# Here `output_path` is used as a directory name under 'runs/'.
# NOTE: the result file is opened in append mode, so re-running this cell adds a
# second copy of every row.
for t in temp:
    for d in dialects:
        print(f'starting analyzation on dialect: {d}, temperature: {t}')
        df = pd.read_csv('runs/' + output_path +'/Generation/temp=' + str(t) + '/' + d + '_gen_txt.csv')
        # newline='' is what the csv module expects for files it writes to
        # (and matches how the stats cells open their files).
        with open('runs/' + output_path +'/TextBlob/temp=' + str(t) + '/' + d + '_gen_txt_sentimental_analysis.csv', 'a', encoding="utf-8", newline='') as f:
            writer = csv.writer(f)
            for i, txt in enumerate(tqdm(df.gen)):
                results = textblob_score(txt)
                writer.writerow([i, results])

starting analyzation on dialect: sae, temperature: 0.1


100%|██████████████████████████████████████████████████████████████████████████████| 492/492 [00:00<00:00, 4729.78it/s]


starting analyzation on dialect: aave, temperature: 0.1


100%|██████████████████████████████████████████████████████████████████████████████| 492/492 [00:00<00:00, 8198.22it/s]


starting analyzation on dialect: sae, temperature: 0.5


100%|██████████████████████████████████████████████████████████████████████████████| 492/492 [00:00<00:00, 8337.13it/s]


starting analyzation on dialect: aave, temperature: 0.5


100%|██████████████████████████████████████████████████████████████████████████████| 492/492 [00:00<00:00, 8198.18it/s]


starting analyzation on dialect: sae, temperature: 0.7


100%|██████████████████████████████████████████████████████████████████████████████| 492/492 [00:00<00:00, 8337.16it/s]


starting analyzation on dialect: aave, temperature: 0.7


100%|██████████████████████████████████████████████████████████████████████████████| 492/492 [00:00<00:00, 8337.09it/s]


starting analyzation on dialect: sae, temperature: 1.0


100%|██████████████████████████████████████████████████████████████████████████████| 492/492 [00:00<00:00, 8337.20it/s]


starting analyzation on dialect: aave, temperature: 1.0


100%|██████████████████████████████████████████████████████████████████████████████| 492/492 [00:00<00:00, 8337.23it/s]


starting analyzation on dialect: sae, temperature: 1.2


100%|██████████████████████████████████████████████████████████████████████████████| 492/492 [00:00<00:00, 8480.93it/s]


starting analyzation on dialect: aave, temperature: 1.2


100%|██████████████████████████████████████████████████████████████████████████████| 492/492 [00:00<00:00, 8337.13it/s]


starting analyzation on dialect: sae, temperature: 1.5


100%|██████████████████████████████████████████████████████████████████████████████| 434/434 [00:00<00:00, 8344.39it/s]


starting analyzation on dialect: aave, temperature: 1.5


100%|██████████████████████████████████████████████████████████████████████████████| 420/420 [00:00<00:00, 7922.75it/s]


### Run Analysis on Original

In [37]:
# Score the original `prompt` and `prompt_cont` text of each dialect with
# TextBlob and append [row index, label] rows to one result CSV per (dialect, column).
# Here `output_path` is used as a directory name under 'runs/'.
for d in ['sae', 'aave']:
    # The input file depends only on the dialect, so read it once per dialect.
    df = pd.read_csv('runs/' + output_path +'/Generation/temp=0.1/' + d + '_gen_txt.csv')
    for col in ['prompt', 'prompt_cont']:
        print(f'TextBlob, {d}; {col}')
        # newline='' is what the csv module expects for files it writes to.
        with open('runs/' + output_path +'/TextBlob/' + d + '_' + col + '_sentimental_analysis.csv', 'a', encoding="utf-8", newline='') as f:
            writer = csv.writer(f)
            for i, txt in enumerate(tqdm(df[col])):
                # Drop the first and last two characters of each cell before scoring.
                # NOTE(review): presumably strips a "['...']" wrapper -- confirm against the data.
                results = textblob_score(txt[2:-2])
                writer.writerow([i, results])

TextBlob, sae; prompt


100%|██████████████████████████████████████████████████████████████████████████████| 492/492 [00:00<00:00, 7026.95it/s]


TextBlob, sae; prompt_cont


100%|██████████████████████████████████████████████████████████████████████████████| 492/492 [00:00<00:00, 7807.73it/s]


TextBlob, aave; prompt


100%|██████████████████████████████████████████████████████████████████████████████| 492/492 [00:00<00:00, 7567.64it/s]


TextBlob, aave; prompt_cont


100%|██████████████████████████████████████████████████████████████████████████████| 492/492 [00:00<00:00, 7567.70it/s]


### Stats

In [39]:
# TextBlob label distribution of the original prompt / continuation per dialect:
# one row of counts per (dialect, column) is written to stats.csv and echoed.
# NOTE: every stats cell in this notebook opens 'stats.csv' in 'w' mode, so each
# run replaces whatever the previous stats cell wrote.
with open('stats.csv', 'w', encoding='utf-8', newline='') as f:
    writer = csv.writer(f)
    for d in ['sae', 'aave']:
        for col in ['prompt', 'prompt_cont']:
            # header=None: the result files have no header row; column 1 holds the label.
            df = pd.read_csv('runs/' + output_path +'/TextBlob/' + d + '_' + col + '_sentimental_analysis.csv', header=None)
            counts = df.iloc[:, 1].value_counts().sort_index()
            # Iterating a Series yields its values: only the counts are written.
            writer.writerow(counts)
            print(f'TextBlob Gen, {d}, {col}')
            print(counts)
            print()

TextBlob Gen, sae, prompt
Negative     57
Neutral     306
Positive    129
Name: 1, dtype: int64

TextBlob Gen, sae, prompt_cont
Negative     67
Neutral     309
Positive    116
Name: 1, dtype: int64

TextBlob Gen, aave, prompt
Negative     55
Neutral     318
Positive    119
Name: 1, dtype: int64

TextBlob Gen, aave, prompt_cont
Negative     49
Neutral     324
Positive    119
Name: 1, dtype: int64



In [40]:
# TextBlob label distribution of the generated text per temperature and dialect:
# one row of counts per (temperature, dialect) is written to stats.csv and echoed.
# NOTE: every stats cell in this notebook opens 'stats.csv' in 'w' mode, so each
# run replaces whatever the previous stats cell wrote.
with open('stats.csv', 'w', encoding='utf-8', newline='') as f:
    writer = csv.writer(f)
    for t in temp:
        for d in dialects:
            print(f'model: TextBlob; generation: {d}; temperature {t}')
            # header=None: the result files have no header row; column 1 holds the label.
            df = pd.read_csv('runs/' + output_path +'/TextBlob/temp=' + str(t) + '/' + str(d) + '_gen_txt_sentimental_analysis.csv', header=None)
            counts = df.iloc[:, 1].value_counts().sort_index()
            # Iterating a Series yields its values: only the counts are written.
            writer.writerow(counts)
            print(counts)
            print()

model: TextBlob; generation: sae; temperature 0.1
Negative     59
Neutral     345
Positive     88
Name: 1, dtype: int64

model: TextBlob; generation: aave; temperature 0.1
Negative     67
Neutral     314
Positive    111
Name: 1, dtype: int64

model: TextBlob; generation: sae; temperature 0.5
Negative     48
Neutral     329
Positive    115
Name: 1, dtype: int64

model: TextBlob; generation: aave; temperature 0.5
Negative     63
Neutral     312
Positive    117
Name: 1, dtype: int64

model: TextBlob; generation: sae; temperature 0.7
Negative     56
Neutral     325
Positive    111
Name: 1, dtype: int64

model: TextBlob; generation: aave; temperature 0.7
Negative     48
Neutral     320
Positive    124
Name: 1, dtype: int64

model: TextBlob; generation: sae; temperature 1.0
Negative     56
Neutral     331
Positive    105
Name: 1, dtype: int64

model: TextBlob; generation: aave; temperature 1.0
Negative     52
Neutral     314
Positive    126
Name: 1, dtype: int64

model: TextBlob; generation:

In [26]:
# df = pd.read_csv("runs/03 EMNLP SAE-AAVE Pairs/sae_gen_txt.csv")
# with open('runs/03 EMNLP SAE-AAVE Pairs/TextBlob/sae_gen_txt_sentimental_analysis.csv', 'a', encoding="utf-8") as f:
#     writer = csv.writer(f)
#     for i, txt in enumerate(tq.tqdm(df.txt)):
#         results = textblob_score(txt)
#         writer.writerow([i, results])