In [1]:
!pip install transformers textblob vaderSentiment torch




In [2]:
from transformers import MarianMTModel, MarianTokenizer
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import torch


In [3]:
# MarianTokenizer depends on sentencepiece; uncomment and run this if it is not already installed.
# !pip install sentencepiece


In [4]:
# Hub identifier of the checkpoint that both the tokenizer and the model are loaded from.
model_name = 'Helsinki-NLP/opus-mt-en-es'
# Tokenizer and translation model come from the same checkpoint; both are read
# as module-level globals by translate_with_emotion.
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)


tokenizer_config.json:   0%|          | 0.00/44.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/802k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/826k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.59M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.47k [00:00<?, ?B/s]



pytorch_model.bin:   0%|          | 0.00/312M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

In [5]:
_VADER_ANALYZER = None  # created on first use, then shared by every call


def _get_vader_analyzer():
    """Return the shared VADER analyzer, constructing it on first use."""
    global _VADER_ANALYZER
    if _VADER_ANALYZER is None:
        _VADER_ANALYZER = SentimentIntensityAnalyzer()
    return _VADER_ANALYZER


def analyze_sentiment(text):
    """Score ``text`` with both TextBlob and VADER.

    Args:
        text: The string to analyse.

    Returns:
        A ``(polarity, vader_scores)`` tuple: ``polarity`` is TextBlob's
        ``sentiment.polarity`` for the text and ``vader_scores`` is whatever
        ``SentimentIntensityAnalyzer.polarity_scores`` returns for it.
    """
    # Using TextBlob
    polarity = TextBlob(text).sentiment.polarity

    # Using VADER. The analyzer is reused across calls so its constructor does
    # not run again for every line of a dataset.
    vader_scores = _get_vader_analyzer().polarity_scores(text)

    return polarity, vader_scores

def categorize_emotion(polarity):
    """Turn a polarity score into a coarse emotion label.

    Args:
        polarity: Numeric sentiment score.

    Returns:
        ``'positive'`` when the score is above zero, ``'negative'`` when it is
        below zero, and ``'neutral'`` in every other case.
    """
    if polarity > 0:
        return 'positive'
    # Anything that is not strictly negative at this point counts as neutral.
    return 'negative' if polarity < 0 else 'neutral'


In [6]:
def translate_with_emotion(text):
    """Translate ``text`` and tag it with the sentiment of the original.

    Sentiment is computed on the untranslated input; the translation uses the
    module-level ``tokenizer`` and ``model``.

    Args:
        text: The source-language string to translate.

    Returns:
        A ``(translated_text, emotion, vader_scores)`` tuple, where ``emotion``
        is the label from ``categorize_emotion`` and ``vader_scores`` is the
        VADER result from ``analyze_sentiment``.
    """
    # Analyze sentiment
    polarity, vader_scores = analyze_sentiment(text)
    emotion = categorize_emotion(polarity)

    # Translate text. truncation=True cuts inputs that exceed the tokenizer's
    # maximum length instead of letting them fail inside generate().
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    translated = model.generate(**inputs)
    # A single input string yields a single decoded sequence.
    translated_text = tokenizer.batch_decode(translated, skip_special_tokens=True)[0]

    return translated_text, emotion, vader_scores


In [7]:
# Smoke-test the whole pipeline on a single sentence.
sample_text = "I am so happy to see you!"
translated_text, emotion, vader_scores = translate_with_emotion(sample_text)

# One print call; each argument becomes its own output line.
print(
    f"Original Text: {sample_text}",
    f"Translated Text: {translated_text}",
    f"Emotion: {emotion}",
    f"VADER Scores: {vader_scores}",
    sep="\n",
)


Original Text: I am so happy to see you!
Translated Text: ¡Estoy tan feliz de verte!
Emotion: positive
VADER Scores: {'neg': 0.0, 'neu': 0.544, 'pos': 0.456, 'compound': 0.7213}


In [8]:
def process_dataset(file_path, encoding='utf-8'):
    """Translate and sentiment-tag every line of a text file.

    Args:
        file_path: Path of the text file to read, one example per line.
        encoding: Text encoding used to read the file. Given explicitly so the
            result does not depend on the platform's default encoding.

    Returns:
        A list with one ``(original, translated_text, emotion, vader_scores)``
        tuple per line of the file, in file order. ``original`` is the line
        with surrounding whitespace stripped; blank lines still produce an
        entry, so indices keep matching the file's line numbers.
    """
    results = []
    with open(file_path, 'r', encoding=encoding) as file:
        # Stream the file instead of loading every line up front.
        for line in file:
            text = line.strip()
            translated_text, emotion, vader_scores = translate_with_emotion(text)
            results.append((text, translated_text, emotion, vader_scores))

    return results

# Run the translate-and-tag pipeline over the training and test files.
# Every line goes through the translation model, so these calls are the slow
# part of the notebook.
train_results = process_dataset('clean_train.txt')
test_results = process_dataset('clean_test.txt')


In [None]:
# process_dataset is already defined in the cell that builds the train/test
# results; it is reused here instead of being redefined.
val_results = process_dataset('clean_val.txt')

In [None]:
def evaluate_model(results):
    """Return the fraction of results whose stored emotion is reproduced.

    For each entry the emotion is recomputed from the original text with
    ``analyze_sentiment`` and ``categorize_emotion`` and compared with the
    emotion stored in the entry.

    NOTE: when ``results`` comes from ``process_dataset``, the stored emotion
    was produced by the same two functions on the same text, so this is a
    self-consistency check rather than accuracy against ground-truth labels.

    Args:
        results: Sequence of ``(original, translated, emotion, vader_scores)``
            tuples.

    Returns:
        The matching fraction as a float, or ``0.0`` when ``results`` is empty
        (instead of dividing by zero).
    """
    total = len(results)
    if total == 0:
        return 0.0

    correct_predictions = sum(
        1
        for original, _translated, emotion, _vader_scores in results
        if categorize_emotion(analyze_sentiment(original)[0]) == emotion
    )

    return correct_predictions / total

# Score the validation results and report the value rounded to two decimals.
val_accuracy = evaluate_model(val_results)
print(f"Validation Accuracy: {val_accuracy:.2f}")


In [None]:
# Score the test results. The notebook only uses the pretrained checkpoint and
# never fine-tunes it, so the label does not claim otherwise.
test_accuracy = evaluate_model(test_results)
print(f"Test Accuracy: {test_accuracy:.2f}")


In [None]:
def _format_result(original, translated, emotion, vader_scores):
    """Render one result tuple as the text block used for display and export."""
    return (
        f"Original: {original}\n"
        f"Translated: {translated}\n"
        f"Emotion: {emotion}\n"
        f"VADER Scores: {vader_scores}\n"
        + "-" * 50 + "\n"
    )


def _save_results(results, path):
    """Write every result tuple to ``path`` as UTF-8 text."""
    # The translations contain non-ASCII characters, so the encoding is fixed
    # rather than left to the platform default.
    with open(path, 'w', encoding='utf-8') as f:
        f.writelines(_format_result(*result) for result in results)


# To display results
print("Sample Results from Training Data:")
for result in train_results[:5]:
    # Each formatted block already ends with a newline.
    print(_format_result(*result), end="")

# To save results to a file
_save_results(train_results, 'translated_train_results.txt')
_save_results(test_results, 'translated_test_results.txt')


In [None]:
import pickle
# Serialize the in-memory model object to 'model.pickle' in the working directory.
# NOTE(review): a pickle of a library object may not load under a different
# library version; transformers also provides model.save_pretrained(...) —
# consider it if this file has to outlive the current environment.
with open('model.pickle','wb') as f:
    pickle.dump(model,f)