In [4]:
# Step 1: Install Required Libraries
!pip install transformers textblob vaderSentiment torch

# Step 2: Import Libraries
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import torch

# Step 3: Load Multilingual Translation Model
# The tokenizer and the model are both loaded from the same pretrained
# checkpoint name so that their vocabularies agree.
model_name = 'facebook/m2m100_418M'
tokenizer = M2M100Tokenizer.from_pretrained(model_name)
model = M2M100ForConditionalGeneration.from_pretrained(model_name)

# Display name -> language code for every translation target used below.
languages = dict(
    Hindi="hi",
    Tamil="ta",
    Spanish="es",
    French="fr",
    German="de",
    Chinese="zh",
)

# Step 4: Define Sentiment Analysis Functions
# Shared VADER analyzer, created lazily on first use. Building a
# SentimentIntensityAnalyzer is comparatively expensive, so it is constructed
# once and reused instead of being rebuilt for every analysed string.
_vader_analyzer = None


def analyze_sentiment(text):
    """Score the sentiment of *text* with both TextBlob and VADER.

    Args:
        text: The string to analyse.

    Returns:
        A ``(polarity, vader_scores)`` tuple, where ``polarity`` is
        TextBlob's ``sentiment.polarity`` for the text and ``vader_scores``
        is the value returned by VADER's ``polarity_scores``.
    """
    global _vader_analyzer
    if _vader_analyzer is None:
        _vader_analyzer = SentimentIntensityAnalyzer()

    # Using TextBlob
    polarity = TextBlob(text).sentiment.polarity

    # Using VADER
    vader_scores = _vader_analyzer.polarity_scores(text)

    return polarity, vader_scores

def categorize_emotion(polarity):
    """Map a polarity score to a coarse emotion label based on its sign.

    Args:
        polarity: Numeric polarity score.

    Returns:
        'positive' when the score is above zero, 'negative' when it is
        below zero, and 'neutral' in every other case.
    """
    if polarity > 0:
        return 'positive'
    if polarity < 0:
        return 'negative'
    return 'neutral'

# Step 5: Define Translation Function with Emotion Contextualization
def translate_with_emotion(text, target_lang):
    """Translate English *text* and report the sentiment of the source text.

    The sentiment is computed on the original (English) input, not on the
    translation.

    Args:
        text: English text to translate.
        target_lang: Target language code accepted by the tokenizer's
            ``get_lang_id`` (for example ``"hi"``).

    Returns:
        A ``(translated_text, emotion, vader_scores)`` tuple: the decoded
        translation, the label from ``categorize_emotion``, and the VADER
        score dict from ``analyze_sentiment``.
    """
    # Analyze sentiment of the source text
    polarity, vader_scores = analyze_sentiment(text)
    emotion = categorize_emotion(polarity)

    # Source language is English
    tokenizer.src_lang = "en"
    inputs = tokenizer(text, return_tensors="pt", padding=True)

    # Select the target language for this call only. Passing it to generate()
    # avoids mutating the shared model config, so one call cannot leave a
    # stale target language behind for the next.
    translated = model.generate(
        **inputs,
        forced_bos_token_id=tokenizer.get_lang_id(target_lang),
    )
    translated_text = tokenizer.batch_decode(translated, skip_special_tokens=True)[0]

    return translated_text, emotion, vader_scores

# Step 6: Test the Model with Sample Text
# Translate one fixed sentence into every configured language and print the
# translation together with the sentiment of the English source.
sample_text = "I am so happy to see you!"
for lang_name, lang_code in languages.items():
    translated_text, emotion, vader_scores = translate_with_emotion(sample_text, lang_code)
    print(
        f"Original Text: {sample_text}",
        f"Translated to {lang_name}: {translated_text}",
        f"Emotion: {emotion}",
        f"VADER Scores: {vader_scores}",
        "-" * 50,
        sep="\n",
    )

# Step 7: Load and Process Your Datasets
def process_dataset(file_path, target_lang, encoding="utf-8"):
    """Translate every line of a text file and collect the results.

    Args:
        file_path: Path of the text file to read; each line is one input.
        target_lang: Target language code forwarded to
            ``translate_with_emotion``.
        encoding: Text encoding used to read the file. Defaults to UTF-8 so
            the result does not depend on the platform's default encoding.

    Returns:
        A list with one ``(original, translated_text, emotion, vader_scores)``
        tuple per line of the file, in file order. ``original`` is the line
        with surrounding whitespace stripped.
    """
    # Read and strip everything up front so the file is closed before the
    # (slow) translation work starts.
    with open(file_path, 'r', encoding=encoding) as file:
        texts = [line.strip() for line in file]

    results = []
    for text in texts:
        translated_text, emotion, vader_scores = translate_with_emotion(text, target_lang)
        results.append((text, translated_text, emotion, vader_scores))

    return results

# Translate the train, test and validation splits into Hindi ("hi").
train_results = process_dataset(file_path='clean_train.txt', target_lang="hi")
test_results = process_dataset(file_path='clean_test.txt', target_lang="hi")
val_results = process_dataset(file_path='clean_val.txt', target_lang="hi")

# Step 8: Evaluate the Model
def evaluate_model(results):
    """Return the fraction of results whose stored emotion is reproduced.

    For each ``(original, translated, emotion, vader_scores)`` tuple the
    emotion is recomputed from ``original`` and compared with the stored
    ``emotion``.

    NOTE(review): when the tuples come from ``process_dataset``, the stored
    emotion was derived from the same original text with the same functions,
    so this measures self-consistency rather than agreement with independent
    labels. Confirm whether a comparison against gold labels was intended.

    Args:
        results: Sequence of ``(original, translated, emotion, vader_scores)``
            tuples.

    Returns:
        The accuracy as a float in ``[0, 1]``; ``0.0`` when *results* is
        empty (instead of dividing by zero).
    """
    total = len(results)
    if total == 0:
        return 0.0

    correct_predictions = sum(
        categorize_emotion(analyze_sentiment(original)[0]) == emotion
        for original, _translated, emotion, _vader_scores in results
    )
    return correct_predictions / total

# Evaluate on validation data and report the score with two decimals.
val_accuracy = evaluate_model(val_results)
print("Validation Accuracy: {:.2f}".format(val_accuracy))

# Step 9: Re-evaluate on the Test Set
test_accuracy = evaluate_model(test_results)
print("Test Accuracy: {:.2f}".format(test_accuracy))

# Step 10: Save the Final Results
# (Only the translation/sentiment results are written here; no model is
# saved by this step.)
def _write_results(path, results):
    """Write one human-readable record per result tuple to *path*.

    The file is written as UTF-8 explicitly: the translations contain
    non-ASCII text, which a non-UTF-8 platform default encoding may be
    unable to encode.

    Args:
        path: Destination file path; an existing file is overwritten.
        results: Iterable of ``(original, translated, emotion, vader_scores)``
            tuples.
    """
    with open(path, 'w', encoding='utf-8') as f:
        for original, translated, emotion, vader_scores in results:
            f.write(f"Original: {original}\n")
            f.write(f"Translated: {translated}\n")
            f.write(f"Emotion: {emotion}\n")
            f.write(f"VADER Scores: {vader_scores}\n")
            f.write("-" * 50 + "\n")


_write_results('final_val_results.txt', val_results)
_write_results('final_test_results.txt', test_results)





Original Text: I am so happy to see you!
Translated to Hindi: मैं तुम्हें देखकर बहुत खुश हूँ!
Emotion: positive
VADER Scores: {'neg': 0.0, 'neu': 0.544, 'pos': 0.456, 'compound': 0.7213}
--------------------------------------------------
Original Text: I am so happy to see you!
Translated to Tamil: உங்களைப் பார்த்து மகிழ்ச்சியாக இருக்கிறேன்!
Emotion: positive
VADER Scores: {'neg': 0.0, 'neu': 0.544, 'pos': 0.456, 'compound': 0.7213}
--------------------------------------------------
Original Text: I am so happy to see you!
Translated to Spanish: ¡Estoy muy contento de verlo!
Emotion: positive
VADER Scores: {'neg': 0.0, 'neu': 0.544, 'pos': 0.456, 'compound': 0.7213}
--------------------------------------------------
Original Text: I am so happy to see you!
Translated to French: Je suis tellement heureux de te voir!
Emotion: positive
VADER Scores: {'neg': 0.0, 'neu': 0.544, 'pos': 0.456, 'compound': 0.7213}
--------------------------------------------------
Original Text: I am so happy to see you!
[remaining output truncated]

In [None]:
# Serialize the in-memory translation model object to 'model3.pickle'.
# This cell relies on `model` already being defined in the running session;
# if the cell that loads the model has not been executed first, the dump
# fails with NameError.
# NOTE(review): Hugging Face models are usually persisted with
# `model.save_pretrained(...)` rather than pickle — confirm which format the
# consumers of this file expect before changing it.
import pickle
with open('model3.pickle','wb') as f:
    pickle.dump(model,f)

NameError: name 'model' is not defined