In [1]:
import pandas as pd
import numpy as np
from deep_translator import GoogleTranslator
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import os

### Translate foreign language texts

In [None]:
# Fastest and most accurate approach tried so far; even so, translating one
# text takes roughly 8 minutes on average.
def translate_file(file_path, target_language='en'):
    """Translate a UTF-8 text file line by line and print the result.

    The file is read completely, then its lines are sent to
    ``GoogleTranslator.translate_batch`` in groups of 10.

    Args:
        file_path: Path of the text file to translate.
        target_language: Language code passed as ``target`` to GoogleTranslator.

    Returns:
        list: The translated lines, in file order.
    """
    batch_size = 10

    # Read everything first so the file is not held open during the slow
    # network round-trips below.
    with open(file_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    # One translator is enough; it used to be rebuilt for every batch.
    translator = GoogleTranslator(source='auto', target=target_language)

    translated_lines = []
    for start in range(0, len(lines), batch_size):
        translated_batch = translator.translate_batch(lines[start:start + batch_size])
        translated_lines.extend(translated_batch)
        for translated_line in translated_batch:
            print(translated_line)
    return translated_lines

In [2]:
# Same batched translation as above, but the result is also written to disk.
def translate_file_and_save(file_path, target_language='en'):
    """Translate a UTF-8 text file and save the translation.

    The translation is written, one translated line per row, to
    ``RandomManifestos/Translations/<basename of file_path>``; the directory
    is created if it does not exist yet.

    Args:
        file_path: Path of the text file to translate.
        target_language: Language code passed as ``target`` to GoogleTranslator.

    Returns:
        list: The translated lines, in file order.
    """
    # Build the path from components: the old literal 'RandomManifestos\Translations'
    # contained the invalid escape "\T" and only named a sub-directory on Windows.
    output_directory = os.path.join('RandomManifestos', 'Translations')
    os.makedirs(output_directory, exist_ok=True)
    output_file_path = os.path.join(output_directory, os.path.basename(file_path))

    with open(file_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    # One translator for the whole file instead of one per batch.
    translator = GoogleTranslator(source='auto', target=target_language)

    translated_lines = []
    with open(output_file_path, 'w', encoding='utf-8') as output_file:
        for i in range(0, len(lines), 10):
            translated_batch = translator.translate_batch(lines[i:i + 10])
            translated_lines.extend(translated_batch)
            for translated_line in translated_batch:
                # NOTE(review): assumes the translator can hand back None for a
                # line it could not translate - confirm against deep_translator.
                # An empty row is written in that case instead of crashing.
                output_file.write((translated_line or '') + '\n')
    return translated_lines


  output_directory = 'RandomManifestos\Translations'


In [4]:
# Translate every .txt file that sits directly inside RandomManifestos/.
directory = "RandomManifestos/"
# sorted() gives a reproducible processing order; os.listdir order is arbitrary.
for file_name in sorted(os.listdir(directory)):
    if file_name.endswith('.txt'):
        input_file_path = os.path.join(directory, file_name)
        translate_file_and_save(input_file_path)
        # Report the directory the translation is actually written to
        # (the message used to name a non-existent 'Translate/' folder).
        print(f"Translated '{file_name}' and saved to 'RandomManifestos/Translations/{file_name}'")

Translated '92022.txt' and saved to 'Translate/92022.txt'
Translated '96630.txt' and saved to 'Translate/96630.txt'


In [None]:
import dl_translate as dlt
# This approach is very slow: it can take hours to translate a single file.
def translate_file(file_path, target_language=dlt.lang.ENGLISH, source_language=dlt.lang.CROATIAN):
    """Translate a UTF-8 text file line by line with a local dl_translate model.

    A ``dlt.TranslationModel("nllb200")`` is loaded on every call and each
    line is translated individually; every translated line is printed as soon
    as it is available.

    Args:
        file_path: Path of the text file to translate.
        target_language: Target language passed to ``mt.translate``.
        source_language: Source language passed to ``mt.translate``. Defaults
            to Croatian, which used to be hard-coded.

    Returns:
        list: The translated lines, in file order.
    """
    # Read the file before loading the model so a bad path fails immediately.
    with open(file_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    mt = dlt.TranslationModel("nllb200")

    translated_lines = []
    # Lines were always translated one at a time; the former grouping into
    # chunks of 10 only delayed the printing, so it has been dropped.
    for line in lines:
        translated_line = mt.translate(line, source=source_language, target=target_language)
        translated_lines.append(translated_line)
        print(translated_line)
    return translated_lines


In [None]:
#translate_file("2015-Country-Manifestos\Croatia\81032.txt")

In [None]:
import ollama

: 

In [1]:
#Now using the Ollama local LLM
from ollama import Client
# Send one user question to the 'llama2' model through the Ollama client.
message = dict(role='user', content='Why is the sky blue?')
response = Client().chat(messages=[message], model='llama2')

ResponseError: model 'llama2' not found, try pulling it first

### Use VADER to get sentiment scores

In [None]:
from langdetect import detect

def detect_language(text):
    """Return what langdetect's ``detect`` reports for ``text``, or None on failure.

    Args:
        text: The text whose language should be detected.

    Returns:
        The value returned by ``detect(text)``, or ``None`` if detection
        raised an exception.
    """
    try:
        return detect(text)
    except Exception:
        # Best effort: an undetectable text yields None. Catching Exception
        # (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
        return None


In [None]:
from polyglot.text import Text

# Sample input so this cell runs on a fresh kernel (the name used to be an
# undefined placeholder); replace it with the text you want to score.
your_text_here = "Este es un texto de ejemplo."
text = Text(your_text_here, hint_language_code='es')  # Example for Spanish
sentiment = text.polarity


In [None]:
from langdetect import detect
from polyglot.text import Text

# Language detection helper
def detect_language(text):
    """Detect the language of ``text`` with langdetect.

    Returns the result of ``detect(text)``. If detection raises, the error is
    printed and ``None`` is returned instead.
    """
    try:
        language = detect(text)
    except Exception as e:
        print(f"Error detecting language: {e}")
        language = None
    return language

# Sentiment helper
def analyze_sentiment(text):
    """Wrap ``text`` in a polyglot ``Text`` and return its ``polarity``."""
    return Text(text).polarity

In [None]:
# Reading the text file
# Forward slashes are used on purpose: in the former backslash version the
# sequence "\330" was parsed as an octal escape, so the file name was corrupted.
file_path = '2015-Country-Manifestos/Spain/33020.txt'
with open(file_path, 'r', encoding='utf-8') as file:
    text = file.read()

# Detecting the language
language = detect_language(text)
print(f"Detected language: {language}")

# If a language is detected, perform sentiment analysis
if language:
    sentiment_score = analyze_sentiment(text)
    print(f"Sentiment score: {sentiment_score}")
else:
    print("Could not detect language or language is not supported for sentiment analysis.")


In [None]:
import os
from textblob import TextBlob
from googletrans import Translator, LANGUAGES

# Shared googletrans Translator instance; detect_language and
# translate_to_english in this cell both go through it.
translator = Translator()

def detect_language(text):
    """Return the ``lang`` attribute of googletrans' detection result for ``text``."""
    detection = translator.detect(text)
    return detection.lang

def translate_to_english(text, src_lang):
    """Return ``text`` in English.

    Text whose ``src_lang`` is ``"en"`` is returned untouched; anything else
    is sent through the shared googletrans ``translator``.
    """
    if src_lang != "en":
        # Non-English input: ask the translator and unwrap the .text result.
        return translator.translate(text, src=src_lang, dest='en').text
    return text

def sentiment_analysis(text):
    """Return the ``sentiment`` attribute of a ``TextBlob`` built from ``text``."""
    return TextBlob(text).sentiment

def process_files(directory):
    """Print language and sentiment for every ``.txt`` file in ``directory``.

    Each file is read as UTF-8, passed to ``detect_language``, translated with
    ``translate_to_english`` and scored with ``sentiment_analysis``. Nothing
    is returned; the results are only printed.
    """
    for file in os.listdir(directory):
        # Anything that is not a .txt file is ignored.
        if not file.endswith('.txt'):
            continue
        with open(os.path.join(directory, file), 'r', encoding='utf-8') as handle:
            text = handle.read()
            lang = detect_language(text)
            print(f"Processing '{file}' in {LANGUAGES.get(lang, 'Unknown')}...")

            # Sentiment is computed on the English version of the text.
            sentiment = sentiment_analysis(translate_to_english(text, lang))
            print(f"Sentiment of '{file}': {sentiment}\n")

# Specify the directory containing your txt files.
# A forward slash is used because the old "\S" was an invalid escape sequence
# and a backslash is only a path separator on Windows.
directory = "2015-Country-Manifestos/Spain"
process_files(directory)


In [None]:
import os
from transformers import pipeline

# Initialize the pipelines
# "opus-mt-xx-en" looked like a placeholder model id ("xx" standing in for a
# language code), so the many-to-English "mul" checkpoint is used instead.
# NOTE(review): confirm this checkpoint, or switch to a language-specific one
# such as "Helsinki-NLP/opus-mt-es-en" for Spanish input.
translation_pipeline = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")
sentiment_pipeline = pipeline("sentiment-analysis")

def translate_to_english(text):
    """Run ``text`` through ``translation_pipeline`` and return the translated string.

    The pipeline is called with ``max_length=512`` and the
    ``'translation_text'`` field of its first result is returned.
    NOTE(review): callers pass whole files here; confirm how the model
    handles inputs longer than that limit.
    """
    outputs = translation_pipeline(text, max_length=512)
    first_result = outputs[0]
    return first_result['translation_text']

def sentiment_analysis(text):
    """Return the first result that ``sentiment_pipeline`` produces for ``text``."""
    return sentiment_pipeline(text)[0]

def process_files(directory):
    """Translate and score every ``.txt`` file in ``directory``.

    Each file is read as UTF-8, translated with ``translate_to_english`` and
    scored with ``sentiment_analysis``; the label and score of the result are
    printed. Nothing is returned.
    """
    for file in os.listdir(directory):
        # Anything that is not a .txt file is ignored.
        if not file.endswith('.txt'):
            continue
        with open(os.path.join(directory, file), 'r', encoding='utf-8') as handle:
            text = handle.read()
            print(f"Processing '{file}'...")

            # Sentiment is computed on the English version of the text.
            sentiment = sentiment_analysis(translate_to_english(text))
            print(f"Sentiment of '{file}': {sentiment['label']} with a score of {sentiment['score']}\n")

# Specify the directory containing your txt files
#directory = "2015-Country-Manifestos\Spain"
#process_files(directory)


In [None]:
# Define the input here instead of relying on `filepath` / `file` left over
# from other cells, so the cell also runs on a fresh kernel.
filepath = '2015-Country-Manifestos/Spain/33020.txt'
with open(filepath, 'r', encoding='utf-8') as f:
    text = f.read()
print(f"Processing '{os.path.basename(filepath)}'...")
english_text = translate_to_english(text)

In [9]:
# Translate one whole manifesto to English with the EasyNMT `model`
# (the instance created in the EasyNMT cell of this notebook).
filepath = '2015-Country-Manifestos/Spain/33020.txt'
with open(filepath, mode='r', encoding='utf-8') as f:
    text = f.read()
    english_translation = model.translate(text, target_lang='en')
    print(english_translation)
    

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
  sentences = list(re.findall(u'[^!?。\.]+[!?。\.]*', text, flags=re.U))


KeyboardInterrupt: 

In [None]:
def translate(x):
    """Translate ``x`` to English via ``TextBlob.translate``; return None on failure.

    Args:
        x: The text to translate.

    Returns:
        str | None: ``str(TextBlob(x).translate(to='en'))``, or ``None`` if
        building the blob or translating raised an exception.
    """
    try:
        blob = TextBlob(x)
        return str(blob.translate(to='en'))
    except Exception:
        # Best effort: a failed translation yields None. Catching Exception
        # (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
        return None

In [None]:
from easynmt import EasyNMT
# Create the EasyNMT model once; other cells in this notebook reuse `model`.
model = EasyNMT('opus-mt')

# One English sentence -> German
german_sentence = model.translate('This is a sentence we want to translate to German', target_lang='de')
print(german_sentence)

# A list of English sentences -> German
sentences = [
    'You can define a list with sentences.',
    'All sentences are translated to your target language.',
    'Note, you could also mix the languages of the sentences.',
]
german_sentences = model.translate(sentences, target_lang='de')
print(german_sentences)

In [7]:
# A list of English sentences -> German, using the existing `model`
sentences = [
    'You can define a list with sentences.',
    'All sentences are translated to your target language.',
    'Note, you could also mix the languages of the sentences.',
]
german_sentences = model.translate(sentences, target_lang='de')
print(german_sentences)

['Sie können eine Liste mit Sätzen definieren.', 'Alle Sätze werden in Ihre Zielsprache übersetzt.', 'Beachten Sie, Sie können auch die Sprachen der Sätze mischen.']


In [None]:
# Analyzer shared by all sentiment_scores calls; created on first use.
_VADER_ANALYZER = None


def sentiment_scores(sentence):
    """Print the VADER sentiment breakdown and overall label for ``sentence``.

    The ``polarity_scores`` dictionary (keys ``neg``, ``neu``, ``pos`` and
    ``compound``) is printed, followed by the three shares as percentages and
    an overall label: "Positive" for a compound score >= 0.05, "Negative" for
    a compound score <= -0.05 and "Neutral" otherwise.

    Args:
        sentence: The text to score.

    Returns:
        None. The results are only printed.
    """
    global _VADER_ANALYZER
    # Build the analyzer once instead of on every call; this function is
    # typically invoked once per line of a file.
    if _VADER_ANALYZER is None:
        _VADER_ANALYZER = SentimentIntensityAnalyzer()

    sentiment_dict = _VADER_ANALYZER.polarity_scores(sentence)

    print("Overall sentiment dictionary is : ", sentiment_dict)
    print("sentence was rated as ", sentiment_dict['neg']*100, "% Negative")
    print("sentence was rated as ", sentiment_dict['neu']*100, "% Neutral")
    print("sentence was rated as ", sentiment_dict['pos']*100, "% Positive")

    # Decide the overall label from the compound score.
    compound = sentiment_dict['compound']
    if compound >= 0.05:
        label = "Positive"
    elif compound <= -0.05:
        label = "Negative"
    else:
        label = "Neutral"
    print("Sentence Overall Rated As", label)


In [None]:
# Score every line of one UK manifesto.
# Forward slashes are required here: with backslashes, "\U" starts a
# \UXXXXXXXX unicode escape and the cell fails with a SyntaxError.
with open("2015-Country-Manifestos/United Kingdom/51110.txt", 'r', encoding='utf-8') as file:
    lines = file.readlines()
    for line in lines:
        # sentiment_scores prints its own report and returns None, so its
        # result is not wrapped in print() (that only emitted "None").
        sentiment_scores(line)


    #return translated_lines

