The first portion of this codebook translates only the column names

In [41]:
import pandas as pd
from deep_translator import GoogleTranslator

In [42]:
# Read your CSV file
df = pd.read_csv('Chatham House - Belarus - XVIII.xlsx - CTHw16_social contract_weights_.csv')

# Initialize the translator
translator = GoogleTranslator(source='ru', target='en')

# Translate the column names and store them in a list
translated_column_names = []
for col in df.columns:
    translated_column_names.append(translator.translate(col))

# Create a dictionary to map original column names to translated column names
translation_dict = dict(zip(df.columns, translated_column_names))

# Rename the columns using the translation dictionary
df_translated = df.rename(columns=translation_dict)

# Save the translated DataFrame to CSV, with the og rows of data 
#df_translated.to_csv('translated_dataframe.csv', index=False)

In [43]:
# Print the column names
print(df_translated.columns)

Index(['Response ID', 'Date Submitted', 'Status', 'Length of Interview',
       'In which country have you been living permanently for the last six months?',
       'Select the region in which you have lived for the last six months. If you live in Minsk, select Minsk',
       'Indicate the city where you live?',
       'Indicate the city where you live?.1',
       'Indicate the city where you live?.2',
       'Indicate the city where you live?.3',
       ...
       'Segments by degree of adoption (three main groups)',
       'Segments by degree of adoption (with pensioners highlighted)', 'Total',
       'Sum of points based on the degree of contract acceptance - without contract system',
       'Segments by degree of adoption (three main groups) - without contract system',
       'Segments by degree of acceptance (with pensioners highlighted) - without contract system',
       'Education level', 'Income per person (grouping)', 'Family status',
       'Internet using'],
      dtype='obj

The following portion of this codebook creates a mapper to translate the values of the columns in the csv with skipping logic for the fields that do not require translation

In [44]:
# Step 1: Create a dictionary with column names as keys and sets of unique values as values
unique_values_dict = {}

for col in df_translated.columns[4:]:
    if col == "Please indicate your age:":
        continue  # Skip this column
    unique_values_dict[col] = set(df_translated[col].drop_duplicates())

# Print the dictionary
print(unique_values_dict)


{'In which country have you been living permanently for the last six months?': {'Беларусь'}, 'Select the region in which you have lived for the last six months. If you live in Minsk, select Minsk': {'Брестская область', 'Витебская область', 'Гомельская область', 'г  Минск', 'Гродненская область', 'Минская область', 'Могилёвская область'}, 'Indicate the city where you live?': {'#NULL!', 'Каменец', 'Ивацевичи', 'Ляховичи', 'Кобрин', 'Лунинец', 'Брест', 'Ганцевичи', 'Береза', 'Барановичи', 'Белоозерск', 'Дрогичин', 'Иваново', 'Жабинка', 'Пинск'}, 'Indicate the city where you live?.1': {'Чашники', '#NULL!', 'Лепель', 'Браслав', 'Городок', 'Орша', 'Витебск', 'Полоцк', 'Новополоцк', 'Барань', 'Шумилино', 'Докшицы', 'Поставы', 'Глубокое', 'Толочин'}, 'Indicate the city where you live?.2': {'Октябрьский', '#NULL!', 'Калинковичи', 'Гомель', 'Мозырь', 'Светлогорск', 'Буда-Кошелево', 'Добруш', 'Речица', 'Наровля', 'Петриков', 'Житковичи', 'Лельчицы', 'Ельск', 'Хойники', 'Жлобин', 'Рогачев'}, 'Ind

In [45]:
from deep_translator.exceptions import NotValidPayload

# Function to translate values while skipping certain values
def translate_values(values_set):
    translations_dict = {}
    row_count = 0  # Counter for the number of translated rows
    for value in values_set:
        if value in ["#NULL", "Checked", "Unchecked", "nan"]:
            translations_dict[value] = value  # Skip translation for certain values
        else:
            try:
                translations_dict[value] = translator.translate(value)
                row_count += 1  # Increment the row counter
                print(f"Translated row {row_count}")
            except NotValidPayload:
                print(f"Did not translate '{value}'")
                translations_dict[value] = value  # Store the original value if translation fails
    return translations_dict

# Translate values and handle errors
translated_values_dict = {}
for key, values_set in unique_values_dict.items():
    try:
        translated_values_dict[key] = translate_values(values_set)
    except Exception as e:
        print(f"Error occurred while translating values for '{key}' key:", e)

# Print the translated values dictionary
print(translated_values_dict)

Translated row 1
Translated row 1
Translated row 2
Translated row 3
Translated row 4
Translated row 5
Translated row 6
Translated row 7
Translated row 1
Translated row 2
Translated row 3
Translated row 4
Translated row 5
Translated row 6
Translated row 7
Translated row 8
Translated row 9
Translated row 10
Translated row 11
Translated row 12
Translated row 13
Translated row 14
Translated row 15
Translated row 1
Translated row 2
Translated row 3
Translated row 4
Translated row 5
Translated row 6
Translated row 7
Translated row 8
Translated row 9
Translated row 10
Translated row 11
Translated row 12
Translated row 13
Translated row 14
Translated row 15
Translated row 1
Translated row 2
Translated row 3
Translated row 4
Translated row 5
Translated row 6
Translated row 7
Translated row 8
Translated row 9
Translated row 10
Translated row 11
Translated row 12
Translated row 13
Translated row 14
Translated row 15
Translated row 16
Translated row 17
Translated row 1
Translated row 2
Translated 

In [46]:
print(translated_values_dict)

{'In which country have you been living permanently for the last six months?': {'Беларусь': 'Belarus'}, 'Select the region in which you have lived for the last six months. If you live in Minsk, select Minsk': {'Брестская область': 'Brest region', 'Витебская область': 'Vitebsk region', 'Гомельская область': 'Gomel region', 'г  Минск': 'Minsk', 'Гродненская область': 'The Grodno region', 'Минская область': 'Minsk Region', 'Могилёвская область': 'Mogilev region'}, 'Indicate the city where you live?': {'#NULL!': '#NULL!', 'Каменец': 'Kamenets', 'Ивацевичи': 'Ivatsevichi', 'Ляховичи': 'Lyakhovichi', 'Кобрин': 'Kobrin', 'Лунинец': 'Luninets', 'Брест': 'Brest', 'Ганцевичи': 'Gantsevichi', 'Береза': 'Birch', 'Барановичи': 'Baranovichi', 'Белоозерск': 'Beloozersk', 'Дрогичин': 'Drogichin', 'Иваново': 'Ivanovo', 'Жабинка': 'Zhabinka', 'Пинск': 'Pinsk'}, 'Indicate the city where you live?.1': {'Чашники': 'Chashniki', '#NULL!': '#NULL!', 'Лепель': 'Lepel', 'Браслав': 'Braslav', 'Городок': 'Town', 

In [48]:
# Open the .txt file in write mode
with open('translated_values_dict.txt', 'w') as f:
    # Write the entire dictionary to the file
    f.write(str(translated_values_dict))

Now apply the mapper to the csv to translate the entire file (inshallah)

In [49]:
# read in this csv as the df, bc the columns here are english and thus match the mapper
df = pd.read_csv('translated_dataframe.csv')

# Translate values in each column using the translated_values_dict
for col in df.columns:
    if col in translated_values_dict:
        df[col] = df[col].map(translated_values_dict[col])

# Save the translated DataFrame to a new CSV file
df.to_csv('fully_translated_dataframe.csv', index=False)

print("Translation completed and saved to 'fully_translated_dataframe.csv'.")

Translation completed and saved to 'fully_translated_dataframe.csv'.
