In [39]:
import os
import csv
import numpy as np


def merge_all_csv_files_in_directory(directory_path):
    """
    Merge all CSV files in a directory into a single CSV file.

    Args:
        directory_path (str): Path to the directory containing CSV files.

    Returns:
        None
    """
    merged_data = {}
    languages = set()

    # List all files in the directory
    csv_files = [file for file in os.listdir(directory_path) if file.endswith('.csv')]

    # Iterate through CSV files and gather data
    for file_name in csv_files:
        file_path = os.path.join(directory_path, file_name)
        with open(file_path, mode='r', encoding='utf-8') as csv_file:
            csv_reader = csv.DictReader(csv_file)
            for row in csv_reader:
                language = row['Language']
                title = row['Title']
                if language not in merged_data:
                    merged_data[language] = {}
                merged_data[language][file_name] = title
                languages.add(language)

    # Create a list of dictionaries for the merged data
    merged_list = []
    for language in languages:
        merged_dict = {'Language': language}
        for file_name in csv_files:
            title = merged_data.get(language, {}).get(file_name, '')
            merged_dict[file_name] = title
        merged_list.append(merged_dict)

    # Create a merged CSV file
    merged_csv_path = os.path.join(directory_path, 'merged_translations.csv')
    with open(merged_csv_path, mode='w', encoding='utf-8', newline='') as merged_csv_file:
        fieldnames = ['Language'] + csv_files
        csv_writer = csv.DictWriter(merged_csv_file, fieldnames=fieldnames)
        csv_writer.writeheader()
        csv_writer.writerows(merged_list)

# Directory path where the CSV files are located
csv_directory = 'dataframes/clean_translations/'

# Merge all CSV files into a single CSV file
merge_all_csv_files_in_directory(csv_directory)

print("Merged data saved to 'merged_translations.csv'")


Merged data saved to 'merged_translations.csv'


In [None]:
import pandas as pd

df = pd.read_csv('dataframes/clean_translations/merged_translations.csv', encoding='utf-8')

In [41]:
df.transpose()

Unnamed: 0,0,1,2
Language,es,de,en
Logical connective_translated.csv,Conectiva lógica,Logische Verknüpfung,Logical connective
Basis (linear algebra)_translated.csv,Base (álgebra),Basis (Vektorraum),Basis (linear algebra)
Ternary operation_translated.csv,Operación ternaria,Ternärer Operator,Ternary operation
Description logic_translated.csv,Lógica de descripción,Beschreibungslogik,Description logic
...,...,...,...
"(ε, δ)-definition of limit_translated.csv",Límite de una función,Grenzwert (Funktion),"(ε, δ)-definition of limit"
Limit (mathematics)_translated.csv,Límite (matemática),Grenzwert,Limit (mathematics)
Zeros and poles_translated.csv,Polo (análisis complejo),Polstelle,Zeros and poles
A priori and a posteriori_translated.csv,A priori y a posteriori,Vor und nach,A priori and a posteriori


In [42]:
df = df.replace('NaN', np.nan)

# Count NaN values in each column
nan_count = df.isna().sum()

# Filter columns with NaN values
columns_with_nan = nan_count[nan_count > 0]

print(columns_with_nan)

Series([], dtype: int64)


In [35]:
df.shape

(3, 230)

In [43]:
trans_df=df.transpose()

In [44]:
# Specify the file path where you want to save the CSV file
file_path = 'dictionary_data.csv'

# Use the to_csv method to write the DataFrame to a CSV file
trans_df.to_csv(file_path, index=False)  # Set index=False to exclude the index column in the CSV


In [3]:
import pandas as pd

# Read data from the CSV file
df = pd.read_csv('dictionary_data.csv')

def search_dataframe(language, search_string):
    if language == 'es':
        column = 'es'
    elif language == 'de':
        column = 'de'
    elif language == 'en':
        column = 'en'
    else:
        return "Invalid language code. Please use 'es', 'de', or 'en'."

    # Perform the partial string match search
    result = df[df[column].str.contains(search_string, case=False, na=False)]
    
    return result

# Example usage:
language = input("Enter language code (es/de/en): ")
search_string = input("Enter search string: ")

result = search_dataframe(language, search_string)

if not result.empty:
    print(result)
else:
    print("No matching rows found.")


Enter language code (es/de/en):  es
Enter search string:  li


                                    es                           de  \
4                                Lista                        Liste   
15                      Álgebra lineal              Lineare Algebra   
31      Sistema de ecuaciones lineales    Lineares Gleichungssystem   
40                      Multiplicación               Multiplikation   
62   Geometría constructiva de sólidos  Constructive Solid Geometry   
79                        Módulo libre                 Freier Modul   
84                Geometría euclidiana        Euklidische Geometrie   
87                        Probabilidad           Wahrscheinlichkeit   
108             Multiplicación escalar         Skalarmultiplikation   
122               Geometría euclidiana        Euklidische Geometrie   
126             Inverso multiplicativo                     Kehrwert   
127              Geometría hiperbólica      Hyperbolische Geometrie   
132               Anillo de polinomios                  Polynomring   
140   

In [9]:

import pandas as pd

# Read data from the CSV file
df = pd.read_csv('dictionary_data.csv')

def search_dataframe(language, search_string):
    if language == 'es':
        column = 'es'
    elif language == 'de':
        column = 'de'
    elif language == 'en':
        column = 'en'
    else:
        return "Invalid language code. Please use 'es', 'de', or 'en'."

    # Perform the partial string match search
    result = df[df[column].str.contains(search_string, case=False, na=False)]
    
    return result

def display_options(result):
    if result.empty:
        print("No matching rows found.")
        return

    print("Choose an option:")
    for i, option in enumerate(result.iterrows(), 1):
        index, row = option
        print(f"{i}. {row[language]}")

    choice = int(input("Enter the number of your choice: "))

    if choice < 1 or choice > len(result):
        print("Invalid choice.")
    else:
        chosen_option = result.iloc[choice - 1]
        print("\nChosen Option:")
        print(f"{chosen_option[language]}\n")
        for col in df.columns:
            if col != language:
                print(f"{col}: {chosen_option[col]}")

# Example usage:
language = input("Enter language code (es/de/en): ")
search_string = input("Enter search string: ")

result = search_dataframe(language, search_string)
display_options(result)


Enter language code (es/de/en):  es
Enter search string:  lími


Choose an option:
1. Límite de una función
2. Límite de una sucesión
3. Límite de una función
4. Límite (matemática)
5. Límite superior y límite inferior


Enter the number of your choice:  5



Chosen Option:
Límite superior y límite inferior

de: Limes superior und Limes inferior
en: Limit superior and limit inferior


In [10]:
import pandas as pd
from unidecode import unidecode  # Import unidecode

# Read data from the CSV file
df = pd.read_csv('dictionary_data.csv')

# Define a function to remove accents from a string
def remove_accents(input_str):
    return unidecode(input_str)

def search_dataframe(language, search_string):
    # Remove accents from the search string
    search_string = remove_accents(search_string)
    
    if language == 'es':
        column = 'es'
    elif language == 'de':
        column = 'de'
    elif language == 'en':
        column = 'en'
    else:
        return "Invalid language code. Please use 'es', 'de', or 'en'."

    # Remove accents from the data in the selected column
    df[column] = df[column].apply(remove_accents)

    # Perform the partial string match search
    result = df[df[column].str.contains(search_string, case=False, na=False)]
    
    return result

def display_options(result):
    if result.empty:
        print("No matching rows found.")
        return

    print("Choose an option:")
    for i, option in enumerate(result.iterrows(), 1):
        index, row = option
        print(f"{i}. {row[language]}")

    choice = int(input("Enter the number of your choice: "))

    if choice < 1 or choice > len(result):
        print("Invalid choice.")
    else:
        chosen_option = result.iloc[choice - 1]
        print("\nChosen Option:")
        print(f"{chosen_option[language]}\n")
        for col in df.columns:
            if col != language:
                print(f"{col}: {chosen_option[col]}")

# Example usage:
language = input("Enter language code (es/de/en): ")
search_string = input("Enter search string: ")

result = search_dataframe(language, search_string)
display_options(result)


Enter language code (es/de/en):  es
Enter search string:  limi


Choose an option:
1. Limite de una funcion
2. Limite de una sucesion
3. Limite de una funcion
4. Limite (matematica)
5. Limite superior y limite inferior


Enter the number of your choice:  4



Chosen Option:
Limite (matematica)

de: Grenzwert
en: Limit (mathematics)


In [None]:
import pandas as pd
from unidecode import unidecode

# Read data from the CSV file
df = pd.read_csv('dictionary_data.csv')

# Define a function to remove accents from a string
def remove_accents(input_str):
    return unidecode(input_str)

def search_dataframe(language, search_string):
    # Remove accents from the search string
    search_string = remove_accents(search_string)
    
    if language == 'es':
        column = 'es'
    elif language == 'de':
        column = 'de'
    elif language == 'en':
        column = 'en'
    else:
        return "Invalid language code. Please use 'es', 'de', or 'en'."

    # Remove accents from the data in the selected column
    df[column] = df[column].apply(remove_accents)

    # Perform the partial string match search
    result = df[df[column].str.contains(search_string, case=False, na=False)]
    
    return result

def display_options(result):
    if result.empty:
        print("No matching rows found.")
        return

    print("Choose an option:")
    options = []
    for i, option in enumerate(result.iterrows(), 1):
        index, row = option
        options.append(row[language])

    for i, option in enumerate(options, 1):
        print(f"{i}. {option}")

    choice = input("Enter the number of your choice: ")

    if not choice.isdigit() or int(choice) < 1 or int(choice) > len(options):
        print("Invalid choice.")
    else:
        choice = int(choice)
        chosen_option = result.iloc[choice - 1]
        print("\nChosen Option:")
        print(f"{chosen_option[language]}\n")
        for col in df.columns:
            if col != language:
                print(f"{col}: {chosen_option[col]}")

# Language options for the dropdown list
languages = ['es', 'de', 'en']

# Prompt the user to select a language from the menu
while True:
    print("Select a language:")
    for i, lang in enumerate(languages, 1):
        print(f"{i}. {lang}")

    choice = input("Enter the number of your choice: ")

    if not choice.isdigit() or int(choice) < 1 or int(choice) > len(languages):
        print("Invalid choice. Please select a valid language.")
    else:
        language = languages[int(choice) - 1]
        break

search_string = input("Enter search string: ")

result = search_dataframe(language, search_string)
display_options(result)


In [None]:
import pandas as pd
from unidecode import unidecode

def remove_accents(input_str):
    return unidecode(input_str)

def search_and_display(language, search_string, df_path='dictionary_data.csv'):
    try:
        # Read data from the CSV file
        df = pd.read_csv(df_path)

        # Remove accents from the search string
        search_string = remove_accents(search_string)

        if language not in ['es', 'de', 'en']:
            return "Invalid language code. Please use 'es', 'de', or 'en'."

        # Define the column based on the selected language
        column = language

        # Remove accents from the data in the selected column
        df[column] = df[column].apply(remove_accents)

        # Perform the partial string match search
        result = df[df[column].str.contains(search_string, case=False, na=False)]

        if result.empty:
            return "No matching rows found."

        print("Choose an option:")
        options = []
        for i, (_, row) in enumerate(result.iterrows(), 1):
            options.append(row[language])
            print(f"{i}. {row[language]}")

        choice = input("Enter the number of your choice: ")

        if not choice.isdigit() or int(choice) < 1 or int(choice) > len(options):
            return "Invalid choice."
        else:
            choice = int(choice)
            chosen_option = result.iloc[choice - 1]
            output = f"\nChosen Option:\n{chosen_option[language]}\n"
            for col in df.columns:
                if col != language:
                    output += f"{col}: {chosen_option[col]}\n"
            return output

    except Exception as e:
        return str(e)

# Language options for the dropdown list
languages = ['es', 'de', 'en']

# Prompt the user to select a language from the menu
while True:
    print("Select a language:")
    for i, lang in enumerate(languages, 1):
        print(f"{i}. {lang}")

    choice = input("Enter the number of your choice: ")

    if not choice.isdigit() or int(choice) < 1 or int(choice) > len(languages):
        print("Invalid choice. Please select a valid language.")
    else:
        language = languages[int(choice) - 1]
        break

search_string = input("Enter search string: ")

result = search_and_display(language, search_string)

print(result)
