In [47]:
import os
import csv
import numpy as np

def merge_all_csv_files_in_directory(directory_path):
    """
    Merge all CSV files in a directory into a single CSV file.

    Args:
        directory_path (str): Path to the directory containing CSV files.

    Returns:
        None
    """
    merged_data = {}
    languages = set()

    # List all files in the directory
    csv_files = [file for file in os.listdir(directory_path) if file.endswith('.csv')]

    # Iterate through CSV files and gather data
    for file_name in csv_files:
        file_path = os.path.join(directory_path, file_name)
        with open(file_path, mode='r', encoding='utf-8') as csv_file:
            csv_reader = csv.DictReader(csv_file)
            for row in csv_reader:
                language = row['Language']
                title = row['Title']
                if language not in merged_data:
                    merged_data[language] = {}
                merged_data[language][file_name] = title
                languages.add(language)

    # Create a list of dictionaries for the merged data
    merged_list = []
    for language in languages:
        merged_dict = {'Language': language}
        for file_name in csv_files:
            title = merged_data.get(language, {}).get(file_name, '')
            merged_dict[file_name] = title
        merged_list.append(merged_dict)

    # Create a merged CSV file
    merged_csv_path = os.path.join(directory_path, 'merged_translations.csv')
    with open(merged_csv_path, mode='w', encoding='utf-8', newline='') as merged_csv_file:
        fieldnames = ['Language'] + csv_files
        csv_writer = csv.DictWriter(merged_csv_file, fieldnames=fieldnames)
        csv_writer.writeheader()
        csv_writer.writerows(merged_list)

# Directory path where the CSV files are located
csv_directory = 'dataframes/translations/'

# Merge all CSV files into a single CSV file
merge_all_csv_files_in_directory(csv_directory)

print("Merged data saved to 'merged_translations.csv'")


KeyError: 'Title'

In [16]:
df = pd.read_csv('dataframes/translations/merged_translations.csv', encoding='utf-8')

In [13]:
df

Unnamed: 0,english,spanish,german
0,Decimal point,,
1,Separador decimal,,
2,Dezimaltrennzeichen,,
3,Inversive geometry,,
4,,,
...,...,...,...
313,Límite (matemática),,
314,,,
315,A priori and a posteriori,,
316,A priori y a posteriori,,


In [18]:
import os
import csv
import pandas as pd

def merge_all_csv_files_in_directory(directory_path):
    """
    Merge all CSV files in a directory into a single CSV file.

    Args:
        directory_path (str): Path to the directory containing CSV files.

    Returns:
        pd.DataFrame: A DataFrame containing the merged data.
    """
    merged_data = {}
    languages = set()

    # List all files in the directory
    csv_files = [file for file in os.listdir(directory_path) if file.endswith('.csv')]

    # Iterate through CSV files and gather data
    for file_name in csv_files:
        file_path = os.path.join(directory_path, file_name)
        with open(file_path, mode='r', encoding='utf-8') as csv_file:
            csv_reader = csv.DictReader(csv_file)
            for row in csv_reader:
                language = row['Language']
                title = row['Title']
                if language not in merged_data:
                    merged_data[language] = {}
                merged_data[language][file_name] = title
                languages.add(language)

    # Create a list of dictionaries for the merged data
    merged_list = []
    for language in languages:
        merged_dict = {'Language': language}
        for file_name in csv_files:
            title = merged_data.get(language, {}).get(file_name, '')
            merged_dict[file_name] = title
        merged_list.append(merged_dict)

    # Create a merged CSV file
    merged_csv_path = os.path.join(directory_path, 'merged_translations.csv')
    with open(merged_csv_path, mode='w', encoding='utf-8', newline='') as merged_csv_file:
        fieldnames = ['Language'] + csv_files
        csv_writer = csv.DictWriter(merged_csv_file, fieldnames=fieldnames)
        csv_writer.writeheader()
        csv_writer.writerows(merged_list)

    # Create a DataFrame from the merged data
    merged_df = pd.DataFrame(merged_list)

    return merged_df

# Directory path where the CSV files are located
csv_directory = 'dataframes/translations/'

# Merge all CSV files into a single CSV file and save as a DataFrame
merged_dataframe = merge_all_csv_files_in_directory(csv_directory)
merged_dataframe.to_csv('merged_translations_dataframe.csv', index=False, encoding='utf-8')

print("Merged data saved to 'merged_translations.csv' and 'merged_translations_dataframe.csv'")


Merged data saved to 'merged_translations.csv' and 'merged_translations_dataframe.csv'


In [19]:
merged_dataframe

Unnamed: 0,Language,Decimal point_translated.csv,Inversive geometry_translated.csv,Synthetic geometry_translated.csv,Limit of a function_translated.csv,Logical truth_translated.csv,Lie sphere geometry_translated.csv,Reason_translated.csv,Birational geometry_translated.csv,Antinomy_translated.csv,...,Digital geometry_translated.csv,Taxicab geometry_translated.csv,Noncommutative geometry_translated.csv,Toric geometry_translated.csv,Combinatorial geometry_translated.csv,Distance geometry_translated.csv,Syntax (logic)_translated.csv,"(ε, δ)-definition of limit_translated.csv",Limit (mathematics)_translated.csv,A priori and a posteriori_translated.csv
0,de,Dezimaltrennzeichen,,Synthetische Geometrie,Grenzwert (Funktion),,,Vernunft,Birationale Äquivalenz,Antinomie,...,Digitale Geometrie,Manhattan-Metrik,Nichtkommutative Geometrie,Torische Varietät,,,,Grenzwert (Funktion),,
1,es,Separador decimal,,Geometría sintética,Límite de una función,Verdad lógica,,Razón,,Antinomia,...,Geometría digital,Geometría del taxista,Geometría no conmutativa,,Geometría discreta,,,Límite de una función,Límite (matemática),A priori y a posteriori
2,en,Decimal point,Inversive geometry,Synthetic geometry,Limit of a function,Logical truth,Lie sphere geometry,Reason,Birational geometry,Antinomy,...,Digital geometry,Taxicab geometry,Noncommutative geometry,Toric geometry,Combinatorial geometry,Distance geometry,Syntax (logic),"(ε, δ)-definition of limit",Limit (mathematics),A priori and a posteriori


In [37]:
df = merged_dataframe.reset_index(drop=True)

In [38]:
df

Unnamed: 0,Language,Decimal point_translated.csv,Inversive geometry_translated.csv,Synthetic geometry_translated.csv,Limit of a function_translated.csv,Logical truth_translated.csv,Lie sphere geometry_translated.csv,Reason_translated.csv,Birational geometry_translated.csv,Antinomy_translated.csv,...,Digital geometry_translated.csv,Taxicab geometry_translated.csv,Noncommutative geometry_translated.csv,Toric geometry_translated.csv,Combinatorial geometry_translated.csv,Distance geometry_translated.csv,Syntax (logic)_translated.csv,"(ε, δ)-definition of limit_translated.csv",Limit (mathematics)_translated.csv,A priori and a posteriori_translated.csv
0,de,Dezimaltrennzeichen,,Synthetische Geometrie,Grenzwert (Funktion),,,Vernunft,Birationale Äquivalenz,Antinomie,...,Digitale Geometrie,Manhattan-Metrik,Nichtkommutative Geometrie,Torische Varietät,,,,Grenzwert (Funktion),,
1,es,Separador decimal,,Geometría sintética,Límite de una función,Verdad lógica,,Razón,,Antinomia,...,Geometría digital,Geometría del taxista,Geometría no conmutativa,,Geometría discreta,,,Límite de una función,Límite (matemática),A priori y a posteriori
2,en,Decimal point,Inversive geometry,Synthetic geometry,Limit of a function,Logical truth,Lie sphere geometry,Reason,Birational geometry,Antinomy,...,Digital geometry,Taxicab geometry,Noncommutative geometry,Toric geometry,Combinatorial geometry,Distance geometry,Syntax (logic),"(ε, δ)-definition of limit",Limit (mathematics),A priori and a posteriori


In [48]:
df = df.replace('NaN', np.nan)

# Count NaN values in each column
nan_count = df.isna().sum()

# Filter columns with NaN values
columns_with_nan = nan_count[nan_count > 0]

print(columns_with_nan)

Inversive geometry_translated.csv                            2
Logical truth_translated.csv                                 1
Lie sphere geometry_translated.csv                           2
Birational geometry_translated.csv                           1
Inversive ring geometry_translated.csv                       2
Parabolic geometry (differential geometry)_translated.csv    2
Reference_translated.csv                                     1
Logical form_translated.csv                                  1
Strict conditional_translated.csv                            1
Archimedes' use of infinitesimals_translated.csv             1
Orders of approximation_translated.csv                       2
Noncommutative algebraic geometry_translated.csv             2
List of mathematical identities_translated.csv               2
Discrete geometry_translated.csv                             1
Symplectic geometry_translated.csv                           1
One-sided limit_translated.csv                         

In [42]:
df.shape

(3, 107)