In [1]:
import pandas as pd

def append_to_master_kg(master_csv_path, additional_csv_paths, columns_to_interchange=None):
    """
    Appends a list of CSV files to a master AlzKG CSV file, adding a mirrored copy of every appended edge.

    Each additional file is read positionally: its own header row is skipped and the
    master file's column names are applied instead, so every additional file must have
    the same number of columns, in the same order, as the master file. For every
    appended row a second row is added in which each `x_<col>` / `y_<col>` pair is
    swapped; all other columns are copied unchanged. Rows already in the master file
    are not mirrored.

    Parameters:
    - master_csv_path: str, the path to the master AlzKG CSV file.
    - additional_csv_paths: list, a list of paths to additional CSV files to append.
      A single path string is treated as a one-element list. Files that contain no
      data rows are skipped.
    - columns_to_interchange: iterable of str or None, the column stems whose
      `x_<stem>` and `y_<stem>` columns are swapped in the mirrored rows.
      Defaults to ('id', 'type', 'name', 'source').

    Returns:
    - final_df: DataFrame, the master rows, followed by the appended rows, followed by
      their mirrored copies. If there is nothing to append, the master DataFrame is
      returned as loaded.

    Raises:
    - ValueError: if an additional file's column count differs from the master file's.
    - KeyError: if an `x_<stem>` / `y_<stem>` column to swap is missing from the master file.
    """
    if columns_to_interchange is None:
        columns_to_interchange = ('id', 'type', 'name', 'source')

    # A bare string would otherwise be iterated one character at a time.
    if isinstance(additional_csv_paths, str):
        additional_csv_paths = [additional_csv_paths]

    # Load the master AlzKG CSV file
    kgraw = pd.read_csv(master_csv_path)
    expected_width = len(kgraw.columns)

    # Load the additional CSV files, ignoring their headers
    additional_dfs = []
    for file_path in additional_csv_paths:
        try:
            frame = pd.read_csv(file_path, header=None, skiprows=1)
        except pd.errors.EmptyDataError:
            # Nothing remains once the header row is skipped: no edges to add.
            continue
        # Columns are matched by position, so a width mismatch would either fail
        # later with an opaque message or be silently padded with NaN by concat.
        if frame.shape[1] != expected_width:
            raise ValueError(
                f"{file_path} has {frame.shape[1]} columns, but the master file "
                f"{master_csv_path} has {expected_width}"
            )
        additional_dfs.append(frame)

    # pd.concat raises on an empty list, so handle "nothing to append" explicitly.
    if not additional_dfs:
        return kgraw

    combined_df = pd.concat(additional_dfs, ignore_index=True)
    combined_df.columns = kgraw.columns

    swap_pairs = [(f'x_{col}', f'y_{col}') for col in columns_to_interchange]
    missing = [name for pair in swap_pairs for name in pair if name not in combined_df.columns]
    if missing:
        raise KeyError(f"Columns required for mirroring are missing: {missing}")

    # Build the mirrored edges; always read from combined_df so that the first
    # assignment of a pair cannot affect the second.
    interchanged_df = combined_df.copy()
    for x_name, y_name in swap_pairs:
        interchanged_df[x_name] = combined_df[y_name]
        interchanged_df[y_name] = combined_df[x_name]

    # Master rows first, then the new edges, then their mirrored counterparts
    final_df = pd.concat([kgraw, combined_df, interchanged_df], ignore_index=True)

    return final_df

# Example usage (Google Colab):
# Mount Google Drive so the PrimeKG / AlzKG CSV files are reachable from the notebook.
from google.colab import drive

drive.mount('/content/drive')

# Drive folder holding both the master graph and the per-prefix edge files
filepath = "/content/drive/My Drive/primekg_files"
master_csv_path = f'{filepath}/kg_raw_orig_filtered.csv'

# One "<prefix>_kg.csv" edge file per prefix, appended in this order
additional_csv_paths = [
    f'{filepath}/{prefix}_kg.csv'
    for prefix in ('Ex', 'In', 'Oli', 'Opc', 'Mic', 'Ast')
]

# Merge the extra edge files (plus their mirrored edges) into the master graph
final_df = append_to_master_kg(master_csv_path, additional_csv_paths)

# Write the combined graph next to the inputs, without the DataFrame index
output_csv_path = f'{filepath}/kgraw_with_mathys.csv'
final_df.to_csv(output_csv_path, index=False)


Mounted at /content/drive


  kgraw = pd.read_csv(master_csv_path)
