In [1]:
import os
import shutil
# NOTE: distutils is deprecated (PEP 632) and removed in Python 3.12.
from distutils.dir_util import copy_tree

import pandas as pd

In [2]:
# Create the output directory for the combined dataset.
# exist_ok=True keeps the cell safe to re-run and removes the gap between
# checking for the directory and creating it.
os.makedirs("./all_combined", exist_ok=True)

In [3]:
def combine_csv_files(paths_to_csv, output_csv_path, sort_column="Label"):
    """Concatenate several CSV files into one CSV sorted by a column.

    Args:
        paths_to_csv: Iterable of paths to the CSV files to combine. The
            files are concatenated in the order given.
        output_csv_path: Path the combined CSV is written to (without the
            index column).
        sort_column: Name of the column to sort the combined rows by, in
            ascending order. Defaults to "Label".

    Raises:
        ValueError: If ``paths_to_csv`` is empty.
        KeyError: If ``sort_column`` is not a column of the combined data.
    """
    # Read every csv file into its own dataframe
    frames = [pd.read_csv(path) for path in paths_to_csv]
    if not frames:
        raise ValueError("paths_to_csv must contain at least one csv file")

    combined_df = (
        # ignore_index avoids duplicate index values coming from each file
        pd.concat(frames, ignore_index=True)
        # A stable sort keeps rows with equal labels in input order, so the
        # output file is reproducible from run to run.
        .sort_values(sort_column, ascending=True, kind="mergesort")
    )
    # Save the combined dataframe to the specified path without index column
    combined_df.to_csv(output_csv_path, index=False)
    print("Created combined csv file at {}".format(output_csv_path))

In [4]:
# Merge the English and Hindi training labels into one csv file
combine_csv_files(
    paths_to_csv=["./english/train.csv", "./hindi/train.csv"],
    output_csv_path="./all_combined/train.csv",
)

Created combined csv file at ./all_combined/train.csv


In [5]:
# Merge the English and Hindi testing labels into one csv file
combine_csv_files(
    paths_to_csv=["./english/test.csv", "./hindi/test.csv"],
    output_csv_path="./all_combined/test.csv",
)

Created combined csv file at ./all_combined/test.csv


In [6]:
def combine_image_files(paths_to_image_dirs, output_dir_path):
    """Copy the contents of several directories into one output directory.

    Args:
        paths_to_image_dirs: Iterable of source directory paths. They are
            copied in the order given, so a file from a later directory
            overwrites an earlier one with the same relative path.
        output_dir_path: Destination directory; created if it does not
            exist and reused if it does.
    """
    for path in paths_to_image_dirs:
        print("Copying contents of {} to {}".format(path, output_dir_path))
        # shutil.copytree replaces distutils' copy_tree, which is removed in
        # Python 3.12 and remembers created directories per process, so a
        # re-run after deleting the output directory fails.
        # dirs_exist_ok=True allows merging into an existing destination.
        shutil.copytree(path, output_dir_path, dirs_exist_ok=True)

In [7]:
# Gather the English and Hindi training images in one directory
combine_image_files(
    paths_to_image_dirs=["./english/train", "./hindi/train"],
    output_dir_path="./all_combined/train",
)

Copying contents of ./english/train to ./all_combined/train
Copying contents of ./hindi/train to ./all_combined/train


In [8]:
# Gather the English and Hindi testing images in one directory
combine_image_files(
    paths_to_image_dirs=["./english/test", "./hindi/test"],
    output_dir_path="./all_combined/test",
)

Copying contents of ./english/test to ./all_combined/test
Copying contents of ./hindi/test to ./all_combined/test
