In [38]:
import pandas as pd
import glob
import numpy as np

In [30]:
def concatenate_csv_files(folder_path, output_file, exclude_column_name):
    """
    Merge every CSV file in a folder side by side and save the result.

    Each file is read into its own DataFrame. The excluded column(s) and any
    column whose name starts with 'Unnamed' or 'date' are removed, then the
    remaining columns of all files are joined horizontally (axis=1) on the
    row index. Files with fewer rows therefore produce NaN in the missing
    rows; those gaps are filled by linear interpolation in the forward
    direction before the result is written without its index.

    Args:
        folder_path (str): Path to the folder containing CSV files.
        output_file (str): Path to the output CSV file.
        exclude_column_name (str | list[str] | None): Column name, or a
            collection of column names, to drop from every file. Names that
            are not present in a file are ignored.

    Raises:
        FileNotFoundError: If no ``*.csv`` file is found in ``folder_path``.
    """
    # glob yields files in arbitrary order; sorting makes the column order of
    # the output reproducible across runs and machines.
    all_files = sorted(glob.glob(f"{folder_path}/*.csv"))

    if not all_files:
        raise FileNotFoundError(f"No CSV files found in the folder: {folder_path}")

    # Accept a single name as well as a list of names. Wrapping an already
    # list-valued argument in another list would make the drop match nothing.
    if exclude_column_name is None:
        excluded = []
    elif isinstance(exclude_column_name, str):
        excluded = [exclude_column_name]
    else:
        excluded = list(exclude_column_name)

    frames = []
    for filename in all_files:
        frame = pd.read_csv(filename).drop(columns=excluded, errors='ignore')
        # Discard index leftovers ("Unnamed: 0", ...) and date-like columns
        unwanted = [
            col for col in frame.columns
            if isinstance(col, str) and col.startswith(('Unnamed', 'date'))
        ]
        frames.append(frame.drop(columns=unwanted))

    # One concat over all frames instead of growing a DataFrame in the loop
    concatenated_df = pd.concat(frames, axis=1)

    # Fill NaN values with linear interpolation (forward direction only)
    concatenated_df = concatenated_df.interpolate(
        method='linear', limit_direction='forward', axis=0
    )

    # Save the concatenated DataFrame to a new CSV file
    concatenated_df.to_csv(output_file, index=False)

In [31]:
# Input folders (one per dataset) and, in the same order, the CSV file
# each folder is merged into.
paths_in = [
    'data/time_series/danish_atm_daily',
    'data/time_series/mipt_alpha',
    'data/time_series/nn5',
]
paths_out = [
    'data/danish_atm_daily.csv',
    'data/mipt_alpha.csv',
    'data/nn5.csv',
]

In [32]:
# Build one merged CSV per dataset folder. The excluded column is passed as a
# plain string: the parameter is a single column name, not a list of names.
for path, name in zip(paths_in, paths_out):
    concatenate_csv_files(path, name, 'date')

In [40]:
# Sanity check: report, for every generated file, whether any missing
# values are still present after interpolation.
for name in paths_out:
    df = pd.read_csv(name)
    print(
        f'There are NaNs in {name}'
        if df.isna().to_numpy().any()
        else f'There are no NaNs in {name}'
    )

There are no NaNs in data/danish_atm_daily.csv
There are no NaNs in data/mipt_alpha.csv
There are no NaNs in data/nn5.csv
