In [1]:
import os
import pandas as pd

def read_csv_files(directory):
    data_frames = []
    
    for filename in os.listdir(directory):
        if filename.endswith(".csv"):
            file_path = os.path.join(directory, filename)
            original_df = pd.read_csv(file_path)
            
            if len(original_df.columns) == 1:
                first_column_name = original_df.columns[0]
                column_names = first_column_name.split()

                split_columns = original_df.iloc[:, 0].str.split(expand=True)
                new_df = pd.concat([original_df, split_columns], axis=1)
                new_df = new_df.drop(new_df.columns[0], axis=1)
                new_df.columns = column_names
                original_df = new_df
            
            original_df['Station'] = filename.split('.')[0]
            data_frames.append(original_df)
            
    return data_frames

csv_directory = '/Users/roshanchandru/Desktop/Dissertation/Dataset/Germany/Indicies/WSDI'

dataframes = read_csv_files(csv_directory)
dataframes

[    year  wsdi      Station
 0   1901    12  61700004063
 1   1902     8  61700004063
 2   1903    23  61700004063
 3   1904     0  61700004063
 4   1905     7  61700004063
 ..   ...   ...          ...
 94  1995     7  61700004063
 95  1996    21  61700004063
 96  1997     9  61700004063
 97  1998    36  61700004063
 98  1999     8  61700004063
 
 [99 rows x 3 columns],
      year  wsdi      Station
 0    1879 -99.9  LOCID000043
 1    1880 -99.9  LOCID000043
 2    1881 -99.9  LOCID000043
 3    1882 -99.9  LOCID000043
 4    1883 -99.9  LOCID000043
 ..    ...   ...          ...
 121  2000  14.0  LOCID000043
 122  2001   0.0  LOCID000043
 123  2002  34.0  LOCID000043
 124  2003   7.0  LOCID000043
 125  2004   6.0  LOCID000043
 
 [126 rows x 3 columns],
      year  wsdi      Station
 0    1879 -99.9  LOCID000042
 1    1880 -99.9  LOCID000042
 2    1881 -99.9  LOCID000042
 3    1882 -99.9  LOCID000042
 4    1883 -99.9  LOCID000042
 ..    ...   ...          ...
 121  2000 -99.9  LOCID000042

In [2]:
for i, df in enumerate(dataframes, start=1):
    print(f"Shape of csv{i}:", df.shape)

Shape of csv1: (99, 3)
Shape of csv2: (126, 3)
Shape of csv3: (126, 3)
Shape of csv4: (135, 3)
Shape of csv5: (126, 3)
Shape of csv6: (135, 3)
Shape of csv7: (51, 3)
Shape of csv8: (126, 3)
Shape of csv9: (126, 3)
Shape of csv10: (126, 3)
Shape of csv11: (126, 3)
Shape of csv12: (99, 3)
Shape of csv13: (126, 3)
Shape of csv14: (126, 3)
Shape of csv15: (129, 3)
Shape of csv16: (126, 3)
Shape of csv17: (83, 3)
Shape of csv18: (99, 3)
Shape of csv19: (99, 3)
Shape of csv20: (93, 3)
Shape of csv21: (99, 3)
Shape of csv22: (91, 3)
Shape of csv23: (129, 3)
Shape of csv24: (126, 3)
Shape of csv25: (99, 3)
Shape of csv26: (99, 3)
Shape of csv27: (90, 3)
Shape of csv28: (99, 3)
Shape of csv29: (99, 3)
Shape of csv30: (99, 3)
Shape of csv31: (126, 3)
Shape of csv32: (126, 3)
Shape of csv33: (99, 3)
Shape of csv34: (121, 3)
Shape of csv35: (126, 3)
Shape of csv36: (126, 3)
Shape of csv37: (99, 3)
Shape of csv38: (109, 3)


In [3]:
print(type(dataframes))

<class 'list'>


In [4]:
import pandas as pd

def merge_csv_files(file_paths):
    # Initialize an empty list to store individual DataFrames
    dfs = []

    # Iterate over each file path
    for file_path in file_paths:
        # Read each CSV file into a DataFrame and append to the list
        df = pd.read_csv(file_path)
        dfs.append(df)

    # Concatenate all DataFrames in the list along the rows
    merged_df_wsdi = pd.concat(dfs, ignore_index=True)

    return merged_df_wsdi

# Example usage:
merged_df_wsdi = pd.concat(dataframes, ignore_index=True)
merged_df_wsdi

Unnamed: 0,year,wsdi,Station
0,1901,12.0,61700004063
1,1902,8.0,61700004063
2,1903,23.0,61700004063
3,1904,0.0,61700004063
4,1905,7.0,61700004063
...,...,...,...
4239,1995,13.0,61700010147
4240,1996,6.0,61700010147
4241,1997,18.0,61700010147
4242,1998,14.0,61700010147


In [5]:
merged_df_wsdi.head(30)

Unnamed: 0,year,wsdi,Station
0,1901,12.0,61700004063
1,1902,8.0,61700004063
2,1903,23.0,61700004063
3,1904,0.0,61700004063
4,1905,7.0,61700004063
5,1906,6.0,61700004063
6,1907,0.0,61700004063
7,1908,0.0,61700004063
8,1909,0.0,61700004063
9,1910,6.0,61700004063


In [6]:
merged_df_wsdi.to_csv('merged_df_wsdi.csv', index=False)