In [54]:
#
# This code is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the
# Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This code is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#

# Load the weather observations in a Station Exchange Format (SEF) file

import io
import pandas


def read_file(file_name):
    """Load the contents of the specified SEF file.

    The file is read in two passes: the first line and the 11 header rows
    that follow it are parsed by hand, then the tab-separated data table
    below them is loaded with pandas.

    Args:
        file_name (:obj:`str`): File (or 'open'able object)

    Returns:
        :obj:`dict`: Data as key:value pairs. ``'SEF'`` holds the version
        field of the first line (as :obj:`bytes`); each header row adds one
        ``name: value`` entry (value is None when the row has no value);
        ``'Meta'`` is split on commas into a list when it has a value; and
        ``'Data'`` holds the data table as a :obj:`pandas.DataFrame` whose
        own ``Meta`` column is likewise split on commas.

    Raises:
        IOError: Not a readable SEF file (first line does not start with
            ``SEF`` or carries no tab-separated version field).

    |
    """

    # The context manager releases the handle even when a check below raises.
    with io.open(file_name, 'rb') as f:
        # Check that it's a SEF file and get the version
        first_line = f.readline().rstrip()
        if first_line[0:3] != b'SEF':
            raise IOError("%s does not look like a SEF file" % file_name)
        first_fields = first_line.split(b'\t')
        if len(first_fields) < 2:
            # 'SEF' marker present but no version field after it.
            raise IOError("%s does not look like a SEF file" % file_name)
        # The version is deliberately left as bytes (not decoded) so that
        # existing callers keep seeing the same value.
        result = {'SEF': first_fields[1]}
        # Read in the header rows
        for _ in range(11):
            header = [h.decode('utf-8')
                      for h in f.readline().rstrip().split(b'\t')]
            # rstrip() also removes a trailing tab, so a header with an empty
            # value collapses to a single field and is stored as None.
            result[header[0]] = header[1] if len(header) > 1 else None
    if result['Meta'] is not None:
        result['Meta'] = result['Meta'].split(',')
    # Read in the data table: skip the 12 lines parsed above; the next line
    # supplies the column names and only the first 8 columns are kept.
    observations = pandas.read_csv(file_name, sep='\t', skiprows=12,
                                   usecols=list(range(8)))
    # Per-observation metadata is comma separated; missing values are left
    # untouched.
    observations['Meta'] = observations['Meta'].map(
        lambda x: x.split(','), na_action='ignore')
    result['Data'] = observations
    return result

In [64]:
import os
import pandas as pd

def count_rows_in_tsv_files(base_directory):
    """Count the data rows of every ``.tsv`` file found in 'formatted' directories.

    Walks ``base_directory`` recursively. In each directory whose own name is
    exactly ``formatted``, every file ending in ``.tsv`` is loaded with
    ``read_file`` and the length of its ``'Data'`` entry is added to a
    running total. One line is printed per file, followed by the overall
    total.

    Args:
        base_directory: Root of the directory tree to search.

    Returns:
        The summed row count over all matching files (0 if none match).
    """
    grand_total = 0

    for current_dir, _subdirs, entries in os.walk(base_directory):
        # Only directories literally named 'formatted' are inspected.
        if os.path.basename(current_dir) != 'formatted':
            continue

        for entry in entries:
            if not entry.endswith('.tsv'):
                continue

            tsv_path = os.path.join(current_dir, entry)
            # Load the file and measure its data table.
            n_rows = len(read_file(tsv_path)['Data'])
            grand_total += n_rows
            print(f"File: {tsv_path}, Row Count: {n_rows}")

    print(f"Total Row Count: {grand_total}")
    return grand_total

# Root of the rescued-data tree; the files to count are expected under
# {area}/data/formatted/ below it. This is a relative path, so it is resolved
# against the current working directory of the kernel.
base_directory = 'Rescued-Data'
# Prints one line per file plus the grand total; as the last expression of the
# cell, the returned total is also shown as the cell's output value.
count_rows_in_tsv_files(base_directory)


File: Rescued-Data/SouthPacific/data/formatted/C3S_SouthPacific_Maatsuyker_19180409-19450831_ta.tsv, Row Count: 28764
File: Rescued-Data/SouthPacific/data/formatted/C3S_SouthPacific_Maatsuyker_19180409-19450831_p.tsv, Row Count: 28775
File: Rescued-Data/SouthAfrica/data/formatted/C3S_SouthAfrica_Cape_Town_Obs_18790101-18791231_tb_mean_bis.tsv, Row Count: 365
File: Rescued-Data/SouthAfrica/data/formatted/C3S_SouthAfrica_Swellendam_18210101-18261231_Tx.tsv, Row Count: 1424
File: Rescued-Data/SouthAfrica/data/formatted/C3S_SouthAfrica_FortNapier_18750101-18791231_p.tsv, Row Count: 1825
File: Rescued-Data/SouthAfrica/data/formatted/C3S_SouthAfrica_Cape_Town_Obs_18340528-19320731_Tn.tsv, Row Count: 31419
File: Rescued-Data/SouthAfrica/data/formatted/C3S_SouthAfrica_Cape_Town_Obs_19000102-19320731_rr.tsv, Row Count: 10986
File: Rescued-Data/SouthAfrica/data/formatted/C3S_SouthAfrica_Cape_Town_Obs_18790101-18800831_ta_mean.tsv, Row Count: 609
File: Rescued-Data/SouthAfrica/data/formatted/C3S_

1220453