In [1]:
import os
import pandas as pd
import numpy as np

In [2]:
import utils.data_preparation as prep
import utils.prep_info as prep_info
import utils.prep_measurements as prep_measurements

In [3]:
# Input variables
# Relative path of the directory that is scanned for the experiment ".txt" files
dir_path = "Textfiles"

In [4]:
def get_experiment_measurements_df(dir_path, file_name, trim = 25):
    """
    Load a single experiment text file and return its measurements as a numeric DataFrame.

    Args:
        dir_path (str): Directory that contains the file.
        file_name (str): Name of the file inside ``dir_path``.
        trim (int): Forwarded unchanged to ``prep_measurements.get_measurement_df``.

    Returns:
        pd.DataFrame: Measurements DataFrame in which every column has been passed
        through ``pd.to_numeric``; entries that cannot be parsed become NaN.
    """
    # Split the raw file content into its info part and its measurements part
    info_text, measurements_text = prep.split_text(os.path.join(dir_path, file_name))

    # The parsed info block is required to build the measurements DataFrame
    info_df = prep_info.get_info_df(info_text)
    measurements_df = prep_measurements.get_measurement_df(measurements_text, info_df, trim)

    # Coerce all columns to numeric dtypes; unparseable entries turn into NaN
    numeric_values = measurements_df.apply(pd.to_numeric, errors='coerce')
    measurements_df[measurements_df.columns] = numeric_values

    return measurements_df


def round_and_average_index(df):
    """
    Rundet Float-Werte des Index auf Integer ab und bildet den Durchschnitt für doppelte Indexwerte.
    
    Args:
        df (pd.DataFrame): DataFrame mit einem Float-Index.
        
    Returns:
        pd.DataFrame: DataFrame mit Integer-Index, wobei doppelte Indexwerte aggregiert (gemittelt) wurden.
    """
    # Float-Index in Integer umwandeln
    #df.index = df.index.astype(int)
    df.index = df.index.round(0).astype(int)
    
    # Gruppieren und den Durchschnitt für doppelte Indexwerte berechnen
    df = df.groupby(df.index).mean()
    
    return df

def get_merged_experiment_measurements_df(dir_path, trim=25):
    """
    Process every ``.txt`` file in a directory and merge the results column-wise.

    Each file is loaded with ``get_experiment_measurements_df``, its index is rounded
    and de-duplicated with ``round_and_average_index``, and the resulting frames are
    placed side by side (outer join on the index).

    Args:
        dir_path (str): Directory that contains the files.
        trim (int): Forwarded unchanged to ``get_experiment_measurements_df``.

    Returns:
        pd.DataFrame: Merged DataFrame with the measurements of all files that could be
        processed, sorted by index. Empty if no file could be processed.
    """
    frames = []

    # Sorted so the column order does not depend on the file system's listing order
    for file_name in sorted(os.listdir(dir_path)):
        if not file_name.endswith(".txt"):  # only text files are processed
            continue
        try:
            # Measurements of the current file, with the float index rounded to
            # integers and duplicate index values averaged
            measurements_df = get_experiment_measurements_df(dir_path, file_name, trim)
            frames.append(round_and_average_index(measurements_df))
        except Exception as e:
            # Deliberate best effort: report the broken file and continue with the rest
            print(f"Fehler beim Verarbeiten der Datei {file_name}: {e}")

    # pd.concat raises on an empty list, so handle "nothing to merge" explicitly
    if not frames:
        return pd.DataFrame()

    # Concatenate once instead of growing a DataFrame inside the loop
    merged_df = pd.concat(frames, axis=1)

    return merged_df.sort_index()

### Alle Messwerte

In [6]:
df = get_merged_experiment_measurements_df(dir_path, trim=24)

# Keep only the rows that contain at most one NaN value
nan_count_per_row = df.isna().sum(axis=1)
df_experiments = df[nan_count_per_row <= 1]

df_experiments

Unnamed: 0_level_0,0-PE-10_S3,0-PE-10_S5,0-PE-11_S3,0-PE-11_S5,0-PE-12_S3,0-PE-12_S5,0-PE-13_S3,0-PE-13_S5,0-PE-14_S3,0-PE-14_S5,...,UV-PET-77_S3,UV-PET-77_S5,UV-PET-78_S3,UV-PET-78_S5,UV-PET-79_S3,UV-PET-79_S5,UV-PET-8_S3,UV-PET-8_S5,UV-PET-9_S3,UV-PET-9_S5
Temp./°C,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
-25,-4.13834,3.94766,-4.07456,3.91972,-4.23152,4.05790,-4.39800,4.13289,-3.96518,3.82275,...,-3.00131,2.88987,-3.60925,3.46959,-3.54603,3.40215,-1.66129,1.70131,-1.83357,1.87894
-24,-4.14527,3.95250,-4.08472,3.92429,-4.23991,4.06407,-4.40308,4.14106,-3.97401,3.82973,...,-3.00356,2.89599,-3.61345,3.47728,-3.54815,3.39419,-1.66014,1.70440,-1.83281,1.86790
-23,-4.15325,3.95307,-4.09332,3.92736,-4.24644,4.06864,-4.41021,4.15010,-3.98163,3.83740,...,-3.00786,2.90810,-3.61529,3.48758,-3.55306,3.40015,-1.65938,1.71841,-1.83115,1.87305
-22,-4.15870,3.96356,-4.10221,3.93813,-4.25397,4.07811,-4.41759,4.15815,-3.98859,3.84421,...,-3.01034,2.92446,-3.61842,3.49876,-3.55426,3.42468,-1.65940,1.70963,-1.83048,1.87415
-21,-4.16568,3.97627,-4.10940,3.94550,-4.26363,4.09258,-4.42599,4.17234,-3.99663,3.85675,...,-3.01405,2.91728,-3.61980,3.50415,-3.55853,3.43563,-1.65679,1.71217,-1.83004,1.88214
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
270,-3.80783,6.77143,-3.73151,6.85277,-3.85604,7.17616,-3.86313,7.23334,-3.67707,6.77260,...,-2.97401,5.37605,-3.46827,6.44393,-3.41279,6.29337,-1.60951,2.52425,-1.76975,2.69898
271,-3.83343,6.78967,-3.74930,6.87615,-3.81522,7.19767,-3.89276,7.24583,-3.67147,6.79039,...,-2.95434,5.37883,-3.44970,6.45563,-3.39065,6.30691,-1.61558,2.53141,-1.76911,2.70881
272,-3.81834,6.80702,-3.74255,6.89665,-3.78873,7.21424,-3.87671,7.25314,-3.66555,6.81041,...,-2.95359,5.39402,-3.46039,6.47043,-3.39401,6.31307,-1.61694,2.53573,-1.77940,2.71216
273,-3.80690,6.81905,-3.72492,6.91947,-3.81515,7.22636,-3.86497,7.26902,-3.65708,6.82885,...,-2.94910,5.40875,-3.44480,6.48404,-3.39400,6.32582,-1.62530,2.54361,-1.77122,2.71296


### 2nd Heating

In [8]:
# Select the columns whose name contains '_S5' and strip that marker from the names
s5_columns = df_experiments.filter(like='_S5')
df_2nd_heating = s5_columns.set_axis(
    s5_columns.columns.str.replace('_S5', '', regex=False),
    axis=1,
)

df_2nd_heating

Unnamed: 0_level_0,0-PE-10,0-PE-11,0-PE-12,0-PE-13,0-PE-14,0-PE-15,0-PE-16,0-PE-17,0-PE-18,0-PE-19,...,UV-PET-59,UV-PET-6,UV-PET-60,UV-PET-7,UV-PET-76,UV-PET-77,UV-PET-78,UV-PET-79,UV-PET-8,UV-PET-9
Temp./°C,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
-25,3.94766,3.91972,4.05790,4.13289,3.82275,4.00157,4.18087,3.91814,4.97739,4.22145,...,3.95683,1.88544,4.02637,2.17032,2.98046,2.88987,3.46959,3.40215,1.70131,1.87894
-24,3.95250,3.92429,4.06407,4.14106,3.82973,3.99952,4.19073,3.92699,4.99383,4.22974,...,3.97837,1.88835,4.03066,2.16730,2.98969,2.89599,3.47728,3.39419,1.70440,1.86790
-23,3.95307,3.92736,4.06864,4.15010,3.83740,4.01068,4.19749,3.93712,4.98818,4.23567,...,3.97614,1.88846,4.05506,2.17359,2.99713,2.90810,3.48758,3.40015,1.71841,1.87305
-22,3.96356,3.93813,4.07811,4.15815,3.84421,4.01827,4.20877,3.94424,5.00459,4.25181,...,3.99298,1.87911,4.05299,2.16390,3.00757,2.92446,3.49876,3.42468,1.70963,1.87415
-21,3.97627,3.94550,4.09258,4.17234,3.85675,4.03683,4.21752,3.95309,5.01714,4.25870,...,4.01335,1.88492,4.05911,2.17059,3.01472,2.91728,3.50415,3.43563,1.71217,1.88214
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
270,6.77143,6.85277,7.17616,7.23334,6.77260,7.11991,7.26021,7.18403,8.81364,7.51063,...,7.44245,2.85871,7.61240,3.09266,5.56269,5.37605,6.44393,6.29337,2.52425,2.69898
271,6.78967,6.87615,7.19767,7.24583,6.79039,7.13805,7.27224,7.20013,8.83735,7.52670,...,7.45806,2.86621,7.63346,3.09930,5.57293,5.37883,6.45563,6.30691,2.53141,2.70881
272,6.80702,6.89665,7.21424,7.25314,6.81041,7.15291,7.27928,7.21753,8.85928,7.54118,...,7.47667,2.87413,7.64855,3.10573,5.58632,5.39402,6.47043,6.31307,2.53573,2.71216
273,6.81905,6.91947,7.22636,7.26902,6.82885,7.16673,7.29254,7.22875,8.88105,7.55444,...,7.48946,2.87972,7.66672,3.11350,5.59626,5.40875,6.48404,6.32582,2.54361,2.71296


In [9]:
# to_csv does not create missing directories, so make sure the output folder exists
os.makedirs("data", exist_ok=True)
df_2nd_heating.to_csv("data/Experiments_2nd_Heating.csv", index=True)