# Charon4 txt to Python, 2018 data

# different export settings compared to the recent data!
The following screenshot shows the settings that are expected when exporting the 2018 data. The data are exported per day, which makes it easier to read in the different cable lengths.

<img src="./pictures/Charon4_export_window_txt_2018_day.png" alt="drawing" width="1300"/>

In [13]:
import numpy as np
import pandas as pd
import glob
import matplotlib.pyplot as plt
from datetime import date, timedelta
from collections import defaultdict
import pickle

from my_func_mvw.functions import get_abspath, read_pickle

In [14]:
# ---------------------------- Input ----------------------------
# Folder of the processed DTS data; the other paths in this notebook are built relative to it.
path_DTS_processed = r"..\Alsdorf\Daten\DTS_processed"
# Output switches (True / False): write the merged 2018 data as csv and / or pickle files.
data_save_csv, data_save_pickle = False, False
# ---------------------------------------------------------------

In [15]:
controller = 3188  # 3188: Alsdorf
# Folder that contains the per-day txt export of the 2018 data
path_to_controller_2018_day = path_DTS_processed + r"\..\DTS_unprocessed\Charon4\charon4_export_as_txt\2018_export_by_day"
year = "2018"
channels = [1, 2, 3, 4, 5, 6, 7, 8]
months = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]

# Generate all requested paths (one per channel and month); some paths may be empty.
# Raw f-strings keep the Windows backslashes literal. The non-raw form relied on
# invalid escape sequences ("\C", "\{", "\T"), which raise a SyntaxWarning on
# Python 3.12+ while producing the same text.
paths_for_activate_2018_day = {}
for channel in channels:
    for month in months:
        paths_for_activate_2018_day[f"temp_ch{channel}_year2018_month{month}"] = (
            path_to_controller_2018_day
            + rf"\Controller\{controller}\{channel}\Temperature Data\{year}\{month}"
        )
#paths_for_activate_2018_day

In [16]:
def import_temp_to_df_2018(path):
    """Read one Charon4 txt export file of the 2018 data into a DataFrame.

    The file is parsed as tab-separated text. The first 7 lines are skipped and
    the first column becomes the index. The two columns that follow the index
    column are dropped; the labels of the remaining columns are converted to float.

    Parameters
    ----------
    path : str or path-like
        Path of the exported txt file.

    Returns
    -------
    pandas.DataFrame
        Index named "Date" holding timezone-naive timestamps, columns named
        "Length [m]" with float labels.

    Raises
    ------
    pandas.errors.ParserError
        Propagated from ``pd.read_csv`` when the file can not be parsed as one
        regular table.
    """
    one_file = pd.read_csv(path, decimal=".", delimiter="\t", skiprows=7, index_col=0)
    one_file = one_file.drop(one_file.columns[0:2], axis=1)
    # infer_datetime_format is not passed: it is deprecated since pandas 2.0, where
    # it has no effect and only triggers a warning.
    one_file.index = pd.to_datetime(one_file.index).tz_localize(None)
    one_file.columns = one_file.columns.astype(float)
    one_file.index.names = ['Date']
    one_file.columns.names = ["Length [m]"]
    return one_file

def create_base_dataframe(n_columns, sampling_05=False):
    """Create an empty DataFrame with one float-labelled column per sampling point.

    Parameters
    ----------
    n_columns : int
        Number of columns of the dataframe.
    sampling_05 : bool, optional
        If falsy (default) the sampling interval is 1 m and the column labels are
        0, 1, ..., n_columns-1. If truthy the sampling interval is 0.5 m and the
        labels are 0, 0.5, ..., (n_columns-1)/2.

    Returns
    -------
    pandas.DataFrame
        Dataframe without rows; index named "Date", columns named "Length [m]".
    """
    # Plain truthiness test: every flag value ends up in one of the two cases,
    # so the dataframe is always defined.
    last_label = (n_columns - 1) / 2 if sampling_05 else n_columns - 1
    # np.linspace always returns floats, so all 2018 data has float column labels
    df = pd.DataFrame(columns=np.linspace(0, last_label, n_columns))
    df.index.names = ['Date']
    df.columns.names = ["Length [m]"]
    return df

In [17]:
# Read all files exported by Charon

# Create base dict: depending on channel number and cable length
cable_lengths_2018 = [233,1314,2340,1171,1170,798,259,799,268,1352,676,223,378,250,800]
# cable lengths with a sampling interval of 0.5 m; all other lengths are sampled every 1 m
cable_lengths_2018_sampling_05 = (2340, 1352)

# data_2018_day is named "day" because the 2018 data is exported from Charon4
# in a different way than the data of the other years.
data_2018_day = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))  # three level defaultdict
my_dict = data_2018_day  # alias of the same object, kept in case it is used elsewhere
for channel in channels:
    for cable_type in cable_lengths_2018:
        for month in months:
            data_2018_day[cable_type][str(channel)][str(month)] = create_base_dataframe(
                cable_type, sampling_05=cable_type in cable_lengths_2018_sampling_05
            )

# Read the data
count_ParserError = 0
check_for_multiple_files = True
for channel in channels:
    for month in months:  # path could be empty --> no data
        my_path = paths_for_activate_2018_day[f"temp_ch{channel}_year2018_month{month}"]
        # raw strings: "\*" is an invalid escape sequence in a normal string literal
        all_my_paths = get_abspath(my_path + r"\*\*")

        for my_file in all_my_paths:  # check cable length and create different dataframes
            try:  # Read Data
                one_file = import_temp_to_df_2018(my_file)
            except pd.errors.ParserError:
                # The file contains data with different cable lengths.
                # With this solution these data are skipped.
                count_ParserError += 1
                continue

            # check cable length and put it in the corresponding dataframe
            n_columns = len(one_file.columns)
            if n_columns not in cable_lengths_2018:
                # Without this check pd.concat fails with an opaque TypeError, because
                # the defaultdict hands out an empty dict for unknown cable lengths.
                raise ValueError(
                    f"{my_file}: unexpected cable length ({n_columns} columns); "
                    "add it to cable_lengths_2018"
                )
            data_2018_day[n_columns][str(channel)][str(month)] = pd.concat(
                [data_2018_day[n_columns][str(channel)][str(month)], one_file], axis=0
            )
            # NOTE: duplicate dates show up in the concatenated data

        # check if one folder contains more than one file
        if check_for_multiple_files:
            path_to_day_folders = get_abspath(my_path + r"\*")
            for day_folder in path_to_day_folders:
                n_files_in_dayfolder = len(get_abspath(day_folder + r"\*"))
                if n_files_in_dayfolder != 1:
                    print("Warning: a day folder contains more than one file")

print(f"{count_ParserError} ParserErrors were skipped. This means {count_ParserError} days with data are not read!")

2 ParserErrors were skipped. This means 2 days with data are not read!


In [18]:
def save_helper(data_2018_day,channel,cable_length,create_base_dataframe=create_base_dataframe):
    """Merge the monthly dataframes of one channel and cable length into one dataframe.

    Parameters
    ----------
    data_2018_day : mapping
        Nested mapping ``data_2018_day[cable_length][channel][month]`` that holds
        one DataFrame per month.
    channel : hashable
        Key of the channel inside ``data_2018_day[cable_length]``.
    cable_length : int
        Cable length (number of columns); has to be one of the known 2018 lengths.
    create_base_dataframe : callable, optional
        Factory for the empty start dataframe, called as
        ``create_base_dataframe(cable_length)`` or
        ``create_base_dataframe(cable_length, sampling_05=True)``.

    Returns
    -------
    pandas.DataFrame
        The monthly dataframes appended row-wise to the empty start dataframe, in
        the iteration order of the month keys.

    Raises
    ------
    ValueError
        If ``cable_length`` is not one of the known cable lengths, because the
        sampling interval can not be determined then.
    """
    if cable_length in (233,1314,1171,1170,798,259,799,268,676,223,378,250,800):  # sampling interval is 1 m
        data_2018_day_save = create_base_dataframe(cable_length)
    elif cable_length in (2340, 1352):  # sampling interval is 0.5 m instead of 1 m
        data_2018_day_save = create_base_dataframe(cable_length, sampling_05=True)
    else:
        # Fail with a clear message instead of an UnboundLocalError further down.
        raise ValueError(f"unknown cable length: {cable_length}")

    for one_file in data_2018_day[cable_length][channel].values():
        data_2018_day_save = pd.concat([data_2018_day_save, one_file], axis=0)

    return data_2018_day_save

In [19]:
# Save Data 2018 csv: one file per cable length and channel
if data_save_csv:
    save_to_path = path_DTS_processed + r"\temp_2018_Controller3188\csv"
    for cable_length, channels_of_length in data_2018_day.items():
        for channel in channels_of_length:
            data_2018_day_save = save_helper(data_2018_day, channel, cable_length)

            # nothing is written for dataframes without rows (no data)
            if len(data_2018_day_save) == 0:
                continue
            filename = f"\\temp_cablelength{cable_length}_ch{channel}.csv"
            data_2018_day_save.to_csv(save_to_path + filename)

In [20]:
# Save Data 2018 pickle: one file per cable length and channel
if data_save_pickle:
    def write_pickle(save_to:str,data_2018_day):
        # Writes one pickle file into save_to for every cable length / channel
        # combination whose merged dataframe contains rows.
        for cable_length, channels_of_length in data_2018_day.items():
            for channel in channels_of_length:
                data_2018_day_save = save_helper(data_2018_day, channel, cable_length)

                if len(data_2018_day_save) == 0:  # no rows (no data) --> no file
                    continue
                filename = f"\\temp_cablelength{cable_length}_ch{channel}"
                with open(save_to + filename, 'wb') as handle:
                    pickle.dump(data_2018_day_save, handle, protocol=pickle.HIGHEST_PROTOCOL)

    save_to_path_pickle = path_DTS_processed + r"\temp_2018_Controller3188\pickle"
    write_pickle(save_to_path_pickle, data_2018_day)

    # Test: load one of the pickle files again
    filename = r"\temp_cablelength378_ch5"
    load_file_pickle = save_to_path_pickle + filename
    test_load_pickle = read_pickle(load_file_pickle)
    #test_load_pickle

In [21]:
# Check
if data_save_pickle: # use some variables, which I only define when saving pickle
    # TRT data from sciebo: sciebo\DTS Data\Alsdorf\Nullmessung_und_TRT_2018 - check if equal to the data here
    # I only compare Ch 2 TRT currently. I assume the other ones will be also the same.
    # Would need a bit more hacking to compare these too because the cable length changes so I would have to select multiple files of my database

    trt_data_sciebo_ch2={}
    trt_data_sciebo_ch1={}

    #path = r"..\Alsdorf\Nullmessung_und_TRT_2018\Natural State" # here multiple files would have to be compared because of cable length change
    path = r"..\Alsdorf\Nullmessung_und_TRT_2018\TRT"

    # raw string: "\C" and "\*" are invalid escape sequences in a normal string literal
    path_to_files_ch2 = get_abspath(path + r"\CH_2\*")
    # path_to_files_ch1 = get_abspath(path + "\CH_1\*")
    for filename in path_to_files_ch2:
        sciebo_file = pd.read_csv(filename, delimiter=',', index_col=0, header=7)
        sciebo_file.index = pd.to_datetime(sciebo_file.index, dayfirst=True).tz_localize(None)
        # drop the first two columns after the index column
        trt_data_sciebo_ch2[filename] = sciebo_file.drop(sciebo_file.columns[[0, 1]], axis=1)
    # for filename in path_to_files_ch1:
    #     trt_data_sciebo_ch1[filename] = pd.read_csv(filename,delimiter = ',',index_col=0, header=7 )
    #     trt_data_sciebo_ch1[filename].index = pd.to_datetime(trt_data_sciebo_ch1[filename].index, dayfirst = True).tz_localize(None)
    #     trt_data_sciebo_ch1[filename] = trt_data_sciebo_ch1[filename].drop(trt_data_sciebo_ch1[filename].columns[[0,1]], axis=1)


    data_in_database=read_pickle(save_to_path_pickle + r"\temp_cablelength676_ch2")

    # Initialised once, before the loop over the files: resetting it per file would
    # let the last file overwrite a mismatch found in an earlier file, and the name
    # would be undefined in the print below when no file was found.
    values_equal = True
    for filepath in path_to_files_ch2:
        once_sciebo_file = trt_data_sciebo_ch2[filepath]
        once_sciebo_file.columns = data_in_database.columns # so comparison can be easy made

        # "timestamp" instead of "date": does not shadow the imported datetime.date
        for timestamp in once_sciebo_file.index:
            compare = once_sciebo_file.loc[timestamp] == data_in_database.loc[timestamp]
            if compare.sum() != len(compare): # at least one value differs
                values_equal = False

    print(f"all values of channel 2 are equal: {values_equal}\nsciebo files from {path}")