# Preparations

## Import libraries

In [None]:
import pandas as pd
import glob
import datetime
from google.colab import drive
import matplotlib.pyplot as plt
import numpy as np
import os

# Mount Google Drive at /content/drive so the Drive paths used further down resolve inside the Colab runtime
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Dictionaries

In [None]:
# Plot number (as a string) -> treatment label.
# Plots 2, 5 and 6 are labelled 'drought'; plots 1, 3 and 4 are labelled 'control'.
_drought_plots = {'2', '5', '6'}
treatment_dict = {
    plot_key: ('drought' if plot_key in _drought_plots else 'control')
    for plot_key in ('1', '2', '3', '4', '5', '6')
}

# Species code (as used in the file names) -> (English name, Latin name, short name).
_species_rows = [
    ('csi', 'carob', 'Ceratonia siliqua', 'cera'),
    ('ph', 'pine', 'Pinus halepensis', 'pine'),
    ('pl', 'pistacia', 'Pistacia lentiscus', 'pist'),
    ('qc', 'oak', 'Quercus calliprinos', 'quer'),
    ('cse', 'cypress', 'Cupressus sempervirens', 'cyp'),
]
ID_dict = {
    code: (english, latin, short)
    for code, english, latin, short in _species_rows
}

## Path to the input files - **check before starting!**

In [None]:
# Path to the folder containing the input CSV files; every file ending in '.csv'
# in this folder is read by the loading cell.
# File names are parsed as <experiment>_<time_of_day>_<species code><plot digit>.csv
# NOTE(review): the '09_23' suffix looks like the same date tag as `date_data`
# in the save cell - presumably the two must be updated together; confirm.
folder_path = '/content/drive/MyDrive/projects/Thesis/data/tree_tod/tree_tod_09_23'

# Organizing the data

In [None]:
# Read every CSV file in `folder_path`, tag its rows with metadata decoded from
# the file name, and stack everything into a single table, `merged_df`.
#
# Assumed file-name layout (three '_'-separated fields):
#     <experiment>_<time_of_day>_<species code><plot digit>.csv
# The species code is looked up in `ID_dict`, the plot digit in `treatment_dict`.
dfs = []

# os.listdir() returns names in arbitrary order; sorting keeps the row order of
# merged_df (and of the saved CSV) reproducible between runs.
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith('.csv'):
        continue

    # Parse the name without its extension, so the slicing below does not
    # depend on the length of '.csv'.
    stem = os.path.splitext(filename)[0]
    name_parts = stem.split('_')

    # Raises ValueError if the name has fewer than three fields.
    experiment, time_of_day, species_plot = name_parts[:3]

    # The plot number is the last character of the last field;
    # the species code is the third field without its trailing plot digit.
    plot = name_parts[-1][-1]
    species_code = species_plot[:-1]

    # short_name is unpacked for completeness but is not written to the table.
    english_name, latin_name, short_name = ID_dict.get(species_code, ('', '', ''))
    treatment = treatment_dict.get(plot, '')

    # Unknown codes still fall back to empty strings, but no longer silently.
    if species_code not in ID_dict or plot not in treatment_dict:
        print(
            f"Warning: could not fully decode metadata from '{filename}' "
            f"(species code '{species_code}', plot '{plot}'); "
            "unknown fields are left empty."
        )

    # Read the file and rename its first column to 'date_hour'.
    file_path = os.path.join(folder_path, filename)
    df = pd.read_csv(file_path, delimiter=',')
    df = df.rename(columns={df.columns[0]: 'date_hour'})

    # Add the metadata columns (same names and order as before).
    df = df.assign(
        plot=plot,
        treatment=treatment,
        English_name=english_name,
        Latin_name=latin_name,
        Time_of_day=time_of_day,
    )

    dfs.append(df)

# pd.concat() on an empty list fails with an unhelpful message; point at the
# likely cause instead (same exception type).
if not dfs:
    raise ValueError(f"No .csv files found in '{folder_path}' - check folder_path.")

# Concatenate all per-file DataFrames into one table with a fresh 0..n-1 index
merged_df = pd.concat(dfs, ignore_index=True)

# show the dataframe
merged_df


Unnamed: 0,date_hour,Date_Time,Code,Area,Label,Pamb,Tcuv,Tleaf,Tamb,Ttop,...,WUE_instantaneous,plot,treatment,English_name,Latin_name,Time_of_day,Area factor std,E_corrected_std,gs_corrected_std,A_corrected_std
0,2022-01-12 12:33:31,1641990811000000000,MP_001,8,cse6,97.500,23.15,20.93,23.65,23.71,...,,6,drought,cypress,Cupressus sempervirens,noon,,,,
1,2022-02-13 12:41:41,1644756101000000000,MP_005,8,cse6,97.848,24.17,22.28,24.83,24.30,...,,6,drought,cypress,Cupressus sempervirens,noon,,,,
2,2022-03-16 13:20:35,1647436835000000000,MP_005,8,cse6,98.368,11.10,10.35,9.74,11.54,...,11.949024,6,drought,cypress,Cupressus sempervirens,noon,,,,
3,2022-04-14 13:01:59,1649941319000000000,MP_005,8,cse6,97.244,22.21,21.57,20.71,23.42,...,4.377112,6,drought,cypress,Cupressus sempervirens,noon,,,,
4,2022-05-09 12:25:25,1652099125000000000,MP_005,8,cse6,97.014,27.64,27.04,28.54,30.21,...,0.380528,6,drought,cypress,Cupressus sempervirens,noon,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1299,2022-09-07 14:53:33,1662562413000000000,MP_005,8,pl4,96.862,26.49,27.70,25.26,27.65,...,7.367423,4,control,pistacia,Pistacia lentiscus,afternoon,,,,
1300,2022-12-01 16:04:22,1669910662000000000,MP_001,8,pl4,97.650,20.18,20.06,18.89,19.99,...,3.746793,4,control,pistacia,Pistacia lentiscus,afternoon,,,,
1301,2023-01-11 14:20:47,1673446847000000000,MP_001,8,pl4,97.890,19.02,19.28,18.56,19.09,...,4.452633,4,control,pistacia,Pistacia lentiscus,afternoon,,,,
1302,2023-02-26 15:54:40,1677426880000000000,MP_001,8,pl4,97.840,18.47,18.52,17.21,18.58,...,6.122619,4,control,pistacia,Pistacia lentiscus,afternoon,,,,


## Save the merged data as CSV

In [None]:
# Write the merged table to a single CSV file (the row index is not written).
# `date_data` is the date tag of the data set and becomes part of the file name.
output_folder = '/content/drive/MyDrive/projects/Thesis/data/tree_tod/'
date_data = '09_23'
output_file = os.path.join(
    output_folder,
    f'tree_tod_merged_data_{date_data}.csv',
)
merged_df.to_csv(path_or_buf=output_file, index=False)
