In [72]:
import os
import pandas as pd
from datetime import datetime
import pytz
import numpy as np


def list_read_files(folder_path):
    """Return the names of the entries found in ``folder_path``.

    Args:
        folder_path: Path of the directory to list.

    Returns:
        list: The entry names exactly as ``os.listdir`` reports them (names
        only, not full paths). When the folder cannot be listed, the reason is
        printed and an empty list is returned, so callers can always iterate
        over the result.
    """
    try:
        # List all entries in the folder
        return os.listdir(folder_path)
    except FileNotFoundError:
        print("Folder not found!")
    except PermissionError:
        print("Permission denied!")
    except Exception as e:
        # Deliberately broad: listing is best-effort and must not abort the run.
        print(f"An error occurred: {e}")

    # Only reached after a failure. Returning an empty list instead of the
    # implicit None keeps ``for name in list_read_files(...)`` from raising
    # TypeError right after the message above was printed.
    return []

        
def read_files_SFG(full_file_path):
    """Load an SFG CSV file and return it without its last two columns.

    The first line of the file is used as the header, no column is used as the
    index, and the file is decoded with the ``unicode_escape`` codec.

    Args:
        full_file_path: Path of the CSV file to read.

    Returns:
        pandas.DataFrame: The file contents minus the two right-most columns.
    """
    sfg_frame = pd.read_csv(
        full_file_path,
        header=0,
        index_col=None,
        encoding='unicode_escape',
    )
    # Positional slice: keep every row, drop the two trailing columns.
    return sfg_frame.iloc[:, :-2]


def get_timestamp(time_str):
    """Convert a ``MM/DD/YYYY HH:MM:SS`` string, read as GMT, to epoch milliseconds.

    Args:
        time_str: Date/time text in the format ``%m/%d/%Y %H:%M:%S``.

    Returns:
        int: Milliseconds since 1970-01-01 00:00:00 GMT.

    Raises:
        ValueError: If ``time_str`` does not match the expected format.
    """
    # Local import: the file only imports ``datetime`` from the datetime module.
    from datetime import timezone

    date_format = "%m/%d/%Y %H:%M:%S"
    naive_time = datetime.strptime(time_str, date_format)

    # GMT is a fixed zero offset, so the standard-library UTC zone yields the
    # same instant as pytz's 'GMT' zone without needing a third-party package.
    gmt_time = naive_time.replace(tzinfo=timezone.utc)
    return int(gmt_time.timestamp() * 1000)
    
    
def read_files_MRL(full_file_path):
    """Load an MRL CSV file and put a ``timestamp`` column in front.

    The file is read with its first line as the header and decoded with the
    ``unicode_escape`` codec. Each value of the ``Time`` column is passed
    through ``get_timestamp`` and the results become the first column.

    Args:
        full_file_path: Path of the CSV file to read.

    Returns:
        pandas.DataFrame: The file contents with ``timestamp`` as column 0.
    """
    mrl_frame = pd.read_csv(
        full_file_path,
        header=0,
        index_col=None,
        encoding='unicode_escape',
    )

    # Build the column first (it lands at the end), then move it to the front.
    mrl_frame['timestamp'] = mrl_frame['Time'].apply(get_timestamp)
    mrl_frame.insert(0, 'timestamp', mrl_frame.pop('timestamp'))
    return mrl_frame


def within_range(df1_max, df2_min, df2_max):
    """Tell whether ``df1_max`` lies in the closed interval [``df2_min``, ``df2_max``].

    Args:
        df1_max: Value to test.
        df2_min: Lower bound (inclusive).
        df2_max: Upper bound (inclusive).

    Returns:
        Truthy when ``df2_min <= df1_max <= df2_max``, falsy otherwise.
    """
    return df2_min <= df1_max <= df2_max


def get_nrows(df1, df2, nrows):
    """Return the rows of ``df2`` that follow ``df1``'s last timestamp, shaped like ``df1``.

    The first row of ``df2`` whose ``timestamp`` equals the maximum
    ``timestamp`` of ``df1`` is located, and up to ``nrows`` rows after it are
    taken. Every column of that slice except ``timestamp`` gets the prefix
    ``PC1_``. Columns of ``df1`` that the slice lacks are added as empty
    columns, and the result keeps exactly ``df1``'s columns, in ``df1``'s order.

    Args:
        df1: Frame with a ``timestamp`` column; it defines the start value and
            the output column layout.
        df2: Frame with a ``timestamp`` column to take the rows from.
        nrows: Maximum number of rows to take after the matching row.

    Returns:
        pandas.DataFrame: The selected rows, with ``df1``'s columns.

    Raises:
        IndexError: If no row of ``df2`` has the start timestamp.
    """
    start_value = df1["timestamp"].max()

    # Look the row up by position, not by index label: the slice below uses
    # ``iloc``, and a label from ``df2.index`` only equals the position when
    # ``df2`` has a default RangeIndex.
    match_positions = np.flatnonzero((df2["timestamp"] == start_value).to_numpy())
    if match_positions.size == 0:
        raise IndexError(
            f"timestamp {start_value} (max of df1) was not found in df2"
        )
    start_pos = int(match_positions[0])

    # Slice the DataFrame to get the specified number of rows after the match
    df3 = df2.iloc[start_pos + 1:start_pos + nrows + 1, :]

    # add a prefix to match the headers in the sfg file
    df3 = df3.add_prefix("PC1_")
    df3 = df3.rename(columns={"PC1_timestamp": "timestamp"})

    # Find the columns df1 has that the slice does not
    missing_columns_df3 = df1.columns.difference(df3.columns).tolist()

    # add the missing columns (all empty) to the dataframe
    df3 = pd.concat(
        [df3, pd.DataFrame(columns=missing_columns_df3, index=df3.index)],
        axis=1,
    )

    # keep only df1's columns, in the same order as the sfg dataframe
    df3 = df3[df1.columns]
    return df3


# FOLDER PATH
# Every backslash is escaped. The original "\D" was an invalid escape sequence
# that Python only tolerated with a warning; the resulting paths are unchanged.
folder_path_FB = 'C:\\Users\\rivil\\DataCampINeuron\\python_code\\FB\\'
folder_path_MRL = 'C:\\Users\\rivil\\DataCampINeuron\\python_code\\MRL\\'

# list_read_files may return None when a folder cannot be listed; fall back to
# an empty list so the loops below simply have nothing to do.
files_sfg = list_read_files(folder_path_FB) or []
files_mrl = list_read_files(folder_path_MRL) or []
n_rows = 2700  # maximum number of MRL rows appended to each SFG file

# READING FILES
# Parse every MRL file once up front instead of once per SFG file: the MRL
# frames do not depend on the SFG file being processed.
mrl_frames = []
for file_mrl in files_mrl:
    full_path_mrl = os.path.join(folder_path_MRL, file_mrl)
    mrl_frames.append((file_mrl, read_files_MRL(full_path_mrl)))

# NOTE: the extended files are written into the FB input folder, so a later
# run will list the "_ext" files as inputs as well.
for file_sfg in files_sfg:
    full_path_sfg = os.path.join(folder_path_FB, file_sfg)
    df_sfg = read_files_SFG(full_path_sfg)
    sfg_last_timestamp = df_sfg.timestamp.max()

    for file_mrl, df_mrl in mrl_frames:
        # check if the last SFG timestamp falls inside this MRL file
        is_within_range = within_range(
            sfg_last_timestamp, df_mrl.timestamp.min(), df_mrl.timestamp.max()
        )
        if not is_within_range:
            continue

        # get the rows from the df_mrl dataset and append them to the SFG data
        df_mrl_nrows = get_nrows(df_sfg, df_mrl, n_rows)
        df_sfg_edit = pd.concat((df_sfg, df_mrl_nrows))

        # prepare the output dataset name: "<name>_ext<.extension>".
        # splitext keeps names with several dots intact ("a.b.csv" -> "a.b_ext.csv").
        sfg_stem, sfg_extension = os.path.splitext(file_sfg)
        file_name_sfg_edit = sfg_stem + '_ext' + sfg_extension
        full_path_sfg_edit = os.path.join(folder_path_FB, file_name_sfg_edit)

        # output to csv format
        df_sfg_edit.to_csv(full_path_sfg_edit, index=False, mode='w+')
        print("The following file has been created: ", full_path_sfg_edit)
        print(f"**** Input SFG file **** {file_sfg}, length:{len(df_sfg)},")
        print(f"**** Output SFG file **** {file_name_sfg_edit}, length:{len(df_sfg_edit)}")
        print(f"**** Total amount of rows added **** {len(df_sfg_edit)-len(df_sfg)}")
        print("\n")



    


SyntaxError: invalid syntax (97352262.py, line 112)