In [None]:
import os
import posixpath
import shlex
import stat
import time

import numpy as np
import pandas as pd
import paramiko
from astropy.io import fits
from humanfriendly import format_timespan

# Wall-clock start of the run; the elapsed time is reported at the end of the script.
begin_time = time.time()

# Connection settings for the remote SSH/SFTP server.
# To obtain the password, ask Neil, Etienne, Frederique, Charles or Thomas.
# NOTE(review): the password below looks like a masked placeholder; avoid
# committing the real value to version control.
hostname = 'maestria.astro.umontreal.ca'
port = 5822
username = 'nirps-client'
password = '****************'
# Remote directory that is searched (including its subfolders) for files.
remote_directory = r"/cosmos99/nirps/apero-data/nirps_ha_07276_online/red"

def check_connection(hostname, port, username, password):
    """Open an SSH connection plus an SFTP session and verify they work.

    After connecting, the SFTP session changes into the module-level
    ``remote_directory``; a failure there is reported like any other
    connection problem.

    Args:
        hostname: Host name of the SSH server.
        port: TCP port of the SSH server.
        username: Login name.
        password: Login password.

    Returns:
        A ``(client, sftp)`` tuple on success, or ``(None, None)`` when the
        connection could not be established, so callers can always unpack
        the result into two names.
    """
    client = paramiko.SSHClient()
    # NOTE(review): AutoAddPolicy accepts unknown host keys without
    # verification; consider loading known host keys instead.
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        client.connect(hostname, port=port, username=username, password=password)
        sftp = client.open_sftp()
        sftp.chdir(remote_directory)
    except paramiko.AuthenticationException:
        print("Authentication failed")
    except paramiko.SSHException as ssh_exception:
        print(f"SSH error: {ssh_exception}")
    except OSError as socket_error:
        # socket.error has been an alias of OSError since Python 3.3; naming
        # OSError directly avoids relying on paramiko exposing a `socket`
        # attribute.
        print(f"Socket error: {socket_error}")
    else:
        print("Connection to SSH server is OK")
        return client, sftp  # Both the SSH client and the SFTP session

    # Failure path: release whatever was opened and give the caller a pair
    # it can still unpack.
    client.close()
    return None, None

def list_and_count_files(ssh_client, folder_path):
    """Count the regular files under *folder_path* on the remote host.

    Runs ``find <folder_path> -type f`` through the SSH client (``-type f``
    restricts the search to files, not directories) and counts the lines of
    its standard output.

    Args:
        ssh_client: Object exposing ``exec_command(command)`` that returns a
            ``(stdin, stdout, stderr)`` triple, e.g. a connected
            ``paramiko.SSHClient``.
        folder_path: Remote folder to search, including its subfolders.

    Returns:
        The number of lines printed by ``find``, i.e. one per file found.
    """
    # Quote the path so spaces or shell metacharacters in it cannot split or
    # alter the remote command.
    command = 'find ' + shlex.quote(folder_path) + ' -type f'
    _stdin, stdout, _stderr = ssh_client.exec_command(command)
    # Count while streaming instead of materialising every path in memory.
    return sum(1 for _ in stdout)

def paramiko_glob(path, suffix, sftp):
    """Recursively collect remote files under *path* whose name ends with *suffix*.

    Args:
        path: Remote directory to search.
        suffix: File-name ending to match (compared with ``str.endswith``).
        sftp: SFTP session exposing ``listdir_attr(path)``; each returned
            entry must provide ``filename`` and ``st_mode``.

    Returns:
        A list of matching remote paths, each built by joining *path* with
        the entry names using forward slashes. If *path* cannot be listed, a
        message is printed and an empty list is returned.
    """
    try:
        # A single listing doubles as the existence check, so each directory
        # costs one remote round trip instead of two.
        entries = sftp.listdir_attr(path)
    except IOError:
        print(f"Directory not found: {path}")
        return []  # Nothing to report for a directory that cannot be listed

    file_list = []
    for entry in entries:
        # Remote paths always use '/', whatever the local OS is; posixpath
        # also leaves backslashes inside remote file names untouched.
        entry_path = posixpath.join(path, entry.filename)
        # st_mode is treated as optional: an entry without it is handled as
        # a plain file rather than crashing stat.S_ISDIR.
        if entry.st_mode is not None and stat.S_ISDIR(entry.st_mode):
            file_list.extend(paramiko_glob(entry_path, suffix, sftp))
        elif entry.filename.endswith(suffix):
            file_list.append(entry_path)
    return file_list


def normalize_path(path):
    """Return *path* with every backslash turned into a forward slash."""
    segments = path.split('\\')
    return '/'.join(segments)

# Module-level accumulator: read_data_with_retry appends one dict per
# successfully read file (keys 'Date', 'file name', 'NaN fraction'); the main
# script later turns it into a pandas DataFrame.
data_list = []

def _central_nan_fraction(data, half_width=100):
    """Return the fraction of non-finite values in a window centred on axis 1."""
    centre = data.shape[1] // 2
    # Clamp the start at 0: for arrays narrower than 2 * half_width a negative
    # start would wrap around and select the wrong columns.
    start = max(centre - half_width, 0)
    window = data[:, start:centre + half_width]
    return np.mean(~np.isfinite(window))


def read_data_with_retry(file_path, max_retries=3, retry_delay=2):
    """Read a remote FITS file and record its central NaN fraction.

    The file is opened through the module-level ``sftp`` session and read
    with ``fits.getdata``. The fraction of non-finite values in a window of
    up to 200 columns centred on the second axis is computed, and a dict with
    the keys 'Date' (name of the containing folder), 'file name' and
    'NaN fraction' is appended to the module-level ``data_list``.

    Args:
        file_path: Remote path of the file to read (forward-slash separated).
        max_retries: Maximum number of read attempts.
        retry_delay: Seconds to wait between two attempts.

    Returns:
        True once the file has been read and recorded, False if every
        attempt failed.
    """
    for attempt in range(1, max_retries + 1):
        try:
            # Compute inside the `with` so the remote file is still open in
            # case the array contents are only loaded on access.
            with sftp.open(file_path, 'rb') as remote_file:
                data = fits.getdata(remote_file)
                nan_fraction = _central_nan_fraction(data)
        except Exception as e:
            # Best effort: any failure counts as a retryable read problem.
            print("Error reading data from file:", file_path)
            print("Exception:", e)
            # Only wait when another attempt follows; sleeping after the
            # final failure would just delay the next file.
            if attempt < max_retries:
                time.sleep(retry_delay)
            continue

        # Split the path into its containing folder and the file name
        subfolder, filename = os.path.split(file_path)

        # Record the result as a dictionary
        data_list.append({
            'Date': subfolder.split('/')[-1],
            'file name': filename,
            'NaN fraction': nan_fraction
        })

        print(f"{subfolder}/{filename} | NaN fraction: {nan_fraction:.4f}")
        return True

    return False


# Create the SSH client and SFTP connection.
# Guard against check_connection not handing back a usable (client, sftp)
# pair, so that a failed connection does not crash the unpacking below.
connection = check_connection(hostname, port, username, password)
ssh_client, sftp = connection if connection else (None, None)

# Only proceed when the SSH connection was established
if ssh_client:
    try:
        number_files = list_and_count_files(ssh_client, remote_directory)
        print("The current path is: ", remote_directory)
        print("The total number of files in this path is:", number_files)

        # Get the list of matching files (full remote paths under remote_directory)
        specified_files = paramiko_glob(remote_directory, 'E_pp_e2dsff_A.fits', sftp)
        print("The number of files with specific end is:", len(specified_files))

        # Read every file (with retry and delay) and report the ones that
        # could not be read
        for file in specified_files:
            if not read_data_with_retry(file):
                print("Failed to read data for file:", file)

        # After the loop, create a pandas DataFrame from the collected rows
        df = pd.DataFrame(data_list)

        # Save the DataFrame to an Excel file
        df.to_excel("output_table.xlsx", index=False)
    finally:
        # Always release the remote handles, even if a step above raised
        sftp.close()
        ssh_client.close()

# Print the total execution time of the code
end_time = time.time() - begin_time
print("Total execution time: ", format_timespan(end_time))
