# Daniopoint-py - Metadata extraction

For further information on usage and versioning please check the GitHub repository:
https://github.com/rmassei/danio-python-tools

### Import packages

In [None]:
import pandas as pd
import os
import sys

### User input

Input variables. Supported file types are .xlsx and .csv.

The file path can be copied and pasted directly; backslashes ("\") in the path are supported.

In [None]:
# Ask for the path of the ViewPoint export. Paths pasted from a file manager
# often arrive wrapped in quotes or with stray whitespace, which would make
# the read step fail, so both are stripped here.
file = input(r"Input file location:").strip().strip("\"'")

### Read the ViewPoint file

In [None]:
# Split the path into the bare file name and its extension; both are
# reported later in the output cell.
file_name = os.path.basename(file)
name, extension = os.path.splitext(file_name)

# Compare the extension case-insensitively so that ".XLSX" is also read as
# an Excel workbook. `extension` itself is left untouched for reporting.
if extension.lower() == ".xlsx":
    df_raw = pd.read_excel(file)
else:
    # Every other extension is parsed as UTF-16 delimited text using
    # read_table's default separator.
    # NOTE(review): the notebook text lists ".csv" as supported; confirm the
    # exported files really are tab-separated.
    df_raw = pd.read_table(file, encoding="utf-16", low_memory=False)

### Extract metadata from the ViewPoint file

This code chunk extracts the metadata from the ViewPoint file. Some column names may need to be changed to fit the column headers of your ViewPoint file.

In this specific example, the light and dark phases are specified in "stimuli_name", but the column name can change according to the software version.

In [None]:
# Number of distinct values in the 'location' column; the output cell
# reports it as the well-plate size.
unique_counts = df_raw['location'].nunique()

# Run-level settings are taken from the second row (positional index 1).
# NOTE(review): presumably the first row is not representative in ViewPoint
# exports - confirm against a real file.
second_row = df_raw.iloc[1]
# `bin` and `type` shadow builtins, but the output cell reads these exact
# names, so they are kept.
bin = second_row['end']
type = second_row['datatype']
date = second_row['stdate']

# Fall back to a placeholder so the output cell does not raise NameError
# when the file has neither a 'user' nor an 'operator' column. When both
# exist, 'operator' wins (same precedence as before).
user = "unknown"
if "user" in df_raw.columns:
    user = second_row['user']
if "operator" in df_raw.columns:
    user = second_row["operator"]

time = second_row['sttime']

# Last 'end' value in the file, used as the total measurement time.
meas_time = df_raw['end'].iloc[-1]

if 'stimuli_name' in df_raw.columns:
    # Keep only rows that carry a stimulus label, then keep the first row of
    # each distinct label.
    filtered_df = df_raw[df_raw['stimuli_name'].notna()]
    # Explicit copy: the frame is modified below, and writing into a
    # filtered slice triggers pandas' SettingWithCopyWarning.
    filtered_df = filtered_df[~filtered_df['stimuli_name'].duplicated(keep='first')].copy()
    phases = filtered_df['stimuli_name'].nunique()
    selected_columns = ["end", "stimuli_name"]
    filtered_df["end"] = filtered_df["end"].round()
    # Independent copy for the same reason: a column is added below.
    result_df = filtered_df[selected_columns].copy()
    result_df['Time_Minutes'] = result_df['end'] / 60
    result_df = result_df.rename(
        columns={'end': 'Time_Seconds', 'stimuli_name': 'Phase_Name'}
    )

### Print output

In [None]:
# File identity first.
print(f"File Name: {name}")
print(f"Extension: {extension}")

# Run-level summary, assembled line by line and emitted in a single print.
summary_lines = [
    f"The user {user} run the test on the {date}. The test started at {time}.",
    f"The test was run in a {unique_counts} well plate.",
    f'Binning was set at {bin} seconds while datatype was set as "{type}". ',
    f"Total measurement time was {round(meas_time)} seconds ({round(meas_time) / 60} minutes).",
]
print("\n".join(summary_lines))

# Stimulus phases are only available when the file has a 'stimuli_name' column.
if 'stimuli_name' not in df_raw.columns:
    print("No information on data stimuli was retrieved")
else:
    print(f"The script detected {phases} potential light phases")
    print(result_df)