# Daniopoint-py - Visualization functions

For further information on usage and versioning please check the GitHub repository:
https://github.com/rmassei/danio-python-tools

### Import packages

In [None]:
import os
import zipfile

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import io
import seaborn as sns

from statsmodels.stats.multicomp import pairwise_tukeyhsd
from openpyxl import Workbook
from openpyxl.drawing.image import Image as ExcelImage

### User input (1)

Input variables. Supported file types are .xlsx and .csv.

The path can be copied and pasted as-is; backslashes ("\") are supported.

Please check that the locations are reported in the format c01, c02, c03, etc.

In [None]:
# Path of the ViewPoint export to analyse. input() returns the text exactly as
# typed, so Windows backslashes need no escaping. Surrounding whitespace and
# quotes (added e.g. by Windows "Copy as path") are removed so that a pasted
# path can be opened without manual clean-up.
file = input(r"Input file location:").strip().strip("\"'")

In [None]:
# Plate layout entered by the user, converted to an integer in one step
# (the layout cell further down accepts 24, 48 or 96).
plate_type = int(input("Input plate layout:"))

In [None]:
# Name of the column in the input table that holds the animal location codes.
location = input(
    "Input column header with animal location (c01,c02...):"
)

In [None]:
# Name of the measurement column that every plot below will use.
endpoint = input(
    "Input endpoint to analyze (i.e. smldist, lardist):"
)

### Read the ViewPoint file

In [None]:
# Load the ViewPoint export into the DataFrame `df_raw`.
file_name = os.path.basename(file)
name, extension = os.path.splitext(file_name)
# Compare case-insensitively so that ".XLSX" is also read as an Excel workbook.
if extension.lower() == ".xlsx":
    df_raw = pd.read_excel(file)
else:
    # Every other extension is parsed as a UTF-16 encoded, tab-delimited text
    # table (tab is the default separator of pd.read_table).
    df_raw = pd.read_table(file, encoding="utf-16", low_memory=False)

### Create the plate layout

Create a plate layout using mapping

In [None]:
# Build the plate geometry and the lookup tables that translate location codes
# (c01, c02, ...) into well names (A01, A02, ...) and well names into
# (row, column) indices of the `plate` array.
if plate_type not in [24, 48, 96]:
    raise ValueError("Unsupported plate type. Supported types are 24, 48, and 96.")
num_columns = 12 if plate_type == 96 else 6
# Derive the row count from the well count instead of hard-coding 8 rows:
# 24 wells -> 4 x 6, 48 wells -> 8 x 6, 96 wells -> 8 x 12. With a fixed 8 rows
# a 24-well plate was drawn with four rows (E-H) that do not exist.
# NOTE(review): the 48-well layout is kept as 8 rows x 6 columns, as in the
# original mapping - confirm this matches the physical plate orientation.
num_rows = plate_type // num_columns
# Location codes are numbered row by row: the row letter advances every
# `num_columns` wells and the column number restarts at 01.
location_mapping = {
    f'c{i:02d}': f'{chr(65 + (i - 1) // num_columns)}{((i - 1) % num_columns) + 1:02d}'
    for i in range(1, plate_type + 1)
}
# Locations that are not in the mapping become NaN in the new column.
df_raw['well_plate_position'] = df_raw[location].map(location_mapping)
well_mapping = {
    f'{chr(65 + r)}{c + 1:02d}': (r, c)
    for r in range(num_rows)
    for c in range(num_columns)
}
# Accumulator for the per-well endpoint sums, filled in the next section.
plate = np.zeros((num_rows, num_columns))

## 1) Heatmap and well plate plot

### Assign the endpoint value to each well

In [None]:
# Sum the endpoint per well and store the totals in `plate`.
# The array is rebuilt first so that re-running this cell does not add the
# same measurements a second time.
plate = np.zeros((num_rows, num_columns))
# groupby drops rows whose well position is NaN (unmapped locations), and
# sum() skips NaN measurements instead of turning the whole well into NaN.
well_totals = df_raw.groupby('well_plate_position')[endpoint].sum()
for well, total in well_totals.items():
    position = well_mapping.get(well)
    # Wells outside the plate geometry are ignored.
    if position is not None:
        plate[position] = total

### Plot the heatmap/plate

In [None]:
# Draw the plate as a heatmap and write each well name on top of its cell.
column_positions = range(num_columns)
row_positions = range(num_rows)
column_labels = range(1, num_columns + 1)
row_labels = [chr(65 + r) for r in row_positions]

plt.imshow(plate, cmap='Reds')
plt.colorbar()
plt.title(f'Results - Sum of {endpoint}')
plt.xticks(column_positions, column_labels)
plt.yticks(row_positions, row_labels)
# x is the column index and y the row index of the image.
for well, (row, col) in well_mapping.items():
    plt.text(x=col, y=row, s=well, ha='center', va='center', color='w')
plt.show()

## 2) Boxplot - Lineplot - Density plot

### User input (2)

Provide the treatment file and the time range. The treatment file must be an .xlsx file with the columns `well_code` and `treatment`.

Please check that the locations are reported in the format c01, c02, c03, etc.

In [None]:
# Optional treatment file. An empty answer (or only whitespace) means that no
# treatment file is used and `treatments_file` is set to None.
user_input = input('Input treatment file location, otherwhise just press enter:')
# Remove surrounding whitespace and quotes so that a pasted path can be opened
# as-is; an empty result is mapped to None.
treatments_file = user_input.strip().strip("\"'") or None

In [None]:
# Time window of the analysis; both bounds are inclusive in the filter below.
start = int(input('Input start time of analysis (lower range):'))
end = int(input('Input end time (upper range):'))
# An inverted window would silently filter out every row and produce empty
# plots, so reject it here with an explicit message.
if start > end:
    raise ValueError(
        f"Start time ({start}) must not be greater than end time ({end})."
    )
start_range = (start, end)

In [None]:
# Attach a treatment label to every row, then keep only the rows whose
# 'start' value lies inside the requested time window (bounds included).
if treatments_file is None:
    # Without a treatment file all animals share one placeholder group.
    df_raw['treatment'] = 'Unknown'
else:
    treatments_df = pd.read_excel(treatments_file)
    animal_treatments = dict(zip(treatments_df['well_code'], treatments_df['treatment']))
    # Locations missing from the treatment file end up as NaN.
    df_raw['treatment'] = df_raw[location].map(animal_treatments)
if start_range is not None:
    lower, upper = start_range
    after_lower = df_raw['start'] >= lower
    before_upper = df_raw['start'] <= upper
    df_raw = df_raw[after_lower & before_upper]

In [None]:
# Boxplot of the endpoint per treatment group; outlier points are not drawn.
plt.figure(figsize=(10, 6))
boxplot_options = dict(x='treatment', y=endpoint, palette='Set3', showfliers=False)
sns.boxplot(data=df_raw, **boxplot_options)
plt.title(f'Boxplot of {endpoint} by Treatment within Start Range')
plt.show()

In [None]:
# Line plot of the endpoint over 'start', one line per treatment group.
plt.figure(figsize=(10, 6))
lineplot_options = dict(x='start', y=endpoint, hue='treatment')
sns.lineplot(data=df_raw, **lineplot_options)
plt.title(f'Time Series Line Plot of {endpoint} by Treatment within Start Range')
plt.show()


In [None]:
# Stacked histogram of the endpoint per treatment group with a KDE overlay.
plt.figure(figsize=(10, 6))
histplot_options = dict(x=endpoint, hue='treatment', multiple="stack", kde=True)
sns.histplot(data=df_raw, **histplot_options)
plt.title(f'Distribution of {endpoint} by Treatment within Start Range')
plt.show()

In [None]:
# Boxplot annotated with significance stars from Tukey's HSD test.
# Every treatment is compared with the reference group, which is the first
# treatment in plotting order. Stars: * p < 0.05, ** p < 0.01, *** p < 0.001.
# The cell is self-contained: it uses `df_raw` and computes the test itself
# instead of relying on `df` / `tukey_results`, which no earlier cell defines.
stats_df = df_raw[['treatment', endpoint]].dropna()
treatment_order = list(stats_df['treatment'].unique())

plt.figure(figsize=(10, 6))
# Fix the box order explicitly so the annotation positions match the boxes.
ax = sns.boxplot(x='treatment', y=endpoint, data=stats_df, order=treatment_order,
                 palette='Set3', showfliers=False)

# Tukey's HSD needs at least two groups; with a single group (e.g. every well
# labelled 'Unknown') the plot is shown without annotations.
if len(treatment_order) >= 2:
    tukey_results = pairwise_tukeyhsd(endog=stats_df[endpoint].to_numpy(),
                                      groups=stats_df['treatment'].to_numpy())
    # The adjusted p-values are reported per PAIR of groups, in the order of
    # the upper triangle of the (group x group) matrix built from
    # `groupsunique` - the same order as the rows of tukey_results.summary().
    tukey_groups = list(tukey_results.groupsunique)
    first_idx, second_idx = np.triu_indices(len(tukey_groups), 1)
    pair_pvalues = {
        frozenset((tukey_groups[a], tukey_groups[b])): p
        for a, b, p in zip(first_idx, second_idx, tukey_results.pvalues)
    }

    reference = treatment_order[0]
    for i, treatment in enumerate(treatment_order):
        if treatment == reference:
            continue
        p_value = pair_pvalues[frozenset((reference, treatment))]
        if p_value < 0.001:
            stars = "***"
        elif p_value < 0.01:
            stars = "**"
        elif p_value < 0.05:
            stars = "*"
        else:
            stars = ""
        # x is in data coordinates (box position), y in axes coordinates, so
        # the stars stay inside the plot even though outliers are hidden and
        # the data maximum may lie far above the visible range.
        ax.text(i, 0.98, stars, transform=ax.get_xaxis_transform(),
                ha='center', va='top', color='red', fontsize=12)
plt.title(f'Boxplot of {endpoint} by Treatment within Start Range')
plt.show()