In [1]:
#for the prototype

import pandas as pd
import os
import pyarrow.parquet as pq
import sys
from scipy.stats import pearsonr, spearmanr
from scipy.spatial.distance import cosine
from scipy import stats
import array
import io
from sklearn.datasets import make_classification
from feature_engine.selection import DropCorrelatedFeatures
from sklearn.manifold import Isomap
from sklearn.decomposition import PCA
from umap import UMAP
import numpy as np

#for the GUI

import tensorflow as tf
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Input, Dense, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.optimizers import RMSprop
from tkinter import * #imports all functions for the tkinter module
import tkinter
from tkinter import ttk
import tkinter.messagebox
import customtkinter
import subprocess
import tksheet

In [None]:
# Global customtkinter look-and-feel. These module-level calls run before the
# App window is instantiated further down in this file.
customtkinter.set_appearance_mode("System")  # Modes: "System" (standard), "Dark", "Light"
customtkinter.set_default_color_theme("green")  # Themes: "blue" (standard), "green", "dark-blue"


class App(customtkinter.CTk):
    def __init__(self) -> None:
        """Build the main window and every widget of the Phenomics GUI.

        Widgets are created in a fixed order and several of them share the
        same grid cell, so the statement order below is significant.
        """
        super().__init__()
        self.corr = 0.0  # correlation cut-off; replaced by open_input_dialog_event
        self.query = ""  # last query string; filled in when a query is run

        # configure window
        self.title("Phenomics GUI")
        self.geometry(f"{1100}x{580}")

        # configure grid layout (4x4): column 1 and rows 0-2 absorb extra space,
        # columns 2 and 3 keep their natural width
        self.grid_columnconfigure(1, weight=1)
        self.grid_columnconfigure((2, 3), weight=0)
        self.grid_rowconfigure((0, 1, 2), weight=1)

        # create sidebar frame with widgets
        self.sidebar_frame = customtkinter.CTkFrame(self, width=140, corner_radius=0)
        self.sidebar_frame.grid(row=0, column=0, rowspan=4, sticky="nsew")
        # Row 4 holds no widget and carries the weight, so it takes up the spare
        # height between the logo (row 0) and the settings widgets (rows 5-8).
        self.sidebar_frame.grid_rowconfigure(4, weight=1)
        self.logo_label = customtkinter.CTkLabel(self.sidebar_frame, text="Phenomics", font=customtkinter.CTkFont(size=35, weight="bold"))


        self.logo_label.grid(row=0, column=0, padx=20, pady=(20, 10))
        self.appearance_mode_label = customtkinter.CTkLabel(self.sidebar_frame, text="Appearance Mode:", anchor="w")
        self.appearance_mode_label.grid(row=5, column=0, padx=20, pady=(10, 0))
        self.appearance_mode_optionemenu = customtkinter.CTkOptionMenu(self.sidebar_frame, values=["Light", "Dark", "System"],
                                                                       command=self.change_appearance_mode_event)
        self.appearance_mode_optionemenu.grid(row=6, column=0, padx=20, pady=(10, 10))
        self.scaling_label = customtkinter.CTkLabel(self.sidebar_frame, text="UI Scaling:", anchor="w")
        self.scaling_label.grid(row=7, column=0, padx=20, pady=(10, 0))
        self.scaling_optionemenu = customtkinter.CTkOptionMenu(self.sidebar_frame, values=["80%", "90%", "100%", "110%", "120%"],
                                                               command=self.change_scaling_event)
        self.scaling_optionemenu.grid(row=8, column=0, padx=20, pady=(10, 20))

        # create main entry and button: the query text box on the bottom row and
        # the button that starts the query (handled by query_button_click)
        self.entry = customtkinter.CTkEntry(self, placeholder_text="Enter your query here:")
        self.entry.grid(row=3, column=1, columnspan=2, padx=(20, 0), pady=(20, 20), sticky="nsew")

        self.main_button_1 = customtkinter.CTkButton(master=self, fg_color="transparent", border_width=2, text_color=("gray10", "#DCE4EE"), text="Let's Query!", command=self.query_button_click)
        self.main_button_1.grid(row=3, column=3, padx=(20, 20), pady=(20, 20), sticky="nsew")

        # create textbox (welcome text is inserted at the end of this method);
        # it spans columns 1-2 of row 0
        self.textbox = customtkinter.CTkTextbox(self)
        self.textbox.grid(row=0, column=1, columnspan =2,padx=(20, 0), pady=(20,0), sticky="nsew")

        # create radiobutton frame: one shared IntVar, values 0 / 1 / 2 map to
        # Pearson / Spearman / Cosine in creation order; 0 is the initial value
        self.radiobutton_frame = customtkinter.CTkScrollableFrame(self, label_text="Correlation Methods")
        self.radiobutton_frame.grid(row=0, column=3, padx=(20, 20), pady=(20, 0), sticky="nsew")
        self.radio_var = tkinter.IntVar(value=0)
        self.radio_button_1 = customtkinter.CTkRadioButton(master=self.radiobutton_frame, variable=self.radio_var, value=0)
        self.radio_button_1.grid(row=1, column=2, pady=10, padx=20, sticky="w")
        self.radio_button_2 = customtkinter.CTkRadioButton(master=self.radiobutton_frame, variable=self.radio_var, value=1)
        self.radio_button_2.grid(row=2, column=2, pady=10, padx=20, sticky="w")
        self.radio_button_3 = customtkinter.CTkRadioButton(master=self.radiobutton_frame, variable=self.radio_var, value=2)
        self.radio_button_3.grid(row=3, column=2, pady=10, padx=20, sticky="w")
        self.radio_button_1.configure(text="Pearson Correlation")
        self.radio_button_2.configure(text="Spearman Correlation")
        self.radio_button_3.configure(text="Cosine Similarity")
        # button that opens the cut-off dialog (open_input_dialog_event)
        self.string_input_button = customtkinter.CTkButton(master=self.radiobutton_frame, text="Insert a Correlation Cut-Off ", command=self.open_input_dialog_event)
        self.string_input_button.grid(row=4, column=2, padx=45, pady=20)

        # create slider and progressbar frame: a transparent frame with no
        # children in this method, placed in the same cell (row 0, column 1) as
        # the textbox with a 150 px top padding
        self.slider_progressbar_frame = customtkinter.CTkFrame(self, fg_color="transparent")
        self.slider_progressbar_frame.grid(row=0, column=1, padx=(20, 0), pady=(150, 0), sticky="nsew")
        self.slider_progressbar_frame.grid_columnconfigure(0, weight=1)
        self.slider_progressbar_frame.grid_rowconfigure(4, weight=1)


        # create scrollable frame holding one switch per cellular component;
        # the 160 px top padding presumably keeps it below the textbox, which
        # also covers column 2 of row 0
        self.scrollable_frame = customtkinter.CTkScrollableFrame(self, label_text="Cellular Component Selection")
        self.scrollable_frame.grid(row=0, column=2,padx=(20, 0), pady=(160, 0),rowspan = 2,sticky="nsew")
        self.scrollable_frame.grid_columnconfigure(0, weight=1)
        self.scrollable_frame_switches = []
        switch1 = customtkinter.CTkSwitch(master=self.scrollable_frame, text="Mitochondria")
        switch1.grid(row=0, column=0, padx=10, pady=(10, 5), sticky="w")
        self.scrollable_frame_switches.append(switch1)
        switch2 = customtkinter.CTkSwitch(master=self.scrollable_frame, text="Nucleus")
        switch2.grid(row=1, column=0, padx=10, pady=(10, 5), sticky="w")
        self.scrollable_frame_switches.append(switch2)
        switch3 = customtkinter.CTkSwitch(master=self.scrollable_frame, text="Cytoplasm")
        switch3.grid(row=2, column=0, padx=10, pady=(10, 5), sticky="w")
        self.scrollable_frame_switches.append(switch3)
        switch4 = customtkinter.CTkSwitch(master=self.scrollable_frame, text="Rough ER")
        switch4.grid(row=3, column=0, padx=10, pady=(10, 5), sticky="w")
        self.scrollable_frame_switches.append(switch4)

        # create checkbox frame: one checkbox per dimensionality reduction
        # technique (checkbox_1..4 are read by validate_input)
        self.checkbox_slider_frame = customtkinter.CTkScrollableFrame(self, label_text="Dimensionality Reduction Techniques")
        self.checkbox_slider_frame.grid(row=1, column=3, padx=(20, 20), pady=(20, 0), sticky="nsew")
        # NOTE(review): this label is created but never placed with grid() in this method
        self.label_checkbox_slider = customtkinter.CTkLabel(master=self.checkbox_slider_frame)
        self.checkbox_1 = customtkinter.CTkCheckBox(master=self.checkbox_slider_frame)
        self.checkbox_1.grid(row=1, column=0, pady=(10, 0), padx=20, sticky="w")
        self.checkbox_2 = customtkinter.CTkCheckBox(master=self.checkbox_slider_frame)
        self.checkbox_2.grid(row=2, column=0, pady=(30, 0), padx=20, sticky="w")
        self.checkbox_3 = customtkinter.CTkCheckBox(master=self.checkbox_slider_frame)
        self.checkbox_3.grid(row=3, column=0, pady=(30,0), padx=20, sticky="w")
        self.checkbox_4 = customtkinter.CTkCheckBox(master=self.checkbox_slider_frame)
        self.checkbox_4.grid(row=4, column=0, pady=30, padx=20, sticky="w")
        self.checkbox_1.configure(text="Isometric Feature Mapping")
        self.checkbox_2.configure(text="Principle Component Analysis")
        self.checkbox_3.configure(text="Variational Autoencoder")
        self.checkbox_4.configure(text="Uniform Manifold Approximation and Projection")


        # create tabview for outputs; like the frames above it sits in row 0,
        # column 1 and is offset downwards with a 140 px top padding
        self.tabview = customtkinter.CTkTabview(self, width=250)
        self.tabview.grid(row=0, column=1, padx=(20, 0), pady=(140, 0), rowspan = 2,sticky="nsew")
        self.tabview.add("Phenosimilars")
        self.tabview.add("Tab 2")
        self.tabview.add("Tab 3")
        self.tabview.tab("Phenosimilars").grid_columnconfigure(0, weight=1)  # configure grid of individual tabs
        self.tabview.tab("Tab 2").grid_columnconfigure(0, weight=1)


        # set default values shown in the sidebar menus and fill the welcome text
        self.appearance_mode_optionemenu.set("Dark")
        self.scaling_optionemenu.set("100%")
        self.textbox.insert("0.0", "Welcome to Phenomics! This application will allow you to query the JUMP Cell Painting Dataset. This application was created by James Altham, Gaia Di Bernardini, Abby Kuelker, Samantha Prosperi, Cooper Ross, and Hannah Wilker with the invaluable help of Professor Timothy Lannin and Dr. Mathew Pharris.")
        self.textbox.configure(font=customtkinter.CTkFont(size=20))

    def open_input_dialog_event(self):
        """Ask the user for a correlation cut-off and store it on ``self.corr``.

        The dialog returns text, but the cut-off is later compared against
        numeric similarity scores, so the text is parsed to ``float`` before
        it is stored. Input that is empty, not a number, or outside the
        interval [-1, 1] leaves ``self.corr`` untouched; the last two cases
        also show an error dialog.

        Returns:
            The raw text entered in the dialog, or ``None`` when the dialog
            was cancelled (same as before; the caller is a button command).
        """
        dialog = customtkinter.CTkInputDialog(text="Enter your desired correlation cut-off (ex: for 90% and above enter 0.9)", title="CTkInputDialog")
        corr = dialog.get_input()
        if not corr or not corr.strip():
            # Cancelled or left blank: keep the current cut-off.
            return corr

        try:
            cutoff = float(corr)
        except ValueError:
            cutoff = None

        # The range test also rejects NaN, for which both comparisons are False.
        if cutoff is None or not -1.0 <= cutoff <= 1.0:
            tkinter.messagebox.showerror(
                "Input Error",
                "Error: The correlation cut-off must be a number between -1 and 1 (ex: 0.9).",
            )
            return corr

        self.corr = cutoff
        return corr

    def change_appearance_mode_event(self, new_appearance_mode: str) -> None:
        """Option-menu callback: switch customtkinter to the chosen appearance mode.

        ``new_appearance_mode`` is the menu entry that was picked; the sidebar
        menu offers "Light", "Dark" and "System".
        """
        customtkinter.set_appearance_mode(new_appearance_mode)

    def change_scaling_event(self, new_scaling: str):
        """Option-menu callback: apply the UI scaling picked in the sidebar.

        ``new_scaling`` is a percentage label such as ``"110%"``. The percent
        sign is removed, the number is turned into a fraction (``1.1``) and
        that fraction is handed to customtkinter as the widget scaling.
        """
        percent = int(new_scaling.replace("%", ""))
        customtkinter.set_widget_scaling(percent / 100)

    def sidebar_button_event(self) -> None:
        """Print a fixed message to stdout.

        NOTE(review): no widget in the visible code uses this as its command;
        it looks like a leftover debug callback.
        """
        print("sidebar_button click") 
        
        
    # Validate that the user has provided the necessary inputs before querying.

    def validate_input(self):
        """Return True when the GUI holds everything a query needs.

        Three things are required:

        * the radio group holds one of the values its buttons can set
          (0, 1 or 2);
        * at least one dimensionality reduction checkbox is ticked;
        * the query entry contains something other than whitespace.

        Returns:
            bool: True if all three conditions hold, otherwise False.
        """
        # The radio buttons are created with the values 0, 1 and 2; anything
        # else (including the former -1 sentinel) means no valid selection.
        if self.radio_var.get() not in (0, 1, 2):
            return False

        technique_boxes = (self.checkbox_1, self.checkbox_2, self.checkbox_3, self.checkbox_4)
        if not any(box.get() for box in technique_boxes):
            return False

        # A query made only of spaces can never match a row, so reject it here
        # instead of letting the lookup fail later.
        return bool(self.entry.get().strip())
    

    def query_button_click(self):
        """Run the query described by the GUI and show the best matches.

        Steps:

        1. Check the inputs (``validate_input``), the selected similarity
           measure and the numeric cut-off; show an error dialog and stop if
           any of them is unusable.
        2. Load the labelled data set and locate the first row whose key
           equals the query; show an error dialog if there is none.
        3. For every ticked reduction technique (Isomap, PCA, UMAP) embed the
           numeric columns in three dimensions, score every row against the
           query row with the selected measure, and keep the ten best rows
           whose score exceeds the cut-off.
        4. Show the per-technique top lists side by side in the
           "Phenosimilars" tab.

        The "Variational Autoencoder" checkbox has no reduction attached to it
        here; ticking only that box produces an empty table.
        """
        # Radio values follow the order in which the buttons are created:
        # 0 = Pearson, 1 = Spearman, 2 = Cosine.
        measures = ("Pearson", "Spearman", "Cosine")
        selected = self.radio_var.get()
        if not self.validate_input() or not 0 <= selected < len(measures):
            error_message = "Error: Please select one of the radiobuttons, select at least one checkbox, and enter text in the query textbox."
            tkinter.messagebox.showerror("Input Error", error_message)
            return
        measure = measures[selected]

        # self.corr may still hold the raw dialog text; scores are floats, so
        # the cut-off has to be numeric before it is compared with them.
        try:
            cutoff = float(self.corr)
        except (TypeError, ValueError):
            tkinter.messagebox.showerror(
                "Input Error",
                "Error: The correlation cut-off must be a number (ex: 0.9).",
            )
            return

        print("Querying..")
        # NOTE(review): machine-specific absolute path; should become configurable.
        norm_data = pd.read_csv(r'C:\Users\abbyk\OneDrive - Northeastern University\Documents\Capstone\AK_data_labeled.csv')
        norm_nums = norm_data.select_dtypes(include=['number'])
        self.query = self.entry.get().strip()
        print(self.query)
        print("Correlation cut-off:", cutoff)

        # Find the reference row once, before any expensive embedding is
        # computed. The embeddings are keyed by 'Metadata_InChIKey'; the raw
        # comparison uses column 6 as its key, as the original code did.
        inchi_keys = norm_data['Metadata_InChIKey']
        reduced_pos = self._first_match(inchi_keys, self.query)
        raw_pos = self._first_match(norm_data.iloc[:, 6], self.query)
        if reduced_pos is None or raw_pos is None:
            tkinter.messagebox.showerror(
                "Query Error",
                f"Error: '{self.query}' was not found in the dataset.",
            )
            return

        # (checkbox, score-column prefix, key-column name in the result table,
        # factory for the reducer). Reducers are built lazily so that only the
        # ticked techniques are instantiated.
        reducers = (
            (self.checkbox_1, 'Isomap', 'IsomapInChiKey',
             lambda: Isomap(n_components=3, n_neighbors=100)),
            (self.checkbox_2, 'PCA', 'PCAInChiKey',
             lambda: PCA(n_components=3)),
            (self.checkbox_4, 'UMAP', 'UmapInChiKey',
             lambda: UMAP(n_components=3, n_neighbors=5, min_dist=0.1)),
        )

        # mega_df collects one score column per technique next to the keys.
        mega_df = pd.DataFrame({'InChiKey': inchi_keys})
        final_df = pd.DataFrame()
        for checkbox, name, key_column, make_reducer in reducers:
            if not checkbox.get():
                continue
            print(name)
            embedding = make_reducer().fit_transform(norm_nums)  # metadata columns excluded
            score_column = f'{name}{measure}'
            mega_df[score_column] = self._similarity_scores(embedding, reduced_pos, measure)
            top_hits = self._top_matches(mega_df, score_column, cutoff)
            top_hits = top_hits.rename(columns={'InChiKey': key_column})
            # Newest technique goes on the left, as before.
            final_df = pd.concat([top_hits, final_df], axis=1)

        # Scores on the unreduced features (everything from column 8 onwards).
        # NOTE(review): these are stored in mega_df but not shown in the table yet.
        mega_df[f'Raw_{measure}'] = self._similarity_scores(norm_data.iloc[:, 8:], raw_pos, measure)

        print(final_df)
        self._show_results(final_df)

    @staticmethod
    def _first_match(keys, query):
        """Return the position of the first entry of ``keys`` equal to ``query``, or None."""
        hits = np.flatnonzero(np.asarray(keys == query))
        return int(hits[0]) if hits.size else None

    @staticmethod
    def _similarity_scores(features, reference_pos, measure):
        """Score every row of ``features`` against the row at ``reference_pos``.

        ``measure`` is "Pearson", "Spearman" or "Cosine"; cosine similarity is
        computed as one minus scipy's cosine distance. Returns a list with one
        score per row, in row order.
        """
        values = np.asarray(features, dtype=float)
        reference = values[reference_pos]

        def pearson(row):
            coefficient, _ = pearsonr(reference, row)
            return coefficient

        def spearman(row):
            coefficient, _ = spearmanr(reference, row)
            return coefficient

        def cosine_similarity(row):
            return 1.0 - cosine(reference, row)

        score = {"Pearson": pearson, "Spearman": spearman, "Cosine": cosine_similarity}[measure]
        return [score(row) for row in values]

    @staticmethod
    def _top_matches(scores_df, score_column, cutoff, limit=10):
        """Return the ``limit`` best rows of ``scores_df`` whose score exceeds ``cutoff``.

        The result holds only the 'InChiKey' and ``score_column`` columns,
        sorted by descending score and re-indexed from zero so that several
        results can be placed side by side.
        """
        ranked = scores_df.sort_values(by=score_column, ascending=False)
        ranked = ranked[ranked[score_column] > cutoff]
        return ranked[['InChiKey', score_column]].head(limit).reset_index(drop=True)

    def _show_results(self, outputs):
        """Display ``outputs`` (a DataFrame) as a table in the "Phenosimilars" tab."""
        # Remove the table of the previous query; otherwise every click would
        # pack one more Treeview underneath the existing ones.
        previous = getattr(self, "treeview", None)
        if previous is not None:
            previous.destroy()

        style = ttk.Style()
        style.configure("Treeview", rowheight=65)
        self.treeview = ttk.Treeview(master=self.tabview.tab("Phenosimilars"))

        columns = list(outputs.columns)
        self.treeview["columns"] = columns
        for col in columns:
            self.treeview.heading(col, text=col)
            self.treeview.column(col, anchor="center")

        for row in outputs.itertuples(index=False, name=None):
            self.treeview.insert("", "end", values=row)

        self.treeview.pack(fill="both", expand=True)
        return outputs

# Script entry point: build the window and hand control to the Tk event loop.
if __name__ == "__main__":
    app = App()
    app.mainloop()