In [5]:
import tkinter as tk
from tkinter import ttk, filedialog
import pandas as pd
import threading
import re
import fitz


class ResumeDownloader:
    def __init__(self, resume_directory, progress_var, log_text):
        self.resume_directory = resume_directory
        self.progress_var = progress_var
        self.log_text = log_text

    def download_resume(self, resume_url):
        try:
            # Placeholder code to download the resume
            # Replace this code with actual code to download the resume
            self.log_text.insert(tk.END, f"Downloading resume: {resume_url}\n")
            self.progress_var.set(50)  # Update progress bar
        except Exception as e:
            self.log_text.insert(tk.END, f"Error occurred while downloading resume: {resume_url}\n")
            self.log_text.insert(tk.END, f"Error message: {str(e)}\n")



class ResumeConverter:
    def __init__(self, progress_var, log_text):
        self.progress_var = progress_var
        self.log_text = log_text

    def convert_to_pdf(self, resume_file_path):
        try:
            # Placeholder code to convert the resume to PDF
            # Replace this code with actual code to convert the resume to PDF
            self.log_text.insert(tk.END, f"Converting resume: {resume_file_path}\n")
            self.progress_var.set(75)  # Update progress bar
            return "converted_resume.pdf"
        except Exception as e:
            self.log_text.insert(tk.END, f"Error occurred while converting resume: {resume_file_path}\n")
            self.log_text.insert(tk.END, f"Error message: {str(e)}\n")


class ResumeFilter:
    def __init__(self, df, resume_directory, progress_var, log_text):
        self.df = df
        self.resume_directory = resume_directory
        self.progress_var = progress_var
        self.log_text = log_text

    def filter_dataframe_by_keyword(self, keywords, selected_columns):
        try:
            filtered_df = self.df[selected_columns]

            def has_keyword(file_path):
                try:
                    with fitz.open(file_path) as doc:
                        for page in doc:
                            try:
                                page_text = page.get_text()
                                for keyword in keywords:
                                    pattern = r'\b{}\b'.format(re.escape(keyword))
                                    if re.search(pattern, page_text, re.IGNORECASE):
                                        return True
                            except Exception as e:
                                self.log_text.insert(tk.END, f"Error occurred while processing page in PDF file: {file_path}\n")
                except Exception as e:
                    self.log_text.insert(tk.END, f"Error occurred while processing PDF file: {file_path}\n")

                return False

            filtered_df['Has Keyword'] = filtered_df['Resume Path'].apply(has_keyword)
            return filtered_df
        except Exception as e:
            self.log_text.insert(tk.END, "Error occurred while filtering the dataframe.\n")
            self.log_text.insert(tk.END, f"Error message: {str(e)}\n")


class ResumeFilterApp:
    def __init__(self):
        self.root = tk.Tk()
        self.root.title("Resume Filter")
        self.excel_file_path = tk.StringVar()
        self.resume_directory = "resumes"
        self.df = pd.DataFrame()

        self.create_widgets()

    def create_widgets(self):
        frame = ttk.Frame(self.root, padding=20)
        frame.pack()

        # Excel file path
        excel_label = ttk.Label(frame, text="Excel File:")
        excel_label.grid(row=0, column=0, sticky="w")
        excel_entry = ttk.Entry(frame, textvariable=self.excel_file_path, state="readonly")
        excel_entry.grid(row=0, column=1, sticky="w", padx=5, pady=5)
        excel_browse_button = ttk.Button(frame, text="Browse", command=self.browse_excel_file)
        excel_browse_button.grid(row=0, column=2, padx=5, pady=5)

        # Load Data button
        load_data_button = ttk.Button(frame, text="Load Data", command=self.load_data)
        load_data_button.grid(row=1, column=0, columnspan=3, padx=5, pady=5)

        # Search keywords entry
        search_keywords_label = ttk.Label(frame, text="Search Keywords (comma-separated):")
        search_keywords_label.grid(row=2, column=0, sticky="w", padx=5, pady=5)
        self.search_keywords_entry = ttk.Entry(frame)
        self.search_keywords_entry.grid(row=2, column=1, columnspan=2, sticky="w", padx=5, pady=5)

        # Checklist columns
        # checklist_columns_label = ttk.Label(frame, text="Select Columns:")
        # checklist_columns_label.grid(row=3, column=0, sticky="w", padx=5, pady=5)
        # Create the column checklist
        # Columns checklist
        columns_label = ttk.Label(frame, text="Select Columns to Show:")
        # columns_label.pack(side="top", padx=5, pady=5)
        columns_label.grid(row=3, column=0, sticky="w", padx=5, pady=5)


        self.checklist_values = []
        self.checklist_columns = []

        # columns_label = ttk.Label(frame, text="Select Columns to Show:")
        # columns_label.grid(row=3, column=0, sticky="w", padx=5, pady=5)

        for i, column in enumerate(self.df.columns):
            var = tk.StringVar()
            var.set(column)  # Set the column name as the default value (ticked on)
            # self.checklist_values.append(var)
            self.checklist_values.append(column)
            checkbutton = ttk.Checkbutton(frame, text=column, variable=var, command=self.update_dataframe_table)
            checkbutton.grid(row=i + 4, column=0, columnspan=3, sticky="w", padx=5, pady=2)
            self.checklist_columns.append(checkbutton)


        # Start Download button
        start_download_button = ttk.Button(frame, text="Start Download", command=self.start_download_thread)
        start_download_button.grid(row=4+len(self.df.columns), column=0, columnspan=3, padx=5, pady=5)

        # Progress bar
        self.progress_var = tk.DoubleVar()
        progress_bar = ttk.Progressbar(frame, variable=self.progress_var, maximum=100)
        progress_bar.grid(row=5+len(self.df.columns), column=0, columnspan=3, padx=5, pady=5)

        # Log text area
        log_label = ttk.Label(frame, text="Log:")
        log_label.grid(row=6+len(self.df.columns), column=0, sticky="w", padx=5, pady=5)
        self.log_text = tk.Text(frame, width=40, height=10)
        self.log_text.grid(row=7+len(self.df.columns), column=0, columnspan=3, padx=5, pady=5)

        # Table frame
        # self.table_frame = ttk.Frame(self.root, padding=20)
        # self.table_frame.pack()

        self.table_frame = ttk.Frame(self.root, padding=20)
        # self.table_frame.grid(row=0, column=1, rowspan=len(self.df)+1, padx=10, pady=10, sticky="nsew")

        self.table_frame.pack(side="left", fill="both", expand=True)

        # self.table_frame.pack()



        # Run the application
        self.root.mainloop()


    def browse_excel_file(self):
        self.excel_file_path.set(filedialog.askopenfilename(filetypes=[("Excel Files", "*.xlsx")]))


    def load_data(self):
        try:
            if not self.excel_file_path.get():
                self.log_text.insert(tk.END, "Please select an Excel file.\n")
                return

            self.df = pd.read_excel(self.excel_file_path.get())
            self.log_text.insert(tk.END, self.df)

            # Enable/disable the checklist columns based on the loaded dataframe
            for checkbutton in self.checklist_columns:
                checkbutton.configure(state="disabled")
            if not self.df.empty:
                for checkbutton in self.checklist_columns:
                    checkbutton.configure(state="normal")

            self.update_dataframe_table()
        except Exception as e:
            self.log_text.insert(tk.END, "Error occurred while loading data.\n")
            self.log_text.insert(tk.END, f"Error message: {str(e)}\n")

    def update_dataframe_table(self):
        try:
        # Clear existing table
            for child in self.table_frame.winfo_children():
                child.destroy()
                self.log_text.insert(tk.END, "1\n")

            selected_columns = [column.get() for column in self.checklist_values if column.get()]
            if not selected_columns:
                self.log_text.insert(tk.END, "2\n")
                return
            

            filtered_df = self.filter_dataframe_by_keyword(self.df, self.search_keywords, selected_columns)
            columns = filtered_df.columns
            data = filtered_df.values.tolist()
            self.log_text.insert(tk.END, "3\n")

            # Create header
            for i, column in enumerate(columns):
                header_label = ttk.Label(self.table_frame, text=column)
                header_label.grid(row=0, column=i, padx=5, pady=5)

            # Populate data
            for i, row in enumerate(data):
                for j, value in enumerate(row):
                    cell_label = ttk.Label(self.table_frame, text=value)
                    cell_label.grid(row=i+1, column=j, padx=5, pady=5)

        except Exception as e:
            self.log_text.insert(tk.END, "Error occurred while updating the dataframe table.\n")
            self.log_text.insert(tk.END, f"Error message: {str(e)}\n")


    def start_download_thread(self):
        try:
            keywords = self.search_keywords_entry.get().split(",")
            print(self.checklist_values)
            selected_columns = ["First Name"]#self.df.columns.values#[var for i, var in enumerate(self.checklist_values)]

            if not keywords:
                self.log_text.insert(tk.END, "Please enter search keywords.\n")
                return
            if not selected_columns:
                self.log_text.insert(tk.END, "Please select at least one column.\n")
                return

            resume_filter = ResumeFilter(self.df, self.resume_directory, self.progress_var, self.log_text)

            thread = threading.Thread(target=self.download_resumes, args=(keywords, selected_columns, resume_filter))
            thread.start()
        except Exception as e:
            self.log_text.insert(tk.END, "Error occurred while starting the download thread.\n")
            self.log_text.insert(tk.END, f"Error message: {str(e)}\n")

    def download_resumes(self, keywords, selected_columns, resume_filter):
        try:
            filtered_df = resume_filter.filter_dataframe_by_keyword(keywords, selected_columns)
            resume_downloader = ResumeDownloader(self.resume_directory, self.progress_var, self.log_text)

            total_resumes = len(filtered_df)
            self.progress_var.set(0)  # Reset progress bar

            for index, row in filtered_df.iterrows():
                resume_url = row['Resume Path']

                self.log_text.insert(tk.END, f"Downloading resume: {resume_url}\n")

                # Update progress bar
                progress_value = int((index + 1) / total_resumes * 100)
                self.progress_var.set(progress_value)

                resume_downloader.download_resume(resume_url)

            self.log_text.insert(tk.END, "Download completed.\n")
            self.progress_var.set(100)  # Update progress bar

        except Exception as e:
            self.log_text.insert(tk.END, "Error occurred while downloading resumes.\n")
            self.log_text.insert(tk.END, f"Error message: {str(e)}\n")


if __name__ == "__main__":
    app = ResumeFilterApp()


[]
