In [1]:
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import sqlite3
from ipywidgets import Button, VBox, HBox, FileUpload, Output, Layout
from IPython.display import display

In [150]:
class ConfigurableDataAnalysis:
    def __init__(self):
        # Initialize empty attributes for data
        self.user_log = None
        self.activity_log = None
        self.component_codes = None
        self.merged_data = None
        self.reshaped_data = None

    def load_file(self,file_path):
      """
      function that helps loading the data
      """
      if file_path.lower().endswith('.csv'):
          return pd.read_csv(file_path)
      elif file_path.lower().endswith('.json'):
          return pd.read_json(file_path)
      else:
          raise ValueError(f"Unsupported file format for {file_path}. Only CSV and JSON are supported.")

    def load_data(self, user_log_path, activity_log_path, component_codes_path):
        """Load the data when the button is clicked.
          Supports both CSV and JSON file formats.
        """
        try:
            # Check if files exist
            for file_path in [user_log_path, activity_log_path, component_codes_path]:
              if not os.path.exists(file_path):
                  print(f"Error: The file '{file_path}' does not exist.")
                  return

            # Load the files into DataFrames
            self.user_log = self.load_file(user_log_path)
            self.activity_log = self.load_file(activity_log_path)
            self.component_codes = self.load_file(component_codes_path)



            print("Data loaded successfully:")
            print(f"User log file: {user_log_path}")
            print(f"Activity log file: {activity_log_path}")
            print(f"Component codes file: {component_codes_path}")
        except Exception as e:
            print(f"Error loading data: {str(e)}")


    def convert_to_json(self):
        self.activity_log.to_json("ACTIVITY_LOG.json",orient="records")
        self.user_log.to_json("USER_LOG.json",orient="records")
        self.component_codes.to_json("COMPONENT_CODES.json",orient="records")

        print("successfully converted to json")

    def clean_data(self):
      # TODO save indexes if data is removed to sync the two csv files
      try:

        original_counts = {
            'user_log': len(self.user_log) if self.user_log is not None else 0,
            'activity_log': len(self.activity_log) if self.activity_log is not None else 0,
            'component_codes': len(self.component_codes) if self.component_codes is not None else 0
        }

        # Drop rows with missing values
        if self.user_log is not None:
            self.user_log = self.user_log.dropna()
            print("User log data cleaned successfully.")
        else:
            print("No user log data to clean.")

        if self.activity_log is not None:
            self.activity_log = self.activity_log.dropna()
            print("Activity log data cleaned successfully.")
        else:
            print("No activity log data to clean.")

        if self.component_codes is not None:
            self.component_codes = self.component_codes.dropna()
            print("Component codes data cleaned successfully.")
        else:
            print("No component codes data to clean.")

        rows_removed = {
              'user_log': original_counts['user_log'] - len(self.user_log) if self.user_log is not None else 0,
              'activity_log': original_counts['activity_log'] - len(self.activity_log) if self.activity_log is not None else 0,
              'component_codes': original_counts['component_codes'] - len(self.component_codes) if self.component_codes is not None else 0
          }

        print("NaN values removed:")
        for df_name, removed in rows_removed.items():
            print(f"{df_name}: {removed} rows removed")

      except Exception as e:
            print(f"Error during cleaning: {str(e)}")


    def remove_data(self):
      try:
        # first remove all rows in activity_log that contain "System" or "Folder and the matching rows in user_log"
        if self.user_log is None or self.activity_log is None:
            print("Error: Both DataFrames must be loaded first")
            return

        # Store original lengths
        original_length = len(self.user_log)

        # Create mask for filtering (True for rows we want to keep)
        mask = ~self.activity_log['Component'].isin(['System', 'Folder'])

        # Apply filtering to both DataFrames using the mask
        self.user_log = self.user_log[mask]
        self.activity_log = self.activity_log[mask]

        # Calculate number of rows removed
        rows_removed = original_length - len(self.user_log)

        print(f"Filtering complete:")
        print(f"Removed {rows_removed} rows containing 'System' or 'Folder'")
        print(f"Remaining rows in each DataFrame: {len(self.user_log)}")


        # remove the rows from component_codes
        if self.component_codes is None:
            print("Error: component_codes DataFrame must be loaded first")
            return
        self.component_codes = self.component_codes[~self.component_codes['Component'].isin(['System', 'Folder'])].reset_index(drop=True)
        print("removed the rows from component_codes")


      except Exception as e:
          print(f"Error during filtering: {str(e)}")

    def rename_data(self):
      try:
        self.activity_log= self.activity_log.rename(columns={'User Full Name *Anonymized': 'USER_ID'})
        self.user_log = self.user_log.rename(columns={'User Full Name *Anonymized': 'USER_ID'})
        print("renamed the columns")
      except Exception as e:
          print(f"Error during renaming: {str(e)}")



    def merge_data(self):
        """Concatenate all CSV files into one DataFrame."""
        try:
            # Ensure data is loaded
            if self.user_log is not None and self.activity_log is not None and self.component_codes is not None:
                # Concatenate user_log and activity_log horizontally
                concatenated_data = pd.concat(
                    [self.user_log, self.activity_log],
                    axis=1
                )

                # Remove duplicate columns if any
                concatenated_data = concatenated_data.loc[:, ~concatenated_data.columns.duplicated()]

                # Convert 'Date' column to datetime, handling the day-first format
                concatenated_data['Date'] = pd.to_datetime(concatenated_data['Date'], errors='coerce', dayfirst=True)

                # Merge with component_codes
                concatenated_data = concatenated_data.merge(
                    self.component_codes,
                    on="Component",
                    how="right"  # Adjust 'how' to 'inner', 'left', 'right', or 'outer' as needed
                )

                # Set merged data to self.merged_data
                self.merged_data = concatenated_data  # Save the result
                print("Data merged successfully!")
                return "Data merged successfully!"
            else:
                return "No data loaded. Please load all necessary data files (user_log, activity_log, component_codes) first."
        except Exception as e:
            return f"Error during data merge: {e}"

    def reshape_data(self):
        """Reshape data using pivot."""
        try:
            # Ensure data is merged
            if self.merged_data is None:
                raise ValueError("Data must be merged before reshaping.")

            # copy the merged data
            self.reshaped_data = self.merged_data.copy()

            # combine Date and Time to one column
            self.reshaped_data["Date"]=pd.to_datetime(self.reshaped_data['Date'].astype(str) + ' ' + self.reshaped_data['Time'].astype(str))

            # make the pivot table
            self.reshaped_data = pd.pivot_table(
            self.reshaped_data,
            index='USER_ID',
            columns="Date",
            values =  "Component",
            aggfunc=lambda x: list(x) # check if there are duplicates in the time and if not change the code to this aggfunc='first
            )
            print("Data reshaped successfully.")
        except Exception as e:
            print(f"Error during reshaping: {str(e)}")

    def count_data(self):
      try:
        # Add the monthly column to merged_data
        self.merged_data["Month_Year"]=self.merged_data["Date"].dt.to_period("M")
        monthly_counts = pd.pivot_table(
            self.merged_data,
            index='USER_ID',
            columns='Month_Year',
            values='Component',
            aggfunc=lambda x: {comp: list(x).count(comp) for comp in set(x)}
        )

        self.reshaped_data=pd.concat({
          'daily_components': self.reshaped_data,
          'monthly_counts': monthly_counts
      }, axis=1)

        print("count rows are added")
      except Exception as e:
            print(f"Error generating counts: {str(e)}")

    def produce_output_statistics(self):
      try:
        # get the counts for each month as a pandas series
        quiz_counts= self.reshaped_data["monthly_counts"].map(lambda x: x.get('Quiz')if isinstance(x, dict) else 0)
        assignment_counts= self.reshaped_data["monthly_counts"].map(lambda x: x.get('Assignment')if isinstance(x, dict) else 0)
        lecture_counts= self.reshaped_data["monthly_counts"].map(lambda x: x.get('Lecture')if isinstance(x, dict) else 0)
        Attendance_counts= self.reshaped_data["monthly_counts"].map(lambda x: x.get('Attendence')if isinstance(x, dict) else 0)
        Survey_counts= self.reshaped_data["monthly_counts"].map(lambda x: x.get('Survey')if isinstance(x, dict) else 0)

        # calculate the mean,mode,median for each of the columns
        self.quiz_mean=quiz_counts.mean(axis=0)
        self.assignment_mean=assignment_counts.mean(axis=0)
        self.lecture_mean=lecture_counts.mean(axis=0)
        self.Attendance_mean=Attendance_counts.mean(axis=0)
        self.Survey_mean=Survey_counts.mean(axis=0)

        self.quiz_mode=quiz_counts.mode(axis=0).values[0]
        self.assignment_mode=assignment_counts.mode(axis=0).values[0]
        self.lecture_mode=lecture_counts.mode(axis=0).values[0]
        self.Attendance_mode=Attendance_counts.mode(axis=0).values[0]
        self.Survey_mode=Survey_counts.mode(axis=0).values[0]

        self.quiz_median=quiz_counts.median(axis=0)
        self.assignment_median=assignment_counts.median(axis=0)
        self.lecture_median=lecture_counts.median(axis=0)
        self.Attendance_median=Attendance_counts.median(axis=0)
        self.Survey_median=Survey_counts.median(axis=0)

        # a)

        print("\nMonthly Statistics:")
        print(f"Assignments - September - Mean: {self.assignment_mean[0]}, Median: {self.assignment_median[0]}, Mode: {self.assignment_mode[0]}")
        print(f"Assignments - October - Mean: {self.assignment_mean[1]}, Median: {self.assignment_median[1]}, Mode: {self.assignment_mode[1]}")
        print(f"Assignments - November - Mean: {self.assignment_mean[2]}, Median: {self.assignment_median[2]}, Mode: {self.assignment_mode[2]}")
        print(f"Assignments - December - Mean: {self.assignment_mean[3]}, Median: {self.assignment_median[3]}, Mode: {self.assignment_mode[3]}")

        print(f"Lectures - September - Mean: {self.lecture_mean[0]}, Median: {self.lecture_median[0]}, Mode: {self.lecture_mode[0]}")
        print(f"Lectures - October - Mean: {self.lecture_mean[1]}, Median: {self.lecture_median[1]}, Mode: {self.lecture_mode[1]}")
        print(f"Lectures - November - Mean: {self.lecture_mean[2]}, Median: {self.lecture_median[2]}, Mode: {self.lecture_mode[2]}")
        print(f"Lectures - December - Mean: {self.lecture_mean[3]}, Median: {self.lecture_median[3]}, Mode: {self.lecture_mode[3]}")

        print(f"Attendance - September - Mean: {self.Attendance_mean[0]}, Median: {self.Attendance_median[0]}, Mode: {self.Attendance_mode[0]}")
        print(f"Attendance - October - Mean: {self.Attendance_mean[1]}, Median: {self.Attendance_median[1]}, Mode: {self.Attendance_mode[1]}")
        print(f"Attendance - November - Mean: {self.Attendance_mean[2]}, Median: {self.Attendance_median[2]}, Mode: {self.Attendance_mode[2]}")
        print(f"Attendance - December - Mean: {self.Attendance_mean[3]}, Median: {self.Attendance_median[3]}, Mode: {self.Attendance_mode[3]}")

        print(f"Survey - September - Mean: {self.Survey_mean[0]}, Median: {self.Survey_median[0]}, Mode: {self.Survey_mode[0]}")
        print(f"Survey - October - Mean: {self.Survey_mean[1]}, Median: {self.Survey_median[1]}, Mode: {self.Survey_mode[1]}")
        print(f"Survey - November - Mean: {self.Survey_mean[2]}, Median: {self.Survey_median[2]}, Mode: {self.Survey_mode[2]}")
        print(f"Survey - December - Mean: {self.Survey_mean[3]}, Median: {self.Survey_median[3]}, Mode: {self.Survey_mode[3]}")

        # b)
        self.quiz_counts_entire_year= quiz_counts.sum(axis=1)
        self.assignemnet_counts_entire_year= assignment_counts.sum(axis=1)
        self.lecture_counts_entire_year= lecture_counts.sum(axis=1)
        self.Attendance_counts_entire_year= Attendance_counts.sum(axis=1)
        self.Survey_counts_entire_year= Survey_counts.sum(axis=1)

        # Calculate statistics for entire year
        self.quiz_year_mean = quiz_counts_entire_year.mean()
        self.assignment_year_mean = assignemnet_counts_entire_year.mean()
        self.lecture_year_mean = lecture_counts_entire_year.mean()
        self.Attendance_year_mean = Attendance_counts_entire_year.mean()
        self.Survey_year_mean = Survey_counts_entire_year.mean()

        self.quiz_year_mode = quiz_counts_entire_year.mode().values[0]
        self.assignment_year_mode = assignemnet_counts_entire_year.mode().values[0]
        self.lecture_year_mode = lecture_counts_entire_year.mode().values[0]
        self.Attendance_year_mode = Attendance_counts_entire_year.mode().values[0]
        self.Survey_year_mode = Survey_counts_entire_year.mode().values[0]

        self.quiz_year_median = quiz_counts_entire_year.median()
        self.assignment_year_median = assignemnet_counts_entire_year.median()
        self.lecture_year_median = lecture_counts_entire_year.median()
        self.Attendance_year_median = Attendance_counts_entire_year.median()
        self.Survey_year_median = Survey_counts_entire_year.median()

        # Print statements for entire year statistics
        print("\nEntire Year Statistics:")
        print(f"Quiz - Mean: {self.quiz_year_mean}, Median: {self.quiz_year_median}, Mode: {self.quiz_year_mode}")
        print(f"Assignment - Mean: {self.assignment_year_mean}, Median: {self.assignment_year_median}, Mode: {self.assignment_year_mode}")
        print(f"Lecture - Mean: {self.lecture_year_mean}, Median: {self.lecture_year_median}, Mode: {self.lecture_year_mode}")
        print(f"Attendance - Mean: {self.Attendance_year_mean}, Median: {self.Attendance_year_median}, Mode: {self.Attendance_year_mode}")
        print(f"Survey - Mean: {self.Survey_year_mean}, Median: {self.Survey_year_median}, Mode: {self.Survey_year_mode}")



        print("Statistics generated successfully.")
      except Exception as e:
            print(f"Error generating statistics: {str(e)}")


    def generate_correlation(self):
        """Generate correlation matrix and display heatmap."""
        try:
            components = [self.quiz_counts_entire_year, self.assignemnet_counts_entire_year, self.lecture_counts_entire_year, self.Attendance_counts_entire_year, self.Survey_counts_entire_year]
            names = ["quiz", "assignment", "lecture", "Attendance", "Survey"]
            fig, axs = plt.subplots(len(components), 1, figsize=(20, 25))  # One subplot per component (vertical layout)

            for i, (component, name) in enumerate(zip(components, names)):
                x = component.index.values
                y = component.values

                # Calculate the regression line
                slope, intercept = np.polyfit(x, y, 1)
                regression_line = slope * x + intercept

                # Plotting in each subplot
                axs[i].scatter(x, y, color='blue', s=50, label=f"{name} Data Points")
                axs[i].plot(x, regression_line, color='red', linewidth=2, label=f"{name} Regression Line")

                # Adding labels and title
                axs[i].set_xlabel("User ID", fontsize=12)
                axs[i].set_ylabel(f"{name} Count", fontsize=12)
                axs[i].set_title(f"{name}: Scatterplot with Regression Line", fontsize=14)
                axs[i].grid(True, linestyle='--', alpha=0.7)
                axs[i].legend(fontsize=12)

                # Set axis limits to start from (0, 0)
                axs[i].set_xlim(left=0)
                axs[i].set_ylim(bottom=0)


            # Adjust layout for better spacing
            plt.tight_layout()

            # Show the figure
            plt.show()

        except Exception as e:
              print(f"Error generating correlation matrix: {str(e)}")
    def backup_files(self):
          try:
            self.activity_log.to_json("ACTIVITY_LOG_backup.json",orient="records")
          except Exception as e:
                print(f"Error backup: {str(e)}")
          try:
            self.user_log.to_json("USER_LOG_backup.json",orient="records")
          except Exception as e:
                print(f"Error during backup: {str(e)}")
          try:
            self.component_codes.to_json("COMPONENT_CODES_backup.json",orient="records")
          except Exception as e:
                print(f"Error during backup: {str(e)}")
          try:
            # the year_month column has to be converted to string before saving, because pandas has a problem to save period object to json
            # make a copy, so that the original datframe doesnt get updated
            merged_df = self.merged_data.copy()
            for column in merged_df.columns:
          # Check if dtype is period
              if isinstance(merged_df[column].dtype, pd.PeriodDtype):
            # Convert period to string
                merged_df[column] = merged_df[column].astype(str)
            merged_df.to_json("MERGED_Data_backup.json",orient="records")
          except Exception as e:
                print(f"Error during backup: {str(e)}")
          try:
            self.reshaped_data.to_json("Reshaped_Data_backup.json",orient='records')
          except Exception as e:
                print(f"Error during backup: {str(e)}")

    def backup_to_db(self, backup_db_path="backup.db"):
        """Backup data to a SQLite database (overwrite if exists)."""
        try:
            if self.merged_data is None:
                raise ValueError("Data must be merged before backup.")

            conn = sqlite3.connect(backup_db_path)
            self.merged_data.to_sql('merged_data', conn, if_exists='replace', index=False)
            conn.close()

            # Generate backup success graph
            sns.set(style="darkgrid")
            plt.figure(figsize=(6, 4))
            sns.barplot(x=self.merged_data['Component'], y=self.merged_data['User_ID'])
            plt.title(f"Backup to DB Successful: {backup_db_path}")
            plt.show()

            print(f"Data successfully backed up to database {backup_db_path}.")
        except Exception as e:
            print(f"Error during backup to database: {e}")



In [147]:
test_df= ConfigurableDataAnalysis()

In [148]:
test_df.load_data("USER_LOG.csv", "ACTIVITY_LOG.csv", "COMPONENT_CODES.csv")


test_df.convert_to_json()

test_df.clean_data()

test_df.remove_data()

test_df.rename_data()

test_df.merge_data()

test_df.reshape_data()

test_df.count_data()

#test_df.produce_output_statistics()

#test_df.generate_correlation()

test_df.backup_files()



Data loaded successfully:
User log file: USER_LOG.csv
Activity log file: ACTIVITY_LOG.csv
Component codes file: COMPONENT_CODES.csv
successfully converted to json
User log data cleaned successfully.
Activity log data cleaned successfully.
Component codes data cleaned successfully.
NaN values removed:
user_log: 0 rows removed
activity_log: 0 rows removed
component_codes: 0 rows removed
Filtering complete:
Removed 5573 rows containing 'System' or 'Folder'
Remaining rows in each DataFrame: 145262
removed the rows from component_codes
renamed the columns
Data merged successfully!
Data reshaped successfully.
count rows are added
Error during backup: orient='table' is not supported for MultiIndex columns


In [158]:
from ipyfilechooser import FileChooser

In [164]:
class App:
    def __init__(self):
        self.analysis = ConfigurableDataAnalysis()
        self.output = Output()

        """
        self.file_upload = {
            "user_log": FileUpload(accept='.csv,.json', multiple=False),
            "activity_log": FileUpload(accept='.csv,.json', multiple=False),
            "component_codes": FileUpload(accept='.csv,.json', multiple=False),
        }
        """
        self.file_upload = {
    "user_log": FileChooser(
        path='.',
        filename='',
        title='Select User Log',
        filter_pattern=['*.csv', '*.json']
    ),
    "activity_log": FileChooser(
        path='.',
        filename='',
        title='Select Activity Log',
        filter_pattern=['*.csv', '*.json']
    ),
    "component_codes": FileChooser(
        path='.',
        filename='',
        title='Select Component Codes',
        filter_pattern=['*.csv', '*.json']
    )
}

        # Create button widgets
        self.load_button = Button(description="Load Data")
        self.load_button.on_click(self.load_data)

        self.convert_json_button = Button(description="Convert to JSON")
        self.convert_json_button.on_click(self.convert_to_JSON)

        self.clean_button = Button(description="Clean Data")
        self.clean_button.on_click(self.clean_and_prepare_data)

        self.merge_button = Button(description="Merge Data")
        self.merge_button.on_click(self.merge_data)

        self.reshape_button = Button(description="Reshape Data")
        self.reshape_button.on_click(self.reshape_data)

        self.count_button = Button(description="Count Data")
        self.count_button.on_click(self.count)

        self.backup_file_button = Button(description="Backup to File")
        self.backup_file_button.on_click(self.backup_to_file)

        self.backup_db_button = Button(description="Backup to Database")
        self.backup_db_button.on_click(self.backup_to_db)

        self.stats_button = Button(description="Generate Stats")
        self.stats_button.on_click(self.generate_statistics)

        self.correlation_button = Button(description="Generate Correlation")
        self.correlation_button.on_click(self.generate_correlation)

    def load_data(self, _):
        """Load datasets from uploaded CSV files."""
        with self.output:
            self.output.clear_output()
            print("starting upload")
            user_log_path = self.file_upload["user_log"].selected

            print(user_log_path)
            print(type(user_log_path))

            try:
                user_log_path = self.file_upload["user_log"].selected
                activity_log_path = self.file_upload["activity_log"].selected
                component_codes_path = self.file_upload["component_codes"].selected

                # Load data using the updated file paths
                self.analysis.load_data(user_log_path, activity_log_path, component_codes_path)
            except Exception as e:
                print(f"Error loading data: {e}")

    def convert_to_JSON(self, _):
        """Handle button click to convert loaded data to JSON format."""
        with self.output:
            self.output.clear_output()  # Clear previous outputs
            try:
                # Attempt to convert the data to JSON
                self.analysis.convert_to_json()
                print("Conversion to JSON successful.")
            except AttributeError as e:
                print("Error: Make sure the data has been loaded before converting to JSON.")
            except Exception as e:
                print(f"Error during conversion to JSON: {e}")


    def clean_and_prepare_data(self, _):
        """Clean and prepare the data."""
        with self.output:
            self.output.clear_output()
            try:
                self.analysis.clean_data()
            except Exception as e:
                print(f"Error during cleaning: {e}")

    def merge_data(self, _):
        """Merge datasets."""
        with self.output:
            self.output.clear_output()
            print("starting merge")
            try:
                self.analysis.merge_data()
            except Exception as e:
                print(f"Error during merging: {e}")

    def reshape_data(self, _):
        """Reshape datasets."""
        with self.output:
            self.output.clear_output()
            try:
                self.analysis.reshape_data()
            except Exception as e:
                print(f"Error during reshaping: {e}")

    def count(self, _):
      with self.output:
          self.output.clear_output()  # Clear previous outputs
          try:
              # Attempt to convert the data to JSON
              self.analysis.count_data()
              print("Conversion to JSON successful.")
          except AttributeError as e:
              print("Error: Make sure the data has been loaded before converting to JSON.")
          except Exception as e:
              print(f"Error during conversion to JSON: {e}")

    def backup_to_file(self, _):
        """Backup to file."""
        with self.output:
            self.output.clear_output()
            try:
                self.analysis.backup_files()
            except Exception as e:
                print(f"Error during backup to file: {e}")

    def backup_to_db(self, _):
        """Backup to database."""
        with self.output:
            self.output.clear_output()
            try:
                self.analysis.backup_to_db()
            except Exception as e:
                print(f"Error during backup to database: {e}")

    def generate_statistics(self, _):
        """Generate statistics."""
        with self.output:
            self.output.clear_output()
            try:
                statistics = self.analysis.generate_statistics()
                print(f"Generated Statistics: {statistics}")
            except Exception as e:
                print(f"Error during statistics generation: {e}")

    def generate_correlation(self, _):
        """Generate correlation heatmap."""
        with self.output:
            self.output.clear_output()
            try:
                self.analysis.generate_correlation()
            except Exception as e:
                print(f"Error during correlation generation: {e}")

    def display_gui(self):
        """Display the interactive GUI with buttons arranged in one column."""
        buttons = VBox([
            self.file_upload["user_log"],  # Upload user_log
            self.file_upload["activity_log"],  # Upload activity_log
            self.file_upload["component_codes"],  # Upload component_codes
            self.load_button,  # Load data
            self.convert_json_button,  # Convert to JSON button
            self.merge_button,  # Merge data
            self.clean_button,  # Clean data
            self.reshape_button,  # Reshape data
            self.count_button, # Count user
            self.backup_file_button,  # Backup to file
            self.backup_db_button,  # Backup to database
            self.stats_button,  # Generate stats
            self.correlation_button,  # Generate correlation
            self.output  # Output area for displaying results
        ])
        display(buttons)



class Main:
    def __init__(self):
        self.app = App()

    def run(self):
        self.app.display_gui()

# Run the application
main = Main()
main.run()


VBox(children=(FileChooser(path='/content/drive/MyDrive/paul_project', filename='', title='Select User Log', s…

In [None]:
# TODO
# Backup einlesen
# Remove button
# dataframes zeigen bei den einzelnen merges
# Reihenfolge Buttons
# edge case checken