In [1]:
# Check the python version
!python --version

Python 3.10.9


In [2]:
import pandas as pd

import numpy as np

import scipy.stats

import seaborn as sns

#File input/output
import os

import time

#JSON file handling
import json

#Data visualisation
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg

#GUI
import tkinter as tk
from tkinter import ttk
from tkinter import *
from tkinter import filedialog
from tkinter import messagebox
from tkinter.filedialog import asksaveasfile
from tkinter.filedialog import asksaveasfilename

#Date and Time
from datetime import datetime

# Global Variables

In [3]:
# Shared notebook state read by the GUI callbacks defined in later cells.

# Combined DataFrame; stays None until load_csv_files() assigns it.
df = None

# Module-level placeholder for a JSON rendering of the data.
json_data = None

# Strings regarded as missing values.
missing_values = ["NaN", "", " "]

# Default NGR values to drop.
# NOTE(review): a function named drop_ngr_values is defined in a later cell and
# rebinds this name, so this list is not reachable once that cell has run.
drop_ngr_values = ["NZ02553847", "SE213515", "NT05399374", "NT25265908"]

# EID codes of the DAB multiplexes the analysis filters on.
dab_multiplexes = ["C18A", "C18F", "C188"]

# Load the Data

In [4]:
# Function to load the CSV files, merge them on 'id' and back the result up as JSON
def load_csv_files():
    """Let the user pick CSV files, outer-merge them on 'id' and write a JSON backup.

    At least two files must be selected. Every file is read as UTF-8 first and
    re-read as Latin-1 if that raises UnicodeDecodeError. The files are merged
    in selection order.

    The global ``df`` is replaced only after every file has been read and
    merged. A failure part-way through therefore leaves previously loaded data
    untouched, and it is reported in an error dialog instead of escaping from
    the button callback.

    Side effects: rebinds the global ``df``, writes ``data_<timestamp>.json``
    to the current working directory and updates ``status_label``.
    """
    global df

    file_paths = filedialog.askopenfilenames(title="Select CSV Files", filetypes=[("CSV Files", "*.csv")])
    if len(file_paths) < 2:
        messagebox.showerror("Error", "Please select at least two CSV files.")
        return

    def read_with_fallback(path):
        # One encoding strategy for every file: UTF-8 first, Latin-1 second.
        try:
            return pd.read_csv(path, encoding='utf-8')
        except UnicodeDecodeError:
            return pd.read_csv(path, encoding='latin')

    try:
        merged = read_with_fallback(file_paths[0])
        for file_path in file_paths[1:]:
            # Merge based on 'id' column
            merged = merged.merge(read_with_fallback(file_path), how='outer', on='id')
    except (OSError, KeyError, ValueError) as exc:
        # Covers unreadable files (OSError), a file without an 'id' column
        # (KeyError) and pandas parsing/merge errors (ValueError subclasses).
        messagebox.showerror("Error", f"Could not load the selected CSV files:\n{exc}")
        return

    # Publish the result only now that the whole load has succeeded.
    df = merged

    # Generate a unique filename for backup
    backup_filename = f"data_{time.strftime('%Y%m%d%H%M%S')}.json"

    # Translate DataFrame to JSON format
    json_data = df.to_json(orient="records")

    # Save the JSON data to a file (for backup)
    with open(backup_filename, "w") as file:
        file.write(json_data)

    # Provide feedback to the user
    status_label.config(text="CSV files loaded and converted to JSON format successfully!")

# Clean the Data

In [5]:
# Function to clean the data
def clean_data():
    """Clean the global DataFrame ``df``.

    Steps:
      1. Drop columns that contain no data at all.
      2. Replace missing values with 0 in every float64 column.
      3. Convert 'In-Use ERP Total' to float (removing ',' separators when the
         column holds text) and round it to 2 decimal places.

    The function may be called repeatedly: once 'In-Use ERP Total' is numeric
    the text clean-up is skipped instead of failing on the ``.str`` accessor.

    Raises:
        KeyError: if ``df`` has no 'In-Use ERP Total' column.
    """
    global df

    # Drop columns with no data
    df.dropna(axis=1, how='all', inplace=True)

    # Handling missing values for float64 columns. The filled columns are
    # assigned back to the frame: fillna(inplace=True) on ``df[column]`` acts
    # on an intermediate object and is not guaranteed to update ``df``.
    float_columns = [column for column in df.columns if df[column].dtype == 'float64']
    if float_columns:
        df[float_columns] = df[float_columns].fillna(0)

    # Convert 'In-Use ERP Total' column to float
    erp_total = df['In-Use ERP Total']
    if not pd.api.types.is_numeric_dtype(erp_total):
        # Only text needs its separators stripped; numeric data has no .str accessor.
        erp_total = erp_total.str.replace(',', '', regex=False)

    # Round the 'In-Use ERP Total' column to 2 decimal places
    df['In-Use ERP Total'] = erp_total.astype(float).round(2)


In [6]:
# Function to clean the data and display the first five rows
def clean_and_display_data():
    """Run clean_data() on the loaded data and preview the result in the GUI.

    Shows an error dialog and returns when no data has been loaded. Otherwise
    the output text area is replaced with the first five rows of ``df``
    followed by its row and column counts, and the status label is updated.
    """
    global df

    # Nothing to clean until the CSV files have been loaded.
    if df is None:
        messagebox.showerror("Error", "No data available. Please load data first.")
        return

    clean_data()

    # Build both pieces of text before touching the widget.
    preview = df.head(5).to_string(index=False)
    summary = f"\n\nNumber of Rows: {df.shape[0]}\nNumber of Columns: {df.shape[1]}"

    # The text area is kept read-only; unlock it only while rewriting it.
    output_text.config(state=tk.NORMAL)
    output_text.delete("1.0", tk.END)
    for chunk in (preview, summary):
        output_text.insert(tk.END, chunk)
    output_text.config(state=tk.DISABLED)

    status_label.config(text="Data cleaned successfully!")

In [7]:
import threading

# Function to load the CSV files, merge them on 'id' and back the result up as JSON
def load_csv_files():
    """Let the user pick CSV files, read them in parallel and outer-merge them on 'id'.

    At least two files must be selected. Each file is read on its own thread
    (UTF-8 first, Latin-1 if that raises UnicodeDecodeError). The frames are
    then merged on the calling thread in the order the files were selected, so
    the result does not depend on which thread finishes first.

    Errors raised while reading or merging are collected and shown in an error
    dialog. The global ``df`` is replaced only when the whole load succeeded.

    Side effects: rebinds the global ``df``, writes ``data_<timestamp>.json``
    to the current working directory and updates ``status_label``.
    """
    global df

    file_paths = filedialog.askopenfilenames(title="Select CSV Files", filetypes=[("CSV Files", "*.csv")])
    if len(file_paths) < 2:
        messagebox.showerror("Error", "Please select at least two CSV files.")
        return

    # One slot per selected file keeps the frames in selection order.
    frames = [None] * len(file_paths)
    failures = []

    def read_into_slot(slot, path):
        # Worker body. An exception inside a thread never reaches the caller,
        # so it is recorded here and reported after join().
        try:
            try:
                frames[slot] = pd.read_csv(path, encoding='utf-8')
            except UnicodeDecodeError:
                frames[slot] = pd.read_csv(path, encoding='latin')
        except Exception as exc:
            failures.append(f"{path}: {exc}")

    # Create a thread for each file loading
    threads = [
        threading.Thread(target=read_into_slot, args=(slot, path))
        for slot, path in enumerate(file_paths)
    ]
    for thread in threads:
        thread.start()

    # Wait for all threads to complete
    for thread in threads:
        thread.join()

    if failures:
        messagebox.showerror("Error", "Could not read the selected CSV files:\n" + "\n".join(failures))
        return

    try:
        merged = frames[0]
        for frame in frames[1:]:
            # Merge based on 'id' column
            merged = merged.merge(frame, how='outer', on='id')
    except (KeyError, ValueError) as exc:
        # A file without an 'id' column raises KeyError; pandas merge errors
        # are ValueError subclasses.
        messagebox.showerror("Error", f"Could not merge the selected CSV files:\n{exc}")
        return

    # Publish the result only now that the whole load has succeeded.
    df = merged

    # Generate a unique filename for backup
    backup_filename = f"data_{time.strftime('%Y%m%d%H%M%S')}.json"

    # Translate DataFrame to JSON format
    json_data = df.to_json(orient="records")

    # Save the JSON data to a file (for backup)
    with open(backup_filename, "w") as file:
        file.write(json_data)

    # Provide feedback to the user
    status_label.config(text="CSV files loaded and converted to JSON format successfully!")
    

# Worker for threaded loading: read one CSV and fold it into the shared DataFrame
def load_csv_thread(file_path):
    """Read ``file_path`` and outer-merge it on 'id' into the global ``df``.

    The file is decoded as UTF-8; if that raises UnicodeDecodeError it is read
    again as Latin-1. The merge itself runs while ``thread_lock`` is held.

    Args:
        file_path: path of the CSV file to read.
    """
    global df

    try:
        incoming = pd.read_csv(file_path, encoding='utf-8')
    except UnicodeDecodeError:
        incoming = pd.read_csv(file_path, encoding='latin')

    # Hold the lock for the read-modify-write of the shared frame.
    thread_lock.acquire()
    try:
        df = df.merge(incoming, how='outer', on='id')
    finally:
        thread_lock.release()

# Function to clean the data
def clean_data():
    """Clean the global DataFrame ``df``.

    Steps:
      1. Drop columns that contain no data at all.
      2. Replace missing values with 0 in every float64 column.
      3. Convert 'In-Use ERP Total' to float (removing ',' separators when the
         column holds text) and round it to 2 decimal places.

    The function may be called repeatedly: once 'In-Use ERP Total' is numeric
    the text clean-up is skipped instead of failing on the ``.str`` accessor.

    Raises:
        KeyError: if ``df`` has no 'In-Use ERP Total' column.
    """
    global df

    # Drop columns with no data
    df.dropna(axis=1, how='all', inplace=True)

    # Handling missing values for float64 columns. The filled columns are
    # assigned back to the frame: fillna(inplace=True) on ``df[column]`` acts
    # on an intermediate object and is not guaranteed to update ``df``.
    float_columns = [column for column in df.columns if df[column].dtype == 'float64']
    if float_columns:
        df[float_columns] = df[float_columns].fillna(0)

    # Convert 'In-Use ERP Total' column to float
    erp_total = df['In-Use ERP Total']
    if not pd.api.types.is_numeric_dtype(erp_total):
        # Only text needs its separators stripped; numeric data has no .str accessor.
        erp_total = erp_total.str.replace(',', '', regex=False)

    # Round the 'In-Use ERP Total' column to 2 decimal places
    df['In-Use ERP Total'] = erp_total.astype(float).round(2)

# Lock held by load_csv_thread() while it merges into the shared global
# DataFrame, so concurrent worker threads cannot interleave their updates.
thread_lock = threading.Lock()

In [8]:
# Function to preview the rows that match the specified NGR values
def drop_ngr_values():
    """Show the rows whose 'NGR' is in the requested list and enable "Confirm".

    The NGR values are read from ``ngr_entry`` as a comma-separated list; when
    the entry is blank the built-in defaults are used. Nothing is removed here:
    the matching rows are only displayed, and ``confirm_button`` is enabled so
    the user can carry out the drop.

    Shows an error dialog and returns when no data has been loaded.
    """
    global df

    # Same guard as the other handlers: indexing ``None`` would raise.
    if df is None:
        messagebox.showerror("Error", "No data available. Please load data first.")
        return

    ngr_input = ngr_entry.get().strip()
    if ngr_input:
        ngr_values = [ngr.strip() for ngr in ngr_input.split(',')]
    else:
        ngr_values = ['NZ02553847', 'SE213515', 'NT05399374', 'NT25265908']

    # Filter and display the rows with the specified NGR values
    rows_with_ngr = df[df['NGR'].isin(ngr_values)]

    # Clear the output text area and display the new data
    output_text.config(state=tk.NORMAL)
    output_text.delete("1.0", tk.END)
    output_text.insert(tk.END, rows_with_ngr.to_string(index=False))
    output_text.insert(tk.END, f"\n\nNumber of Rows: {rows_with_ngr.shape[0]}\nNumber of Columns: {rows_with_ngr.shape[1]}")
    output_text.config(state=tk.DISABLED)

    # Enable the confirm button
    confirm_button.config(state=tk.NORMAL)

In [9]:
# Function to confirm and drop the rows with the specified NGR values
def confirm_drop_rows():
    """Remove the rows whose 'NGR' is in the requested list from the global ``df``.

    The NGR values are read from ``ngr_entry`` as a comma-separated list; when
    the entry is blank the built-in defaults are used. Afterwards the confirm
    button is disabled, the first ten remaining rows and the new shape are
    shown in the output area, and the status label is updated.

    Shows an error dialog and returns when no data has been loaded.
    """
    global df

    # Same guard as the other handlers: indexing ``None`` would raise.
    if df is None:
        messagebox.showerror("Error", "No data available. Please load data first.")
        return

    ngr_input = ngr_entry.get().strip()
    if ngr_input:
        ngr_values = [ngr.strip() for ngr in ngr_input.split(',')]
    else:
        # Must match the defaults used by the preview step (drop_ngr_values),
        # otherwise the rows shown to the user are not the rows removed.
        ngr_values = ['NZ02553847', 'SE213515', 'NT05399374', 'NT25265908']

    # Drop the rows with the specified NGR values
    df = df[~df['NGR'].isin(ngr_values)]

    # Disable the confirm button
    confirm_button.config(state=tk.DISABLED)

    # Clear the output text area and display the cleaned data
    output_text.config(state=tk.NORMAL)
    output_text.delete("1.0", tk.END)
    output_text.insert(tk.END, df.head(10).to_string(index=False))
    output_text.insert(tk.END, f"\n\nNumber of Rows: {df.shape[0]}\nNumber of Columns: {df.shape[1]}")
    output_text.config(state=tk.DISABLED)

    # Provide feedback to the user
    status_label.config(text="Rows dropped successfully! Data cleaning is complete! You can now move on to Reshaping the Data")

# Reshaping Data

In [10]:
# Function to extract DAB multiplex information from 'EID' column
def get_dab_info(eid_value, multiplex):
    """Return the part of a comma-separated EID string that belongs to ``multiplex``.

    The lookup is positional: when ``multiplex`` occurs in ``eid_value``, the
    string is split on ',' and the part at the index that ``multiplex`` has in
    the global ``dab_multiplexes`` list is returned, stripped.

    NOTE(review): because of the positional lookup, an EID that lists only some
    of the multiplexes can yield '' or a different code than ``multiplex`` --
    confirm that this is the intended behaviour.

    Args:
        eid_value: value of the 'EID' column; non-string values yield ''.
        multiplex: multiplex code to look for.

    Returns:
        The stripped part, or '' when ``eid_value`` is not a string, does not
        contain ``multiplex``, has too few parts, or ``multiplex`` is not one
        of ``dab_multiplexes``.
    """
    if not isinstance(eid_value, str) or multiplex not in eid_value:
        return ''

    # A user-supplied name that is not in dab_multiplexes has no position to
    # look up; report "nothing found" instead of letting list.index() raise.
    if multiplex not in dab_multiplexes:
        return ''

    position = dab_multiplexes.index(multiplex)
    parts = eid_value.split(',')
    if position < len(parts):
        return parts[position].strip()
    return ''

In [11]:
# Function to create new columns for DAB multiplexes
def create_dab_columns():
    """Add one column per DAB multiplex to the global ``df``.

    The column names are read from ``dab_multiplex_entry`` as a comma-separated
    list; blank names (for example from a trailing comma) are ignored. When the
    entry is blank the global ``dab_multiplexes`` list is used. Each new column
    holds ``get_dab_info(EID, name)`` for every row.

    Afterwards the first ten rows and the new shape are shown in the output
    area and the status label is updated. Shows an error dialog and returns
    when no data has been loaded.
    """
    global df, dab_multiplexes

    # Same guard as the other handlers: indexing ``None`` would raise.
    if df is None:
        messagebox.showerror("Error", "No data available. Please load data first.")
        return

    # Get user input for DAB multiplex columns
    dab_columns_input = dab_multiplex_entry.get().strip()
    if dab_columns_input:
        # Skip empty names: '' is a substring of every EID, so it would pass
        # get_dab_info's membership test and then fail the list lookup.
        dab_columns = [column.strip() for column in dab_columns_input.split(',') if column.strip()]
    else:
        dab_columns = dab_multiplexes

    # Extract DAB multiplex information from 'EID' column to new columns
    for column in dab_columns:
        df[column] = df['EID'].apply(lambda x: get_dab_info(x, column))

    # Clear the output text area and display the cleaned data
    output_text.config(state=tk.NORMAL)
    output_text.delete("1.0", tk.END)
    output_text.insert(tk.END, df.head(10).to_string(index=False))
    output_text.insert(tk.END, f"\n\nNumber of Rows: {df.shape[0]}\nNumber of Columns: {df.shape[1]}")
    output_text.config(state=tk.DISABLED)

    # Provide feedback to the user
    status_label.config(text="DAB multiplex columns created successfully!")

In [12]:
# Function to calculate and display ERP statistics
def calculate_erp_statistics():
    """Show the mean, mode and median of ``df['Power(kW)']`` in the output area.

    Shows an error dialog and returns when no data has been loaded. When
    several values tie for the mode, the first one returned by pandas is shown.

    NOTE(review): the column is read straight from the global ``df``; elsewhere
    in this notebook 'Power(kW)' only appears as a rename applied to derived
    copies -- confirm that ``df`` itself carries this column.
    """
    global df

    # Nothing to summarise until the CSV files have been loaded.
    if df is None:
        messagebox.showerror("Error", "No data available. Please load data first.")
        return

    power = df['Power(kW)']
    mean_erp, mode_erp, median_erp = power.mean(), power.mode().iloc[0], power.median()

    report_lines = [
        f"Mean Power(kW): {mean_erp:.2f}\n",
        f"Mode Power(kW): {mode_erp:.2f}\n",
        f"Median Power(kW): {median_erp:.2f}\n",
    ]

    # The text area is kept read-only; unlock it only while rewriting it.
    output_text.config(state=tk.NORMAL)
    output_text.delete("1.0", tk.END)
    for line in report_lines:
        output_text.insert(tk.END, line)
    output_text.config(state=tk.DISABLED)

    status_label.config(text="ERP statistics calculated and displayed successfully!")


In [13]:
def extract_and_join():
    """Build, display and return the reduced DataFrame for the selected multiplexes.

    Keeps the rows of ``df`` whose 'EID' is one of ``dab_multiplexes``, keeps a
    fixed set of columns, renames 'In-Use Ae Ht' to 'Aerial height(m)' and
    'In-Use ERP Total' to 'Power(kW)', and renumbers the index from 0.

    Returns:
        The new DataFrame, or None (after an error dialog) when no data has
        been loaded.
    """
    global df, dab_multiplexes

    # Nothing to extract until the CSV files have been loaded.
    if df is None:
        messagebox.showerror("Error", "No data available. Please load data first.")
        return

    wanted_columns = ['id', 'Date','EID', 'NGR', 'Site', 'Site Height', 'In-Use Ae Ht', 'In-Use ERP Total']
    new_names = {'In-Use Ae Ht': 'Aerial height(m)', 'In-Use ERP Total': 'Power(kW)'}

    # Row filter, column selection, rename and re-index as one pipeline.
    new_df = (
        df.loc[df['EID'].isin(dab_multiplexes), wanted_columns]
        .rename(columns=new_names)
        .reset_index(drop=True)
    )

    table_text = new_df.to_string(index=False)
    shape_text = f"\n\nNumber of Rows: {new_df.shape[0]}\nNumber of Columns: {new_df.shape[1]}"

    # The text area is kept read-only; unlock it only while rewriting it.
    output_text.config(state=tk.NORMAL)
    output_text.delete("1.0", tk.END)
    for chunk in (table_text, shape_text):
        output_text.insert(tk.END, chunk)
    output_text.config(state=tk.DISABLED)

    status_label.config(text="New DataFrame created successfully!")

    return new_df

# Statistical Analysis

In [14]:
# Function to calculate and display ERP statistics
def erp_statistics():
    """Show the mean, mode and median 'Power(kW)' of the selected DAB multiplexes.

    Rows of the global ``df`` whose 'EID' is one of ``dab_multiplexes`` are
    selected and 'In-Use ERP Total' is renamed to 'Power(kW)' before the
    statistics are computed. When several values tie for the mode, the first
    one returned by pandas is shown.

    An error dialog is shown (and nothing is computed) when no data has been
    loaded or when the selection contains no power values; without that check
    ``mode().iloc[0]`` raises IndexError on an empty selection.
    """
    global df

    # Check if data is available
    if df is None:
        messagebox.showerror("Error", "No data available. Please load data first.")
        return

    # Filter the DataFrame to get rows with specified EID values
    selected_rows = df[df['EID'].isin(dab_multiplexes)]

    # Create a new DataFrame with desired columns
    selected_columns = ['id','EID', 'NGR', 'Site', 'Site Height', 'In-Use Ae Ht', 'In-Use ERP Total']
    new_df = selected_rows[selected_columns].copy()

    # Rename columns
    new_df.rename(columns={'In-Use Ae Ht': 'Aerial height(m)', 'In-Use ERP Total': 'Power(kW)'}, inplace=True)

    # mean/median/mode all ignore missing values, so dropping them first does
    # not change the results and makes the emptiness check reliable.
    power = new_df['Power(kW)'].dropna()
    if power.empty:
        messagebox.showerror("Error", "No Power(kW) values found for the selected DAB multiplexes.")
        return

    # Calculate mean, mode, and median for 'Power(kW)'
    mean_erp = power.mean()
    mode_erp = power.mode().iloc[0]
    median_erp = power.median()

    # Display the calculated statistics in the output text area
    output_text.config(state=tk.NORMAL)
    output_text.delete("1.0", tk.END)
    output_text.insert(tk.END, f"Statistics for Power(kW):\n")
    output_text.insert(tk.END, f"Mean Power(kW): {mean_erp:.2f}\n")
    output_text.insert(tk.END, f"Mode Power(kW): {mode_erp:.2f}\n")
    output_text.insert(tk.END, f"Median Power(kW): {median_erp:.2f}\n")
    output_text.config(state=tk.DISABLED)

    # Provide feedback to the user
    status_label.config(text="ERP statistics calculated and displayed successfully!")

In [15]:
# Function to calculate and display Site Height statistics
def site_height_statistics():
    """Show the mean, mode and median 'Site Height' above 75 for the selected multiplexes.

    Rows of the global ``df`` whose 'EID' is one of ``dab_multiplexes`` are
    selected, then only rows with 'Site Height' greater than 75 are kept. When
    several values tie for the mode, the first one returned by pandas is shown.

    An error dialog is shown (and nothing is computed) when no data has been
    loaded or when no row passes both filters; without that check
    ``mode().iloc[0]`` raises IndexError on an empty selection.
    """
    global df

    # Check if data is available
    if df is None:
        messagebox.showerror("Error", "No data available. Please load data first.")
        return

    # Filter the DataFrame to get rows with specified EID values
    selected_rows = df[df['EID'].isin(dab_multiplexes)]

    # Create a new DataFrame with desired columns
    selected_columns = ['id','EID', 'NGR', 'Site', 'Site Height', 'In-Use Ae Ht', 'In-Use ERP Total']
    new_df = selected_rows[selected_columns].copy()

    # Rename columns
    new_df.rename(columns={'In-Use Ae Ht': 'Aerial height(m)', 'In-Use ERP Total': 'Power(kW)'}, inplace=True)

    # Filter new_df for Site Height > 75
    filtered_df = new_df[new_df['Site Height'] > 75]
    if filtered_df.empty:
        messagebox.showerror("Error", "No Site Height values above 75 found for the selected DAB multiplexes.")
        return

    # Calculate mean, mode, and median for Site Height
    mean_site_height = filtered_df['Site Height'].mean()
    mode_site_height = filtered_df['Site Height'].mode().iloc[0]
    median_site_height = filtered_df['Site Height'].median()

    # Display the calculated statistics in the output text area
    output_text.config(state=tk.NORMAL)
    output_text.delete("1.0", tk.END)
    output_text.insert(tk.END, f"Statistics for Site Height > 75:\n")
    output_text.insert(tk.END, f"Mean Site Height: {mean_site_height:.2f}\n")
    output_text.insert(tk.END, f"Mode Site Height: {mode_site_height:.2f}\n")
    output_text.insert(tk.END, f"Median Site Height: {median_site_height:.2f}\n")
    output_text.config(state=tk.DISABLED)

    # Provide feedback to the user
    status_label.config(text="Site Height statistics calculated and displayed successfully!")

In [16]:
# Function to calculate and display Date statistics
def date_statistics():
    """Show the mean, mode and median 'Date' from 2001 onwards for the selected multiplexes.

    Rows of the global ``df`` whose 'EID' is one of ``dab_multiplexes`` are
    selected, 'Date' is parsed with ``dayfirst=True``, and only dates whose
    year is 2001 or later are kept. When several dates tie for the mode, the
    first one returned by pandas is shown.

    An error dialog is shown (and nothing is computed) when no data has been
    loaded or when no date passes the filters; without that check
    ``mode().iloc[0]`` raises IndexError on an empty selection.
    """
    global df

    # Check if data is available
    if df is None:
        messagebox.showerror("Error", "No data available. Please load data first.")
        return

    # Filter the DataFrame to get rows with specified EID values
    selected_rows = df[df['EID'].isin(dab_multiplexes)]

    # Create a new DataFrame with desired columns
    selected_columns = ['id', 'Date','EID', 'NGR', 'Site', 'Site Height', 'In-Use Ae Ht', 'In-Use ERP Total']
    new_df = selected_rows[selected_columns].copy()

    # Rename columns
    new_df.rename(columns={'In-Use Ae Ht': 'Aerial height(m)', 'In-Use ERP Total': 'Power(kW)'}, inplace=True)

    # Convert 'Date' column to datetime
    new_df['Date'] = pd.to_datetime(new_df['Date'], dayfirst=True)

    # Filter new_df for Date from 2001 onwards (missing dates fail the comparison)
    filtered_df = new_df[new_df['Date'].dt.year >= 2001]
    if filtered_df.empty:
        messagebox.showerror("Error", "No dates from 2001 onwards found for the selected DAB multiplexes.")
        return

    # Calculate mean, mode, and median for Date
    mean_date = filtered_df['Date'].mean()
    mode_date = filtered_df['Date'].mode().iloc[0]
    median_date = filtered_df['Date'].median()

    # Display the calculated statistics in the output text area
    output_text.config(state=tk.NORMAL)
    output_text.delete("1.0", tk.END)
    output_text.insert(tk.END, f"Statistics for Date from 2001 onwards:\n")
    output_text.insert(tk.END, f"Mean Date: {mean_date}\n")
    output_text.insert(tk.END, f"Mode Date: {mode_date}\n")
    output_text.insert(tk.END, f"Median Date: {median_date}\n")
    output_text.config(state=tk.DISABLED)

    # Provide feedback to the user
    status_label.config(text="Date statistics calculated and displayed successfully!")

In [17]:
# Visualisations

In [18]:
# Function to clear the output area
def clear_output_area():
    """Remove everything shown in the output area: text and embedded plots.

    The plotting functions in this notebook pack their canvas widget into
    ``output_text``. Deleting the text does not remove such child widgets, so
    they are destroyed explicitly; otherwise an old plot stays visible after
    "Clear" and later plots pile up next to it. The text widget is left in the
    DISABLED (read-only) state.
    """
    # Destroy embedded widgets (plot canvases) first.
    for child in output_text.winfo_children():
        child.destroy()

    output_text.config(state=tk.NORMAL)
    output_text.delete("1.0", tk.END)
    output_text.config(state=tk.DISABLED)

In [19]:
# Function to create and display the DataFrame
def view_dab_multiplexes():
    """Show the plotting columns for the rows of the selected DAB multiplexes.

    Rows of the global ``df`` whose 'EID' is one of ``dab_multiplexes`` are
    selected and reduced to the site, frequency, block and service-label
    columns. The table and its shape are written to the output area and the
    status label is updated.

    Shows an error dialog and returns when no data has been loaded.
    """
    # Same guard as the other handlers: indexing ``None`` would raise.
    if df is None:
        messagebox.showerror("Error", "No data available. Please load data first.")
        return

    # Filter the DataFrame to get rows with specified EID values
    selected_rows = df[df['EID'].isin(dab_multiplexes)]

    # Create a new DataFrame with desired columns
    # (the trailing spaces in the 'Serv Label' names are part of the column names used here)
    new_columns = ['Site', 'Freq.', 'Block', 'Serv Label1 ', 'Serv Label2 ', 'Serv Label3 ', 'Serv Label4 ', 'Serv Label10 ']
    new_df = selected_rows[new_columns].copy()

    # Update the output text area with the content of the new DataFrame
    output_text.config(state=tk.NORMAL)
    output_text.delete("1.0", tk.END)
    output_text.insert(tk.END, new_df.to_string(index=False))
    output_text.insert(tk.END, f"\n\nNumber of Rows: {new_df.shape[0]}\nNumber of Columns: {new_df.shape[1]}")
    output_text.config(state=tk.DISABLED)

    # Provide feedback to the user
    status_label.config(text="New DataFrame created and displayed successfully!")

In [20]:
# Function to plot the data to a graph (Heatmap)
def plot_dab_multiplexes():
    """Draw a Site x Frequency heat map for the selected DAB multiplexes.

    Rows of the global ``df`` whose 'EID' is one of ``dab_multiplexes`` are
    counted per ('Site', 'Freq.') pair and the counts are drawn as an
    annotated seaborn heat map embedded in the output area.

    The figure is created as a stand-alone ``Figure`` instead of through
    ``plt.figure()``: pyplot keeps every figure it creates until it is closed,
    so each click would otherwise leave one more figure in memory. Canvases
    left by earlier plots are destroyed before the new one is packed.

    Shows an error dialog and returns when no data has been loaded or when
    there is nothing to plot.
    """
    # Same guard as the other handlers: indexing ``None`` would raise.
    if df is None:
        messagebox.showerror("Error", "No data available. Please load data first.")
        return

    # Clear the output area
    clear_output_area()

    # Remove plot canvases left behind by earlier calls so plots do not stack.
    for child in output_text.winfo_children():
        child.destroy()

    # Filter the DataFrame to get rows with specified EID values
    selected_rows = df[df['EID'].isin(dab_multiplexes)]

    # Create a new DataFrame with desired columns
    new_columns = ['Site', 'Freq.', 'Block', 'Serv Label1 ', 'Serv Label2 ', 'Serv Label3 ', 'Serv Label4 ', 'Serv Label10 ']
    new_df = selected_rows[new_columns].copy()

    # Count rows per site and frequency: 'Site' becomes the index, 'Freq.' the columns
    pivoted_df = new_df.pivot_table(index='Site', columns='Freq.', aggfunc='size', fill_value=0)
    if pivoted_df.empty:
        messagebox.showerror("Error", "No rows found for the selected DAB multiplexes.")
        return

    # Create a heatmap using seaborn on a figure that pyplot does not track
    fig = plt.Figure(figsize=(6, 6))
    ax = fig.add_subplot(111)
    sns.heatmap(pivoted_df, cmap='YlGnBu', annot=True, fmt='d', ax=ax)
    ax.set_title("DAB Multiplexes Heat Map")
    ax.set_xlabel("Frequency")
    ax.set_ylabel("Site")
    fig.tight_layout()

    # Embed the plot in the output area
    canvas = FigureCanvasTkAgg(fig, master=output_text)
    canvas.draw()
    canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=1)

    # Provide feedback to the user
    status_label.config(text="DAB Multiplexes graph generated successfully!")


In [21]:
# Correlation Analysis

In [22]:
# Function to calculate the correlation significance
def correlation_significance():
    """Show the Pearson correlations of the numeric plotting columns, keeping only significant ones.

    Rows of the global ``df`` whose 'EID' is one of ``dab_multiplexes`` are
    selected. For the numeric columns among the chosen ones, the Pearson
    correlation matrix and the matching p-values (``scipy.stats.pearsonr``)
    are computed. Off-diagonal correlations whose p-value is not below the
    significance level are shown as 0; the diagonal is left as computed.

    The output area receives one line stating whether any pair of different
    columns is significantly correlated, followed by the masked matrix.

    Shows an error dialog and returns when no data has been loaded or when
    none of the selected columns is numeric.
    """
    # Same guard as the other handlers: indexing ``None`` would raise.
    if df is None:
        messagebox.showerror("Error", "No data available. Please load data first.")
        return

    # Filter the DataFrame to get rows with specified EID values
    selected_rows = df[df['EID'].isin(dab_multiplexes)]

    # Create a new DataFrame with desired columns
    new_columns = ['Freq.', 'Block', 'Serv Label1 ', 'Serv Label2 ', 'Serv Label3 ', 'Serv Label4 ', 'Serv Label10 ']
    new_df = selected_rows[new_columns].copy()

    # Calculate the correlation matrix
    correlation_matrix = new_df.corr(method='pearson', numeric_only=True)
    if correlation_matrix.empty:
        messagebox.showerror("Error", "None of the selected columns is numeric.")
        return

    def pearson_p_value(x, y):
        # Raw p-value: rounding it would collapse every value to 0 or 1.
        return scipy.stats.pearsonr(x, y)[1]

    # Calculate the p-values for correlations. min_periods=2 keeps pearsonr
    # from being called with fewer than the two observations it requires.
    p_values = new_df.corr(method=pearson_p_value, min_periods=2, numeric_only=True)

    # Create a mask for significant correlations (conventional 5% level)
    significance_level = 0.05
    significant = (p_values < significance_level).to_numpy(copy=True)
    # With a callable method pandas puts 1.0 on the diagonal, which is not a
    # p-value; a column paired with itself is excluded from the verdict.
    np.fill_diagonal(significant, False)

    # Determine if there is any significant correlation between different columns
    is_significant = bool(significant.any())

    # Mask out non-significant correlations but keep the diagonal visible
    visible = significant.copy()
    np.fill_diagonal(visible, True)
    correlation_matrix = correlation_matrix.where(visible, 0)

    # Convert the correlation matrix to a formatted string
    correlation_text = correlation_matrix.to_string(float_format="{:.2f}".format)

    # Update the output text area with the correlation information. The widget
    # stays NORMAL until everything is written: inserts into a DISABLED text
    # widget are ignored.
    output_text.config(state=tk.NORMAL)
    output_text.delete("1.0", tk.END)
    if is_significant:
        output_text.insert(tk.END, "There is significant correlation between the selected columns.\n")
    else:
        output_text.insert(tk.END, "There is no significant correlation between the selected columns.\n")

    # Display the correlation information
    output_text.insert(tk.END, correlation_text)
    output_text.config(state=tk.DISABLED)

    # Provide feedback to the user
    status_label.config(text="Correlation analysis completed!")


In [23]:
# Visualisation of the correlation significance using a heatmap
def plot_correlation_heatmap():
    """Draw the correlation matrix of the numeric plotting columns as a heat map.

    Rows of the global ``df`` whose 'EID' is one of ``dab_multiplexes`` are
    selected. The correlation matrix is computed with ``numeric_only=True``,
    as in correlation_significance(): without it, pandas 2 raises when a
    selected column holds text.

    The figure is created as a stand-alone ``Figure`` instead of through
    ``plt.figure()``: pyplot keeps every figure it creates until it is closed,
    so each click would otherwise leave one more figure in memory. Canvases
    left by earlier plots are destroyed before the new one is packed.

    Shows an error dialog and returns when no data has been loaded or when
    none of the selected columns is numeric.
    """
    # Same guard as the other handlers: indexing ``None`` would raise.
    if df is None:
        messagebox.showerror("Error", "No data available. Please load data first.")
        return

    # Clear the output area
    clear_output_area()

    # Remove plot canvases left behind by earlier calls so plots do not stack.
    for child in output_text.winfo_children():
        child.destroy()

    # Filter the DataFrame to get rows with specified EID values
    selected_rows = df[df['EID'].isin(dab_multiplexes)]

    # Create a new DataFrame with desired columns
    new_columns = ['Site', 'Freq.', 'Block', 'Serv Label1 ', 'Serv Label2 ', 'Serv Label3 ', 'Serv Label4 ', 'Serv Label10 ']
    new_df = selected_rows[new_columns].copy()

    # Calculate the correlation matrix over the numeric columns only
    correlation_matrix = new_df.corr(numeric_only=True)
    if correlation_matrix.empty:
        messagebox.showerror("Error", "None of the selected columns is numeric.")
        return

    # Create a heatmap using seaborn on a figure that pyplot does not track
    fig = plt.Figure(figsize=(6, 6))
    ax = fig.add_subplot(111)
    sns.heatmap(correlation_matrix, annot=True, cmap="coolwarm", center=0, ax=ax)
    ax.set_title("Correlation Heatmap")

    # Display the plot in the GUI
    canvas = FigureCanvasTkAgg(fig, master=output_text)
    canvas.draw()
    canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=1)

    # Provide feedback to the user
    status_label.config(text="Correlation Heatmap generated successfully!")

# GUI

In [24]:
#Main GUI
root = tk.Tk()
root.title("DAB Data Analysis")

# Window size, kept in one place so the centring arithmetic uses the same numbers.
WINDOW_WIDTH = 880
WINDOW_HEIGHT = 650

# Screen width and height
screen_width = root.winfo_screenwidth()
screen_height = root.winfo_screenheight()

# Centre the window. The offsets are derived from the window size set here;
# winfo_reqwidth()/winfo_reqheight() report the size requested by the window's
# contents and do not reflect the 880x650 passed to geometry().
x = max((screen_width - WINDOW_WIDTH) // 2, 0)
y = max((screen_height - WINDOW_HEIGHT) // 2, 0)
root.geometry(f"{WINDOW_WIDTH}x{WINDOW_HEIGHT}+{x}+{y}")

# Notebooks(Tabs)
notebook = ttk.Notebook(root)
notebook.grid(row=0, column=0, columnspan=6)

#Window Tabs: one frame per stage of the workflow
clean_tab = ttk.Frame(notebook)
reshape_tab = ttk.Frame(notebook)
stats_tab = ttk.Frame(notebook)
visualise_tab = ttk.Frame(notebook)

notebook.add(clean_tab, text="Data Loading and Cleaning")
notebook.add(reshape_tab, text="Reshape the DataFrame")
notebook.add(stats_tab, text ="Statistical Analysis")
notebook.add(visualise_tab, text="Data Visualisation and Correlation Analysis")

In [25]:
# Clean Tab

In [26]:
# Clean tab: one row to load the CSV files, one row to clean and preview them.
load_button_label = tk.Label(clean_tab, text="Upload Your CSV Files", relief=tk.FLAT)
load_button = tk.Button(clean_tab, text="Browse", command=load_csv_files)
clean_button_label = tk.Label(clean_tab, text="Clean and Display the Data")
clean_button = tk.Button(clean_tab, text="Clean", command=clean_and_display_data)

# Captions go in column 0 and their buttons in column 3, all with the same padding.
for _widget, _row, _column in (
    (load_button_label, 1, 0),
    (load_button, 1, 3),
    (clean_button_label, 2, 0),
    (clean_button, 2, 3),
):
    _widget.grid(row=_row, column=_column, padx=10, pady=10)

In [27]:
# Drop NGR values: caption, entry pre-filled with the default values,
# "Submit" to preview the matching rows and "Confirm" to drop them.
ngr_label = tk.Label(clean_tab, text="Enter NGR Values to be dropped:")
ngr_label.grid(row=4, column=0, padx=10, pady=10)

ngr_entry_var = tk.StringVar(value="NZ02553847, SE213515, NT05399374, NT25265908")
ngr_entry = tk.Entry(clean_tab, textvariable=ngr_entry_var)
ngr_entry.grid(row=4, column=1, padx=10, pady=10)

submit_button = tk.Button(clean_tab, text="Submit", command=drop_ngr_values)
submit_button.grid(row=4, column=2, padx=10, pady=10)

confirm_button = tk.Button(clean_tab, text="Confirm", state=tk.NORMAL, command=confirm_drop_rows)
confirm_button.grid(row=4, column=3, padx=10, pady=10)

# The Confirm button starts out enabled.
confirm_button.config(state=tk.NORMAL)

In [28]:
# Reshape Tab

In [29]:
# Reshape tab, row 1: caption, entry (pre-filled with the default multiplexes)
# and submit button for the DAB multiplex columns to create.
dab_multiplex_label = tk.Label(reshape_tab, text="Enter DAB Multiplex Columns (comma-separated):")
dab_multiplex_label.grid(row=1, column=0, padx=10, pady=10, sticky="e")

dab_multiplex_entry_var = tk.StringVar(value="C18A, C18F, C188")
dab_multiplex_entry = tk.Entry(reshape_tab, textvariable=dab_multiplex_entry_var)
dab_multiplex_entry.grid(row=1, column=1, padx=10, pady=10, sticky="w")

dab_multiplex_submit_button = tk.Button(reshape_tab, text="Submit", command=create_dab_columns)
dab_multiplex_submit_button.grid(row=1, column=2, padx=10, pady=10)

# Reshape tab, row 2: build the reduced DataFrame for the selected multiplexes.
extract_and_join_label = tk.Label(reshape_tab, text="Extract DAB Multiplexes and Join to DAB Station Locations")
extract_and_join_label.grid(row=2, column=0, padx=10, pady=10, sticky="e")

extract_button = tk.Button(reshape_tab, text="Extract and Join Data", command=extract_and_join)
extract_button.grid(row=2, column=2, padx=10, pady=10)

In [30]:
# Statistical Analysis

In [31]:
# Statistics tab: one caption/button pair per statistic (ERP, site height, date).
erp_statistics_label = tk.Label(stats_tab, text="Calculate ERP Statistics")
erp_statistics_button = tk.Button(stats_tab, text="Calculate", command=erp_statistics)

site_height_statistics_label = tk.Label(stats_tab, text="Calculate Site Height Statistics")
site_height_statistics_button = tk.Button(stats_tab, text="Calculate", command=site_height_statistics)

date_statistics_label = tk.Label(stats_tab, text="Calculate Date Statistics")
date_statistics_button = tk.Button(stats_tab, text="Calculate", command=date_statistics)

# Each pair shares a row: caption in column 1, button in column 4.
for _row, (_caption, _button) in enumerate(
    (
        (erp_statistics_label, erp_statistics_button),
        (site_height_statistics_label, site_height_statistics_button),
        (date_statistics_label, date_statistics_button),
    ),
    start=1,
):
    _caption.grid(row=_row, column=1, padx=10, pady=10)
    _button.grid(row=_row, column=4, padx=10, pady=10)

In [32]:
# Visualisation

In [33]:
# Visualisation tab captions
view_data_for_plot_label = tk.Label(visualise_tab, text="View Data for Plotting")
plot_label = tk.Label(visualise_tab, text="Plot the Graph")
calculate_correlation_label = tk.Label(visualise_tab, text="Calculate Correlation Significance")
correlation_label = tk.Label(visualise_tab, text="Plot Correlation")
clear_label = tk.Label(visualise_tab, text="Clear")

# Visualisation tab buttons, each bound to its handler
view_data_for_plot_button = tk.Button(visualise_tab, text="View", command=view_dab_multiplexes)
plot_button = tk.Button(visualise_tab, text="Plot", command=plot_dab_multiplexes)
calculate_correlation_button = tk.Button(visualise_tab, text="Calculate", command=correlation_significance)
correlation_button = tk.Button(visualise_tab, text="Plot", command=plot_correlation_heatmap)
clear_button = tk.Button(visualise_tab, text="Clear", command=clear_output_area)

# Layout: each caption is left-aligned and its button sits right-aligned in the next column.
for _caption, _button, _row, _caption_column in (
    (view_data_for_plot_label, view_data_for_plot_button, 2, 0),
    (plot_label, plot_button, 2, 4),
    (calculate_correlation_label, calculate_correlation_button, 3, 0),
    (correlation_label, correlation_button, 3, 4),
    (clear_label, clear_button, 4, 0),
):
    _caption.grid(row=_row, column=_caption_column, padx=10, pady=10, sticky="w")
    _button.grid(row=_row, column=_caption_column + 1, padx=10, pady=10, sticky="e")



In [34]:
# Read-only text area shared by all tabs; handlers unlock it only while they rewrite it.
output_text = tk.Text(root, wrap="none", height=25, state=tk.DISABLED)
output_text.grid(row=6, column=0, columnspan=6, padx=10, pady=5, sticky="we")

# Status line shown above the text area, in blue 14-point Arial.
status_label = tk.Label(root, text="", bd=1, anchor=tk.W, font=("Arial", 14), fg="blue")
status_label.grid(row=5, column=0, columnspan=6, padx=10, pady=5, sticky="we")

In [35]:
# Start the Tk event loop; this call blocks until the main window is closed.
root.mainloop()

2023-08-20 18:33:43.629 python[51462:2828491] +[CATransaction synchronize] called within transaction
2023-08-20 18:33:43.817 python[51462:2828491] +[CATransaction synchronize] called within transaction
2023-08-20 18:34:30.242 python[51462:2828491] +[CATransaction synchronize] called within transaction
2023-08-20 18:34:30.370 python[51462:2828491] +[CATransaction synchronize] called within transaction
2023-08-20 18:35:26.215 python[51462:2828491] +[CATransaction synchronize] called within transaction
2023-08-20 18:51:41.425 python[51462:2828491] +[CATransaction synchronize] called within transaction
