# Import the necessary libraries and define helper functions for processing, plotting and archiving the survey data

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns
from collections import Counter
import py7zr

# def get_data_files(directory):
#     """
#     Get a list of Excel files in a directory.
#     
#     :param directory: Path to the directory
#     :return: List of Excel files
#     """
#     return [os.path.join(directory, file) for file in os.listdir(directory) if file.endswith(".xlsx")]
# 
# 
# def process_survey_data_to_df(directory="./data/", column_index=7):
#     """
#     Process survey data from multiple Excel files.
#     
#     :param directory: Path to the directory containing the Excel files
#     :param column_index: Index of the column containing the survey responses
#     :return: A DataFrame with processed responses
#     """
#     excel_files = get_data_files(directory)
#     all_counters = []
#     
#     for file in excel_files:
#         df = pd.read_excel(file)
#         responses = df.iloc[:, column_index].dropna()
#         
#         counter = Counter()
#         for response in responses:
#             if isinstance(response, str):
#                 choices = [choice.strip() for choice in response.split(';') if choice.strip()]
#                 counter.update(choices)
#                 
#         all_counters.append(counter)
#         
#     combined_counter = sum(all_counters, Counter())
#     
#     
#     df = pd.DataFrame.from_dict(combined_counter, orient='index', columns=['Total'])
#     df = df.sort_values(by='Total', ascending=False)
#     
#     
#     for i, counter in enumerate(all_counters):
#         df[f'Survey_{i+1}'] = df.index.map(counter)
#         
#     return df

def process_survey_data(file_path, column_index):
    """
    Count the multiple-choice answers stored in one column of an Excel file.

    Each non-empty string cell is split on ';'. Every piece is stripped of
    surrounding whitespace and empty pieces are discarded. Cells that are
    missing or are not strings are ignored.

    :param file_path: Path to the Excel file
    :param column_index: Positional (zero-based) index of the answer column
    :return: A Counter mapping each choice to the number of times it occurs
    """
    answers = pd.read_excel(file_path).iloc[:, column_index].dropna()

    tally = Counter()
    for answer in answers:
        # Numeric or otherwise non-text cells carry no choices.
        if not isinstance(answer, str):
            continue
        for piece in answer.split(';'):
            label = piece.strip()
            if label:
                tally[label] += 1

    return tally

def combine_survey_results(*counters):
    """
    Merge the tallies of several surveys into one.

    The inputs are left untouched; a fresh Counter is built and each input
    is added to it in the order given.

    :param counters: Counter objects from different surveys
    :return: A new Counter holding the summed counts
    """
    totals = Counter()
    for survey_tally in counters:
        # Counter.update adds counts instead of replacing them.
        totals.update(survey_tally)
    return totals

def plot_survey_results(counter, title, output_file_base):
    """
    Plot survey results as a horizontal bar chart using Seaborn.

    Choices are drawn from the most to the least frequent and every bar is
    annotated with its count. The chart is saved three times, as
    ``<output_file_base>.pdf``, ``<output_file_base>.png`` and
    ``<output_file_base>.svg``. The directory part of ``output_file_base``
    is created when it does not exist yet.

    :param counter: Counter object with survey results
    :param title: Title for the chart
    :param output_file_base: Output path without extension; the format
        suffixes are appended to it
    """
    ranked = sorted(counter.items(), key=lambda item: item[1], reverse=True)
    keys = [choice for choice, _ in ranked]
    values = [count for _, count in ranked]

    # savefig() does not create missing directories, so make sure the target
    # directory exists before any drawing work is done.
    output_dir = os.path.dirname(output_file_base)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Apply the style before the figure and its axes are created.
    sns.set_style("ticks")
    fig = plt.figure(figsize=(12, max(8, len(ranked) * 0.4)))
    try:
        ax = fig.add_subplot(111)
        # Bars are coloured through `hue`; the legend it produces only
        # repeats the bar labels, so it is dropped when present.
        sns.barplot(x=values, y=keys, orient='h', palette="deep", hue=values, ax=ax)
        if ax.legend_ is not None:
            ax.legend_.remove()

        # despine() only affects axes that already exist, so it has to run
        # after the bars have been drawn.
        sns.despine(ax=ax)

        ax.set_title(title, fontsize=16)
        ax.set_xlabel("Vastausten määrä", fontsize=12)
        ax.set_ylabel("Vaihtoehdot", fontsize=12)

        # Write the count next to the end of each bar.
        for row, count in enumerate(values):
            ax.text(count, row, f' {count}', va='center', fontsize=10)

        # Counts are whole numbers, so restrict the x ticks to integers.
        ax.xaxis.set_major_locator(ticker.MaxNLocator(integer=True))

        fig.tight_layout()
        fig.savefig(f"{output_file_base}.pdf", format='pdf', dpi=300, bbox_inches='tight')
        fig.savefig(f"{output_file_base}.png", format='png', dpi=300, bbox_inches='tight')
        fig.savefig(f"{output_file_base}.svg", format='svg', bbox_inches='tight')
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close(fig)
    
    
def create_archive(file_path, archive_path):
    """
    Create a 7z archive from a file or directory.

    ``file_path`` is handed to ``SevenZipFile.writeall``. The directory that
    will contain the archive is created when it does not exist yet.

    :param file_path: Path to the file or directory to be archived
    :param archive_path: Path to the archive file
    """
    # Opening the archive for writing fails if its parent directory is missing.
    archive_dir = os.path.dirname(archive_path)
    if archive_dir:
        os.makedirs(archive_dir, exist_ok=True)

    with py7zr.SevenZipFile(archive_path, 'w') as archive:
        archive.writeall(file_path)


# Process the data from the surveys

In [None]:
# survey_df = process_survey_data_to_df()




# Tally column index 7 of each survey workbook, in a fixed order.
survey1, survey2, survey3 = (
    process_survey_data(workbook, 7)
    for workbook in (
        "data/hallitus.xlsx",
        "data/killat.xlsx",
        "data/kiltojen_jäsenet.xlsx",
    )
)

# Plot individual survey results

In [None]:
# One chart per respondent group, each saved under its own plots/ subfolder.
plot_survey_results(
    counter=survey1,
    title="Turkulaiseen teekkariuteen yhdistetyt arvot - TK:n hallitus",
    output_file_base="./plots/hallitus/arvot",
)
plot_survey_results(
    counter=survey2,
    title="Turkulaiseen teekkariuteen yhdistetyt arvot - Killat",
    output_file_base="./plots/killat/arvot",
)
plot_survey_results(
    counter=survey3,
    title="Turkulaiseen teekkariuteen yhdistetyt arvot - Kiltojen jäsenet",
    output_file_base="./plots/jasenet/arvot",
)

# Combine the results from all surveys and plot the combined results

In [None]:
# Sum the three tallies and chart the overall result.
combined_results = combine_survey_results(survey1, survey2, survey3)
plot_survey_results(
    counter=combined_results,
    title="Turkulaiseen teekkariuteen yhdistetyt arvot - Yhdistetyt tulokset",
    output_file_base="./plots/yhdistetyt/arvot",
)

# Create an archive of the plots

In [None]:
# Bundle everything under plots/ into a single 7z archive.
create_archive(file_path="plots", archive_path="./target/plots.7z")