# In [None]:
# This code block is importing various Python libraries and modules that are commonly used
# for data manipulation, analysis, and visualization
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Import the Axes3D class from mpl_toolkits.mplot3d, which is used to create
# 3D plots in Matplotlib.
from mpl_toolkits.mplot3d import Axes3D


# Class is designed to load, merge, and analyze data from two CSV files: one for bookings
# and one for sessions.
class DataAnalyzer:
    """Load, merge, and summarize booking and session data from two CSV sources.

    The two inputs are read with ``pd.read_csv`` and outer-joined on the
    ``booking_id`` column, so rows that exist in only one of the inputs are
    kept (with NaN in the columns coming from the other input).
    """

    # Calendar order used for the per-day report so that the result always
    # contains all seven days, including days without any data.
    _DAYS_OF_WEEK = [
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    ]

    def __init__(self, bookings_file, sessions_file):
        """Read both CSV inputs and build the combined DataFrame.

        Args:
            bookings_file: Path or file-like object accepted by ``pd.read_csv``
                holding the booking data.
            sessions_file: Path or file-like object accepted by ``pd.read_csv``
                holding the session data.
        """
        # Raw data, kept so callers can inspect the unmerged inputs.
        self.df_bookings = pd.read_csv(bookings_file)
        self.df_sessions = pd.read_csv(sessions_file)
        # Outer join of the two inputs on "booking_id".
        self.df_combined = self.merge_dataframes()

    def merge_dataframes(self):
        """Return the outer join of bookings and sessions on ``booking_id``.

        Both DataFrames must contain a ``booking_id`` column. Records without
        a match on the other side are kept and padded with NaN.
        """
        return pd.merge(
            self.df_bookings, self.df_sessions, on="booking_id", how="outer"
        )

    def get_distinct_counts(self) -> dict:
        """Count the distinct bookings, sessions, and searches overall.

        Returns:
            A dict with the keys ``"Bookings"``, ``"Sessions"`` and
            ``"Searches"``. ``"Searches"`` is 0 when the combined data has no
            ``search_id`` column. Missing values (NaN) are not counted.
        """
        combined = self.df_combined
        if "search_id" in combined.columns:
            distinct_searches = combined["search_id"].nunique()
        else:
            distinct_searches = 0

        return {
            "Bookings": combined["booking_id"].nunique(),
            "Sessions": combined["session_id"].nunique(),
            "Searches": distinct_searches,
        }

    def get_counts_by_day(self) -> dict:
        """Count distinct bookings, sessions, and searches per day of the week.

        The day is derived from the ``search_time`` column, which is parsed as
        ISO 8601 timestamps (for example ``"2023-12-25T10:30:00"``). Rows with
        a missing ``search_time`` belong to no day and are left out.

        As a side effect, a ``"day"`` column holding the day name is added to
        ``self.df_combined``.

        Returns:
            An empty dict when there is no ``search_time`` column. Otherwise a
            dict keyed by day name (Monday through Sunday, always all seven),
            each value being a dict with the keys ``"booking_id"``,
            ``"session_id"`` and ``"search_id"`` mapped to distinct counts.
            ``"search_id"`` is 0 for every day when the combined data has no
            ``search_id`` column.
        """
        combined = self.df_combined
        if "search_time" not in combined.columns:
            return {}

        combined["day"] = pd.to_datetime(
            combined["search_time"], format="ISO8601"
        ).dt.day_name()

        # Only aggregate columns that actually exist: naming a missing column
        # in the agg spec raises KeyError, whatever aggregation is requested.
        aggregations = {"booking_id": "nunique", "session_id": "nunique"}
        has_search_ids = "search_id" in combined.columns
        if has_search_ids:
            aggregations["search_id"] = "nunique"

        counts = (
            combined.groupby("day")
            .agg(aggregations)
            .reindex(self._DAYS_OF_WEEK, fill_value=0)
        )
        if not has_search_ids:
            # Same convention as get_distinct_counts(): no column, no searches.
            counts["search_id"] = 0

        return counts.to_dict("index")


# The Visualizer class creates a 3D bar plot of the per-day counts produced by the
# DataAnalyzer class.
class Visualizer:
    """Render per-day distinct counts as a 3D bar chart.

    ``data`` maps a day name to a dict of counts keyed by ``"booking_id"``,
    ``"session_id"`` and ``"search_id"`` (the shape returned by
    ``DataAnalyzer.get_counts_by_day``).
    """

    def __init__(self, data):
        """Store the per-day counts to plot."""
        self.data = data

    def plot_3d_bar(self):
        """Draw one bar per (day, category) pair and show the figure.

        Days run along the x axis in the iteration order of ``self.data``,
        categories along the y axis, and the bar height is the count. Each
        category has a single fixed color that matches the legend. A category
        missing from a day's counts is drawn with height 0.
        """
        fig = plt.figure(figsize=(12, 8))
        ax = fig.add_subplot(111, projection="3d")

        days = list(self.data)
        categories = ["booking_id", "session_id", "search_id"]
        category_labels = ["Bookings", "Sessions", "Searches"]
        colors = ["r", "g", "b"]  # one color per category, in the same order

        bar_width = 0.75  # extent of each bar along the day axis
        bar_depth = 0.75  # extent of each bar along the category axis

        # The color is chosen by category (row), not by running bar index, so
        # that every bar of a category matches its legend entry.
        for row, (category, color) in enumerate(zip(categories, colors)):
            for col, day in enumerate(days):
                ax.bar3d(
                    col,  # x coordinate: day position
                    row,  # y coordinate: category position
                    0,  # bars start at the base plane
                    bar_width,
                    bar_depth,
                    self.data[day].get(category, 0),  # bar height
                    color=color,
                    alpha=0.8,
                )

        ax.set_xticks(np.arange(len(days)))
        # Label the ticks with day names instead of bare positions.
        ax.set_xticklabels(days)
        ax.set_yticks(np.arange(len(categories)))
        ax.set_yticklabels(category_labels)

        ax.set_xlabel("Days")
        # labelpad moves the axis label away from the category tick labels.
        ax.set_ylabel("Categories", labelpad=15)
        ax.set_zlabel("Count")
        ax.set_title("Distinct Bookings, Sessions, and Searches by Day")

        # bar3d does not create legend entries, so build proxy rectangles.
        ax.legend(
            handles=[plt.Rectangle((0, 0), 1, 1, color=c, alpha=0.8) for c in colors],
            labels=category_labels,
            loc="upper left",
            bbox_to_anchor=(1.1, 1),
        )
        # Adjust padding around the subplot to reduce overlaps.
        plt.tight_layout()
        plt.show()


# The main() function serves as the entry point of the program and orchestrates the overall flow of
# the data analysis and visualization process.
def main(bookings_file="Bookings.csv", sessions_file="Sessions.csv"):
    """Run the analysis: print overall distinct counts, then plot per-day counts.

    Args:
        bookings_file: CSV file with the booking data. Defaults to
            ``"Bookings.csv"`` in the current working directory.
        sessions_file: CSV file with the session data. Defaults to
            ``"Sessions.csv"`` in the current working directory.
    """
    analyzer = DataAnalyzer(bookings_file, sessions_file)

    print("Distinct Counts:")
    for category, count in analyzer.get_distinct_counts().items():
        print(f"{category}: {count}")

    counts_by_day = analyzer.get_counts_by_day()
    # get_counts_by_day() returns an empty dict when the data has no
    # "search_time" column, in which case there is nothing to plot.
    if not counts_by_day:
        print("No data available for visualization.")
        return

    Visualizer(counts_by_day).plot_3d_bar()


"""
The purpose of this guard is to control the execution of code blocks, ensuring they only run
when a script is executed directly, not when it is imported as a module.
"""
# Call main() only when this file is run as a script; importing it runs nothing.
if __name__ == "__main__":
    main()