# Data Visualization Code
## All the code used within the thesis for data visualization work.
## Data processing code is contained within a separate notebook.

# Literature Review Publication and Data Collection Charts
### Code reads in a specifically formatted Excel file, and creates two charts: one of the publication years and one of the data collection years encountered in the literature review.
### Utilized in section 3.3.1, for section 4.1.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Read both literature-review sheets with a single pass over the workbook
# (a list of sheet names makes read_excel return a dict of DataFrames)
sheets = pd.read_excel(
    "Data/Book1.xlsx",
    sheet_name=["Publication Year", "Data Collection Year"],
)

# Drop rows that lack a year or a count. Both tables are later passed to
# np.polyfit for a trendline, so they get the same NaN filter here.
publication = sheets["Publication Year"].dropna(
    subset=["year published", "Count"]
)
data_collection = sheets["Data Collection Year"].dropna(
    subset=["year of data collection", "Count"]
)

# Define overall font (unpacked as **cgfont into the matplotlib text calls)
cgfont = {"fontname": "Century Gothic"}

### Publication Year

In [None]:
# Line chart of article counts per publication year, with a shaded area,
# per-year markers and a linear trendline; exported as a PNG.

# Prepare the data: numeric years, chronological order, full years only
publication["year published"] = pd.to_numeric(publication["year published"])
publication = publication.sort_values("year published")
publication = publication[publication["year published"] < 2024]

pub_years = publication["year published"]
pub_counts = publication["Count"]

# Canvas with a light grey plotting area and a grid behind the data
plt.figure(figsize=(15, 10))
plot_area = plt.gca()
plot_area.set_facecolor("#f0f0f0")
plt.grid(zorder=0, linestyle="-", alpha=0.7)

# Data line, fill beneath it, then one dot per year
plt.plot(pub_years, pub_counts, color="blue", zorder=3, linewidth=2)
plt.fill_between(pub_years, pub_counts, color="blue", alpha=0.3, zorder=2)
plt.scatter(pub_years, pub_counts, color="blue", zorder=4, marker="o", s=100)

# First-degree (straight line) fit through the counts
trend = np.poly1d(np.polyfit(pub_years, pub_counts, 1))
plt.plot(pub_years, trend(pub_years), color="Red", linewidth=2, zorder=4)

# Tick styling
tick_style = {"fontsize": 22, **cgfont}
plt.xticks(rotation=45, ha="center", **tick_style)
plt.yticks(**tick_style)

# Title and axis labels
axis_label_style = {"fontsize": 26, "fontweight": "bold", **cgfont}
plt.title("Article Publication Year (1979 - 2023)", fontsize=32, fontweight="bold", **cgfont)
plt.xlabel("Year", **axis_label_style)
plt.ylabel("Count", **axis_label_style)

# Axis limits: half a year of padding left/right, two counts of headroom
x_padding, y_padding = 0.5, 2
plt.xlim(pub_years.min() - x_padding, pub_years.max() + x_padding)
plt.ylim(0, pub_counts.max() + y_padding)

# Tight layout to prevent clipping, then export
plt.tight_layout()
plt.savefig("Output/Publication_year_line_graph.png", dpi=600)

# Show chart
#plt.show()

### Data Collection Years

In [None]:
# Line chart of article counts per data collection year, with a shaded
# area, per-year markers and a linear trendline; exported as a PNG.

# Prepare the data: numeric years, chronological order, full years only
data_collection["year of data collection"] = pd.to_numeric(data_collection["year of data collection"])
data_collection = data_collection.sort_values("year of data collection")
data_collection = data_collection[data_collection["year of data collection"] < 2024]

dc_years = data_collection["year of data collection"]
dc_counts = data_collection["Count"]

# Canvas with a light grey plotting area and a grid behind the data
plt.figure(figsize=(15, 10))
plot_area = plt.gca()
plot_area.set_facecolor("#f0f0f0")
plt.grid(zorder=0, linestyle="-", alpha=0.7)

# Data line, fill beneath it, then one dot per year
plt.plot(dc_years, dc_counts, color="blue", zorder=3, linewidth=2)
plt.fill_between(dc_years, dc_counts, color="blue", alpha=0.3, zorder=2)
plt.scatter(dc_years, dc_counts, color="blue", zorder=4, marker="o", s=100)

# First-degree (straight line) fit through the counts
trend = np.poly1d(np.polyfit(dc_years, dc_counts, 1))
plt.plot(dc_years, trend(dc_years), color="Red", linewidth=2, zorder=4)

# Tick styling
tick_style = {"fontsize": 22, **cgfont}
plt.xticks(rotation=45, ha="center", **tick_style)
plt.yticks(**tick_style)

# Title and axis labels
axis_label_style = {"fontsize": 26, "fontweight": "bold", **cgfont}
plt.title("Article Data Collection Year (1969 - 2023)", fontsize=32, fontweight="bold", **cgfont)
plt.xlabel("Year", **axis_label_style)
plt.ylabel("Count", **axis_label_style)

# Axis limits: half a year of padding left/right, two counts of headroom
x_padding, y_padding = 0.5, 2
plt.xlim(dc_years.min() - x_padding, dc_years.max() + x_padding)
plt.ylim(0, dc_counts.max() + y_padding)

# Tight layout to prevent clipping, then export
plt.tight_layout()
plt.savefig("Output/Data_Collection_year_line_graph.png", dpi=600)

# Show chart
#plt.show()

# Trade Route Proportion Chart
### Code reads in a formatted Excel file, and creates a stacked bar chart for each continent of the proportions of each IWT trade route segment (supply, transit, and demand).
### Utilized in section 3.3.2, for section 4.3.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
from matplotlib.font_manager import FontProperties

# Stacked bar chart: for each region, the share of Supply / Transit / Demand
# values, expressed as a percentage of that region's total.

# Set global font
rcParams["font.family"] = "Century Gothic"

# Load excel data (same data from QGIS choropleth visualizations)
data = pd.read_excel("Data/TR_Proportions.xlsx")

# Group data by "Region", then sum the values of each trade route segment
grouped_data = data.groupby(["Region"])[["Supply", "Transit", "Demand"]].sum()

# Convert the per-region sums into row-wise percentages
grouped_data_percentage = grouped_data.div(grouped_data.sum(axis=1), axis=0) * 100

# Create the plot
fig, ax = plt.subplots(figsize=(16, 9))

# Manually set the bar chart order (bottom to top of each stack)
custom_order = ["Supply", "Transit", "Demand"]

# Set colours for each trade route component (to match with choropleth maps)
role_colours = {
    "Supply": "#73b2d8",
    "Transit": "#7bc77c",
    "Demand": "#fb7050"
}

# Plot the data as a stacked bar chart
grouped_data_percentage[custom_order].plot(
    kind="bar",
    stacked=True,
    ax=ax,
    color=[role_colours[role] for role in custom_order],
    width=0.8,
    zorder=2,
)

# Change figure component colours
fig.patch.set_facecolor("white")
ax.set_facecolor("#f0f0f0")

# Add title
fig.suptitle("Continental Distribution of Encountered Study Locations", fontsize=24, fontweight="bold", y=0.95, ha="center")

# Add subtitle
plt.figtext(0.5, 0.89, "Summarized by observations of countries per continent", fontsize=14, fontweight="light", ha="center")

# Adjust spacing
fig.subplots_adjust(top=0.88)

# Add X and Y axis labels
ax.set_xlabel("Continent", fontsize=18, fontweight="bold")
ax.set_ylabel("Percentage (%)", fontsize=18, fontweight="bold")

# Modify X axis tick labels
ax.set_xticklabels(grouped_data_percentage.index, rotation=45, ha="right", fontsize=17)
ax.tick_params(axis="y", labelsize=17)

# Legend: reuse the handles/labels of the plotted bars, reversed so the
# legend reads top-to-bottom in the same order as the stacked segments.
# (A hand-built list of Rectangle handles used to be created here, but it
# was overwritten on the next line and never used, so it has been removed.)
handles, labels = ax.get_legend_handles_labels()
ax.legend(
    handles[::-1],
    labels[::-1],
    title="Role",
    title_fontproperties=FontProperties(weight="bold"),
    loc="upper right",
    fontsize=12,
)

# Add grid lines
ax.grid(True, color="darkgrey", linestyle="-", linewidth=0.5, zorder=0)

# Add 50% horizontal line
ax.axhline(y=50, color="#1a1b1c", linestyle="--", linewidth=1, zorder=2)

# Export plot
plt.savefig("Output/stacked_bar_chart_roles_by_region.png", dpi=600, bbox_inches="tight")

# plt.show()

# Top Taxonomic Charts
### Code reads in a formatted Excel file with a number of sheets, then for each sheet creates and exports a bar chart.
### Utilized in section 3.3.3, for sections 4.3.2 - 4.3.4.

In [None]:
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import rcParams
from matplotlib.font_manager import FontProperties

# For every sheet in the workbook: keep the 20 highest-count taxa, draw a
# bar chart coloured by kingdom, and export it as Output/<sheet name>.png.

# Century Gothic for all fonts (mathtext included, which is used below for
# the italic subtitle and italic taxon names)
rcParams["font.family"] = "Century Gothic"
mpl.rcParams["mathtext.fontset"] = "custom"
mpl.rcParams["mathtext.rm"] = "Century Gothic"
mpl.rcParams["mathtext.it"] = "Century Gothic:italic"

# Non-breaking space, substituted for ordinary spaces inside the $...$
# mathtext segments (presumably because mathtext drops ordinary spaces).
# Kept as a named constant so the f-strings below need neither nested
# double quotes nor a backslash inside a replacement field; both are a
# SyntaxError before Python 3.12.
NBSP = "\u00A0"

# Fixed colour per kingdom (the same for every sheet)
all_kingdoms = {
    "Animalia": "#ffa411",
    "Plantae": "#616c00",
    "Fungi": "#7405D7"
}

# Read every sheet up front; sheet_name=None returns {sheet name: DataFrame}
# and read_excel manages the file handle itself, so nothing is left open
sheets = pd.read_excel("Data/chart_data.xlsx", sheet_name=None)

# Iterate through all sheets
for sheet_name, df in sheets.items():

    # Ignore not specified
    df = df[df["Taxa"].str.lower() != "not specified"]

    # Keep only top 20 most mentioned taxa
    top_20 = df.sort_values(by="Count", ascending=False).head(20)

    # Restrict the colour table to the kingdoms present in this chart
    unique_kingdoms = top_20["Kingdom"].unique()
    kingdoms = {
        kingdom: colour
        for kingdom, colour in all_kingdoms.items()
        if kingdom in unique_kingdoms
    }

    # Modifier = text after the first underscore of the sheet name
    # (or the whole sheet name when it contains no underscore)
    modifier = sheet_name.split("_", 1)[-1]
    unique_summary = modifier in ("U", "Species")

    # Bar chart
    fig, ax = plt.subplots(figsize=(12, 8), facecolor="white")
    ax.bar(
        x=top_20["Taxa"],
        height=top_20["Count"],
        color=top_20["Kingdom"].map(kingdoms),
        zorder=2
    )

    # Change background colour
    ax.set_facecolor("#f0f0f0")
    ax.grid(zorder=0, linestyle="-", alpha=0.7)

    # Plot legend if more than one kingdom is present
    if len(kingdoms) > 1 and unique_summary:
        legend_handles = [
            plt.Rectangle((0, 0), 1, 1, color=colour)
            for colour in kingdoms.values()
        ]
        plt.legend(
            legend_handles,
            list(kingdoms),
            title="Kingdom",
            title_fontproperties=FontProperties(weight="bold"),
            loc="upper right",
            fontsize=12
        )

    # Make room for taxa image
    max_count = top_20["Count"].max()
    plt.ylim(0, max_count * 1.2)

    # Remove the indicator I used in the source data
    rank = sheet_name.replace("_U", "").replace("_S", "")

    # Main title: "Top 20" only when the chart really holds 20 taxa
    if len(top_20) == 20:
        title1 = f"Top 20 IWT Observations of Rank {rank}"
    else:
        title1 = f"All IWT Observations of Rank {rank}"

    # Subtitle: how the observations were summarised
    if unique_summary:
        title2 = "Summarized by unique observations per article"
    else:
        title2 = "Summarized at the lowest taxonomic rank per article"

    # The subtitle goes through mathtext ($...$) as the second title line
    subtitle = title2.replace(" ", NBSP)
    plt.suptitle(f"{title1}\n${subtitle}$", fontsize=20, fontweight="bold", y=0.98, ha="center")

    plt.subplots_adjust(top=0.80, bottom=0.15)

    # Tick labels: italic taxon name followed by the common name
    labels = [
        rf"$\mathit{{{taxa.replace(' ', NBSP)}}}$ ({common})"
        for taxa, common in zip(top_20["Taxa"], top_20["Common Name"])
    ]

    # Pin the tick positions before assigning labels; set_xticklabels on a
    # non-fixed locator triggers a matplotlib warning. Bars on a categorical
    # axis sit at 0..n-1 (assumes each taxon appears once per sheet).
    ax.set_xticks(range(len(labels)))
    ax.set_xticklabels(labels, rotation=45, ha="right", fontsize=12)
    plt.ylabel("Count", fontsize=14, fontweight="bold")
    plt.tight_layout()

    #plt.show()

    # Export each chart
    export = f"Output/{sheet_name}.png"
    plt.savefig(export, dpi=600)

    # Close plot so figures do not accumulate across sheets
    plt.close(fig)

# IWT Product Type Proportion Charts
### Code reads in a specifically formatted Excel file, and creates a chart based on the proportions of IWT products encountered in the literature review, per species.
### Utilized in section 3.3.4, for section 4.4 and Appendix 3.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
from matplotlib.font_manager import FontProperties

# For every species: a single stacked bar showing the percentage split of
# product types, exported as Output/<Species>.png. The underlying counts
# are also written to Output/AnimalProductSummary.xlsx.

# Load the Excel file
df = pd.read_excel("Data/Book2.xlsx")

# Fix necessary column text (trim whitespace; species also lower-cased)
df["species"] = df["species"].astype(str).str.strip().str.lower()
df["animal product type"] = df["animal product type"].astype(str).str.strip()
df["label"] = df["label"].astype(str).str.strip()

# Remove missing product types (exact, case-sensitive match)
df = df[df["animal product type"] != "not specified"]

# Get unique species-product combinations per label
df_unique = df.drop_duplicates(subset=["label", "species", "animal product type"])

# Count the species-product combinations
product_type_counts = df_unique.groupby(["species", "animal product type"]).size().reset_index(name="count")

# Fix species names
product_type_counts["species"] = product_type_counts["species"].str.capitalize()

# Change product category labels.
# The "not specified" entry cannot match here because those rows were
# removed above; it is kept so the mapping stays complete.
product_type_counts["animal product type"] = product_type_counts["animal product type"].replace({"whole":"Live Animal", "derivative":"Dead Animal/Animal Derivative", "not specified":"Unknown"})

# Export data (for reference)
product_type_counts.to_excel("Output/AnimalProductSummary.xlsx", index=False)

# Set global font
rcParams["font.family"] = "Century Gothic"

# Set colours for each product category.
# NOTE(review): any product type other than these two keys raises KeyError
# in the plotting loop below - confirm the source data only holds
# "whole" / "derivative" / "not specified".
custom_colours = {
    "Live Animal": "#09899b",
    "Dead Animal/Animal Derivative": "#765710"
}

# Per species
for species in product_type_counts["species"].unique():

    # Get only the relevant data for that species
    species_df = product_type_counts[product_type_counts["species"] == species].copy()

    # Convert counts into percentages
    total_count = species_df["count"].sum()
    species_df.loc[:, "Percent"] = ((species_df["count"] / total_count) * 100)

    # Create the plot
    fig, ax = plt.subplots(figsize=(5, 6), facecolor="white")

    # Running top of the stack; None lets the first bar start at zero
    bottom = None

    # Get product types
    product_types = species_df["animal product type"].unique()

    # Create manual legend to follow the same order as the bar chart
    legend_handles = []
    legend_labels = []

    # For each product type
    for product_type in product_types:

        # Get relevant data
        product_data = species_df[species_df["animal product type"] == product_type]

        # Get proportion
        proportion = product_data["Percent"].values

        # Create the bar
        ax.bar(
            [species],
            proportion,
            bottom=bottom,
            color=custom_colours[product_type],
            label=product_type,
            zorder=2
        )

        # Create legend
        legend_handles.append(plt.Rectangle((0, 0), 1, 1, color=custom_colours[product_type]))
        legend_labels.append(product_type)

        # Handle bar stacking. Build a new array instead of using
        # `bottom += proportion`: `bottom` would otherwise alias the array
        # returned by `.values`, and an in-place add both mutates that array
        # and fails when pandas hands back a read-only array
        # (as it can under Copy-on-Write).
        bottom = proportion if bottom is None else bottom + proportion

    # Change figure component colours
    ax.set_facecolor("#f0f0f0")
    ax.grid(zorder=0, linestyle="-", alpha=0.7)

    # Arrange legend (reverse alphabetical order of the labels)
    sorted_labels = sorted(legend_labels, reverse=True)
    sorted_handles = [legend_handles[legend_labels.index(label)] for label in sorted_labels]

    # Plot legend
    plt.legend(
        sorted_handles,
        sorted_labels,
        title="Animal Product Type",
        title_fontproperties=FontProperties(weight="bold"),
        loc="upper right",
        fontsize=10
    )

    # Adjust limits (a little headroom above 100%)
    plt.ylim(0, 105)

    # Add axis components
    plt.ylabel("Percentage (%)", fontsize=14, fontweight="bold")

    # Add title
    plt.title(f"IWT Product Type Proportions\nfor {species}", fontsize=16, fontweight="bold")
    plt.tight_layout()

    # Add 50% horizontal line
    ax.axhline(y=50, color="#1a1b1c", linestyle="--", linewidth=1, zorder=2)

    # Export plot
    plt.savefig(f"Output/{species}.png", dpi=600)
    plt.close()
    #plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
from matplotlib.font_manager import FontProperties

# Stacked bar chart: for each region, the share of Supply / Transit / Demand
# values, expressed as a percentage of that region's total.
# NOTE(review): this cell repeats the "Trade Route Proportion Chart" cell
# earlier in the notebook and writes to the same output path, so running it
# overwrites that file - confirm whether both copies are needed.

# Set global font
rcParams["font.family"] = "Century Gothic"

# Load excel data (same data from QGIS choropleth visualizations)
data = pd.read_excel("Data/TR_Proportions.xlsx")

# Group data by "Region", then sum the values of each trade route segment
grouped_data = data.groupby(["Region"])[["Supply", "Transit", "Demand"]].sum()

# Convert the per-region sums into row-wise percentages
grouped_data_percentage = grouped_data.div(grouped_data.sum(axis=1), axis=0) * 100

# Create the plot
fig, ax = plt.subplots(figsize=(16, 9))

# Manually set the bar chart order (bottom to top of each stack)
custom_order = ["Supply", "Transit", "Demand"]

# Set colours for each trade route component (to match with choropleth maps)
role_colours = {
    "Supply": "#73b2d8",
    "Transit": "#7bc77c",
    "Demand": "#fb7050"
}

# Plot the data as a stacked bar chart
grouped_data_percentage[custom_order].plot(
    kind="bar",
    stacked=True,
    ax=ax,
    color=[role_colours[role] for role in custom_order],
    width=0.8,
    zorder=2,
)

# Change figure component colours
fig.patch.set_facecolor("white")
ax.set_facecolor("#f0f0f0")

# Add title
fig.suptitle("Continental Distribution of Encountered Study Locations", fontsize=24, fontweight="bold", y=0.95, ha="center")

# Add subtitle
plt.figtext(0.5, 0.89, "Summarized by observations of countries per continent", fontsize=14, fontweight="light", ha="center")

# Adjust spacing
fig.subplots_adjust(top=0.88)

# Add X and Y axis labels
ax.set_xlabel("Continent", fontsize=18, fontweight="bold")
ax.set_ylabel("Percentage (%)", fontsize=18, fontweight="bold")

# Modify X axis tick labels
ax.set_xticklabels(grouped_data_percentage.index, rotation=45, ha="right", fontsize=17)
ax.tick_params(axis="y", labelsize=17)

# Legend: reuse the handles/labels of the plotted bars, reversed so the
# legend reads top-to-bottom in the same order as the stacked segments.
# (A hand-built list of Rectangle handles used to be created here, but it
# was overwritten on the next line and never used, so it has been removed.)
handles, labels = ax.get_legend_handles_labels()
ax.legend(
    handles[::-1],
    labels[::-1],
    title="Role",
    title_fontproperties=FontProperties(weight="bold"),
    loc="upper right",
    fontsize=12,
)

# Add grid lines
ax.grid(True, color="darkgrey", linestyle="-", linewidth=0.5, zorder=0)

# Add 50% horizontal line
ax.axhline(y=50, color="#1a1b1c", linestyle="--", linewidth=1, zorder=2)

# Export plot
plt.savefig("Output/stacked_bar_chart_roles_by_region.png", dpi=600, bbox_inches="tight")

# plt.show()