# Chapter 1: Scatterplots and maps

In [1]:
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
def create_frequency_table_with_totals(df: pd.DataFrame) -> pd.DataFrame:
    """
    Creates a frequency table by adding a 'Total' column and a 'Total' row.

    The input DataFrame is not modified: the totals are added to a copy.
    This keeps the function safe to call repeatedly on the same data (for
    example when a notebook cell is re-run), instead of summing totals that a
    previous call had already written into the caller's DataFrame.

    Parameters:
    df (pd.DataFrame): A DataFrame containing the original frequency data.

    Returns:
    pd.DataFrame: A new DataFrame containing the frequency table with row and column totals.
    """
    # Work on a copy so the caller's raw frequencies stay intact
    table = df.copy()

    # New column 'Total': the sum of each row
    table['Total'] = table.sum(axis=1)

    # New row 'Total': the sum of each column; computed after the column was
    # added, so the bottom-right cell holds the grand total
    table.loc['Total'] = table.sum(axis=0)

    return table

In [3]:
def create_percentage_table(df: pd.DataFrame) -> pd.DataFrame:
    """
    Converts a frequency table with totals into a table of row percentages.

    Every value is expressed as a percentage of its row's 'Total' entry, so
    each row adds up to roughly 100. The 'Total' column itself is left out of
    the result, and the 'Total' row is relabelled 'Global'.

    Parameters:
    df (pd.DataFrame): A DataFrame containing the frequency table with totals.

    Returns:
    pd.DataFrame: A DataFrame containing the integer percentage table with a 'Global' row.
    """
    row_totals = df['Total']

    # Share of each day type within its row, with the 'Total' column dropped first
    shares = df.drop(columns='Total').div(row_totals, axis=0)

    # Scale to percent, round to whole numbers and store them as integers
    whole_percentages = (shares * 100).round(0).astype(int)

    # The former 'Total' row is exposed under the label 'Global'
    return whole_percentages.rename(index={'Total': 'Global'})

Create the dataframe corresponding to 'Image 1.3: Frequencies of Day Types in the Four Trips':

In [4]:
# Location of the chapter's CSV data. The path is relative, so it is resolved
# against the directory the notebook is run from.
data_path = "../data/chapter1.csv"

In [5]:
# Read the CSV and label each row by its 'Country' value in one chained step,
# which keeps the table readable
df = pd.read_csv(data_path).set_index('Country')

In [6]:
# Add the row and column totals to the loaded frequencies, then show the
# result under a heading
frequency_table = create_frequency_table_with_totals(df)
print("Frequency Table with Totals:", frequency_table, sep="\n")

Frequency Table with Totals:
                Holiday  Half-Day Work  Full-Day Work  Total
Country                                                     
Norway                6              1             11     18
Canada                1              3             11     15
Greece                4             25              0     29
France/Germany        2              2             20     24
Total                13             31             42     86


Create the dataframe corresponding to 'Image 1.5: Percentages of Day Types in Each Trip, as well as Global Percentages, where Each Row Totals 100%':

In [7]:
# Turn the frequency table into row percentages, then show the result under a
# heading (preceded by a blank line)
percentage_table = create_percentage_table(frequency_table)
print("\nPercentage Table with Global Row:", percentage_table, sep="\n")


Percentage Table with Global Row:
                Holiday  Half-Day Work  Full-Day Work
Country                                              
Norway               33              6             61
Canada                7             20             73
Greece               14             86              0
France/Germany        8              8             83
Global               15             36             49


Create 'Image 1.4: Diagrams of Absolute Frequencies (a) and Relative Frequencies (b), Expressed as Percentages of Rows from Image 1.3':

In [None]:
# One figure holding both panels side by side: absolute frequencies on the
# left, relative frequencies on the right
fig, (ax_abs, ax_rel) = plt.subplots(1, 2, figsize=(14, 6))

# Panel (a): absolute frequencies.
# Every column except the last one is plotted, and the 'Total' row is skipped.
day_types = frequency_table.columns[:-1]
for country in frequency_table.index:
    if country == 'Total':
        continue
    ax_abs.plot(day_types, frequency_table.loc[country, day_types],
                marker='o', linestyle='-', label=country)

ax_abs.set_xlabel('Day Types')
ax_abs.set_ylabel('Days')
ax_abs.set_title('Absolute Frequencies of Day Types')
ax_abs.tick_params(axis='x', labelrotation=0)
ax_abs.set_ylim(0, 30)
ax_abs.legend()
ax_abs.grid(True)

# Panel (b): relative frequencies.
# All columns of the percentage table are plotted, and the 'Global' row is skipped.
for country in percentage_table.index:
    if country == 'Global':
        continue
    ax_rel.plot(percentage_table.columns, percentage_table.loc[country],
                marker='o', linestyle='-', label=country)

ax_rel.set_xlabel('Day Types')
ax_rel.set_ylabel('Percentage of Days (%)')
ax_rel.set_title('Relative Frequencies of Day Types')
ax_rel.tick_params(axis='x', labelrotation=0)
ax_rel.set_ylim(0, 100)
ax_rel.legend()
ax_rel.grid(True)

# Tidy the spacing between the panels and render the figure
fig.tight_layout()
plt.show()