In [26]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [27]:
def convert_csv_to_triphone_dict(file_path):
    # Read the CSV file
    df = pd.read_csv(file_path)
    # Convert DataFrame to dictionary
    triphone_count_result = {
        tuple(triphone.split('-')): count for triphone, count in zip(df['Triphone'], df['Count'])
    }

    return triphone_count_result

In [28]:
def read_tone_data(file_path):
    # Define the mapping from French to English
    tone_mapping = {
        'HAUT': "ˊ",      # high
        'BAS': "ˋ",       # low
        'BAS-HAUT': "ˇ",  # low high
        'HAUT-BAS': "ˆ"   # high low
    }

    # Read the CSV file into a DataFrame
    df = pd.read_csv(file_path)

    # Convert DataFrame to dictionary with English tone labels
    tone_count_result = {
        tone_mapping[row['Tone']]: row['Count'] for index, row in df.iterrows() if row['Tone'] in tone_mapping
    }

    return tone_count_result

In [29]:
def draw_graph(triphone_count_result, output_image_file, top_n=40):
    # Sort triphones by count in descending order
    sorted_triphones = sorted(triphone_count_result.items(), key=lambda item: item[1], reverse=True)
    
    # Limit to top N triphones
    if len(sorted_triphones) > top_n:
        sorted_triphones = sorted_triphones[:top_n]
    
    triphones = ['-'.join(triphone) for triphone, count in sorted_triphones]
    counts = [count for triphone, count in sorted_triphones]

    # Calculate mean and median
    mean_count = np.mean(counts)
    median_count = np.median(counts)

    plt.figure(figsize=(15, 6.5))  # Increase figure size
    plt.bar(triphones, counts, color='#1f77b4')
    plt.axhline(y=mean_count, color='r', linestyle='-', label=f'Mean: {mean_count:.2f}')
    plt.axhline(y=median_count, color='y', linestyle='--', label=f'Median: {median_count:.2f}')
    plt.xlabel('Triphones', fontsize=28)  # Increase label size
    plt.ylabel('Frequency', fontsize=28)  # Increase label size
    plt.yticks(fontsize=24)  # Increase y-axis graduation size
    plt.xticks(rotation=90, fontsize=28)  # Increase x-axis labels size
    plt.legend(fontsize=28)  # Increase legend size
    plt.tight_layout()

    # Save the plot as an EPS file
    plt.savefig(output_image_file, format='eps')
    plt.close()

In [63]:
def draw_tone_graph(tone_count_result, output_image_file):
    # Sort tones by count in descending order
    sorted_tones = sorted(tone_count_result.items(), key=lambda item: item[1], reverse=True)
    
    tones = [tone for tone, count in sorted_tones]
    counts = [count for tone, count in sorted_tones]

    plt.figure(figsize=(10, 10))
    plt.bar(tones, counts, color='#1f77b4')
    plt.xlabel('Tones', fontsize=40)  # Increase label size
    plt.ylabel('Frequency', fontsize=40)  # Increase label size
    plt.yticks(fontsize=30)  # Increase y-axis graduation size
    plt.xticks(rotation=360, fontsize=70)  # Increase x-axis labels size
    plt.tight_layout()
    
    # Save the plot as an EPS file
    plt.savefig(output_image_file, format='eps')
    plt.close()

In [64]:
file_path = 'tone_count.csv'  # Replace with your actual file path
tone_count_result = read_tone_data(file_path)

output_image_file = 'tone_distribution.eps'  # Output file name
draw_tone_graph(tone_count_result, output_image_file)

print(f"Graph saved as {output_image_file}.")

Graph saved as tone_distribution.eps.


In [65]:
# Convert CSV to triphone count dictionary
file_path = 'triphone_count.csv'  # Replace with your actual file path
triphone_count_result = convert_csv_to_triphone_dict(file_path)

# Call the draw_graph function to create and save the image
output_image_file = 'triphone_frequency_distribution.eps'  # Output file name
top_n = 20  # Number of top triphones to display

draw_graph(triphone_count_result, output_image_file, top_n)

print(f"Graph saved as {output_image_file}.")

The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.


Graph saved as triphone_frequency_distribution.eps.
