# Frequency graphs

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the three frequency matrices from tab-separated files.
# The first column of each file becomes the row index (the row labels).
plasmid_matrix, mutating_cells_matrix, treated_cells_matrix = (
    pd.read_csv(tsv_path, sep='\t', index_col=0)
    for tsv_path in (
        'path_to_plasmid_frequency_matrix',
        'path_to_mutating_frequency_matirx',
        'path_to_drug_treated_frequency_matrix',
    )
)

# For every matrix: pull the first run of digits out of each row label,
# store it as an integer 'Position' column, then order the rows by it.
# Both steps mutate the frame in place so the names above stay valid.
for frame in (plasmid_matrix, mutating_cells_matrix, treated_cells_matrix):
    label_digits = frame.index.str.extract(r'(\d+)', expand=False)
    frame['Position'] = label_digits.astype(int)
    frame.sort_values('Position', inplace=True)

# Function to plot frequency data with logarithmic scale
def plot_frequencies_log_with_black_fill(matrix, title, y_limits=(1e-6, 0.1)):
    """Plot every sample column of *matrix* against 'Position' on a log y-axis.

    Each sample is drawn as a thin black line and the area under it is
    filled with opaque black, all on a single 12x6 figure that is shown
    immediately.

    Args:
        matrix: DataFrame containing a 'Position' column (used as the
            x-axis). Every other column is plotted as one sample.
        title: Text placed as the figure title.
        y_limits: ``(lower, upper)`` bounds applied to the logarithmic
            y-axis. Defaults to ``(1e-6, 0.1)``.

    Raises:
        KeyError: If *matrix* has no 'Position' column.
    """
    # Look the x-axis up first so a missing column fails before an empty
    # figure is created.
    positions = matrix['Position']

    # Pick sample columns by name instead of slicing off the last column:
    # 'Position' is not guaranteed to be last (e.g. when the input file
    # already carried a column with that name).
    sample_columns = [column for column in matrix.columns if column != 'Position']

    plt.figure(figsize=(12, 6))
    for sample in sample_columns:
        values = matrix[sample]
        plt.plot(positions, values, marker='None', linestyle='-', color='black', linewidth=1)
        # Solid black fill under the curve
        plt.fill_between(positions, values, color='black', alpha=1.0)

    plt.xlabel('Position')
    plt.ylabel('Frequency')
    # Logarithmic y-axis, restricted to the requested range
    plt.yscale('log')
    plt.ylim(*y_limits)
    plt.title(title)
    plt.show()

# Draw one log-scale frequency figure per data set, in a fixed order
figures_to_draw = (
    (plasmid_matrix, 'Logarithmic Frequencies of Mutations in Plasmid Samples'),
    (mutating_cells_matrix, 'Logarithmic Frequencies of Mutations in Mutating Cells Samples'),
    (treated_cells_matrix, 'Logarithmic Frequencies of Mutations in Treated Cells Samples'),
)
for frequency_matrix, figure_title in figures_to_draw:
    plot_frequencies_log_with_black_fill(frequency_matrix, figure_title)
