# In [None]:
from collections import Counter
import matplotlib.pyplot as plt
import logomaker
import matplotlib.patches as mpatches
import os
import platform
import time
from datetime import datetime

# Startup banner: product name, module name/version, then a progress notice.
# Every message starts with a newline so it is preceded by a blank line.
print("\n^^ProEpiphary^^ ")
print("\nModule: Noah-LogosViewer 3.5")
print("\nanalyzing...")
def read_sequences(file_path):
    """Return the non-blank lines of a text file, one sequence per line.

    Each line is stripped of leading/trailing whitespace; lines that are
    empty after stripping are discarded. No header or record parsing is
    performed: every remaining line is returned as its own string.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list of stripped, non-empty line strings in file order.
    """
    with open(file_path, 'r') as handle:
        stripped_lines = [raw_line.strip() for raw_line in handle]
    return [text for text in stripped_lines if text]

def pad_sequences(sequences):
    """Right-pad every sequence with '-' to the length of the longest one.

    Args:
        sequences: Iterable of sequence strings. It is materialised once, so
            one-shot iterables (e.g. generators) are accepted.

    Returns:
        A new list with each string left-justified and filled with '-' up to
        the maximum length found. An empty input yields an empty list.
    """
    sequences = list(sequences)
    # default=0 keeps an empty input from raising ValueError inside max().
    max_len = max((len(seq) for seq in sequences), default=0)
    return [seq.ljust(max_len, '-') for seq in sequences]

def calculate_similarity(seq_list1, seq_list2):
    """Return a penalty-weighted percent identity between two sequence lists.

    Sequences are compared pairwise, character by character; every character
    counts as a position, including any '-' padding. The raw identity
    (matches / positions) is multiplied by an empirical factor that shrinks
    as the mismatch fraction grows, then scaled to a percentage.

    Args:
        seq_list1: First list of sequence strings.
        seq_list2: Second list of sequence strings; must have the same number
            of sequences, and each pair must have equal length.

    Returns:
        The weighted similarity as a float percentage.

    Raises:
        ValueError: If the lists differ in length, if any paired sequences
            differ in length, or if there are no positions to compare.
    """
    if len(seq_list1) != len(seq_list2):
        raise ValueError("Sequence lists must have the same length for comparison.")

    total_positions = 0
    matching_positions = 0

    for seq1, seq2 in zip(seq_list1, seq_list2):
        if len(seq1) != len(seq2):
            raise ValueError("Each sequence pair must have the same length for comparison.")

        total_positions += len(seq1)
        matching_positions += sum(aa1 == aa2 for aa1, aa2 in zip(seq1, seq2))

    # Guard the divisions below: empty lists (or only empty sequences) would
    # otherwise surface as an UnboundLocalError / ZeroDivisionError.
    if total_positions == 0:
        raise ValueError("No positions to compare: the sequences are empty.")

    # The weighting depends only on the final totals, so it is computed once
    # here rather than on every loop iteration.
    mismatch_fraction = (total_positions - matching_positions) / total_positions
    influrate = 0.99516105 ** (mismatch_fraction * 99.516105) + 0.00483894
    # NOTE(review): with zero mismatches the factor is 1.00483894, so fully
    # identical inputs score about 100.48 rather than 100 -- confirm intended.

    similarity = (matching_positions / total_positions) * influrate * 100
    return similarity

def generate_combined_pie_chart(sequences1, sequences2, titles, file_name):
    """Save a two-panel pie chart of character frequencies to file_name.

    For each sequence list, all sequences are concatenated, '-' characters
    are removed, and the remaining characters are counted. Each panel shows
    one list's counts, most frequent first, with percentage labels.

    Args:
        sequences1: Sequence strings for the left panel.
        sequences2: Sequence strings for the right panel.
        titles: Indexable holding the left title at [0] and right at [1].
        file_name: Destination path handed to matplotlib's savefig.

    Raises:
        ValueError: If a list contains no characters other than '-', because
            there is nothing to unpack into labels and sizes.
    """
    tallies = [
        Counter(''.join(group).replace('-', ''))
        for group in (sequences1, sequences2)
    ]

    # Split each tally into parallel (labels, sizes) tuples before any figure
    # is created, so bad input fails without leaving a figure open.
    slices = []
    for tally in tallies:
        residue_labels, residue_sizes = zip(*tally.most_common())
        slices.append((residue_labels, residue_sizes))

    fig, axes = plt.subplots(1, 2, figsize=(14, 7))

    for idx, (residue_labels, residue_sizes) in enumerate(slices):
        panel = axes[idx]
        panel.pie(residue_sizes, labels=residue_labels, autopct='%1.1f%%', startangle=90)
        panel.set_title(titles[idx], fontsize=14)
        panel.axis('equal')  # equal aspect ratio keeps the pie circular

    plt.tight_layout()
    plt.savefig(file_name)
    print(f"Pie chart saved: {file_name}")
    plt.close()

def generate_combined_logo(sequences1, sequences2, titles, file_name, custom_colors, legend_labels=None):
    """Save a side-by-side pair of sequence logos to file_name.

    Each sequence list is converted to a per-position counts matrix with
    logomaker and drawn as a logo on its own axis.

    Args:
        sequences1: Sequences for the left logo.
        sequences2: Sequences for the right logo.
        titles: Indexable holding the left title at [0] and right at [1].
        file_name: Destination path handed to matplotlib's savefig.
        custom_colors: Colour scheme passed to logomaker.Logo as color_scheme.
        legend_labels: Optional legend handles; when truthy, a legend titled
            'Chemistry' is attached to the right-hand axis.
    """
    fig, axes = plt.subplots(1, 2, figsize=(20, 6))

    count_matrices = [
        logomaker.alignment_to_matrix(alignment, to_type='counts')
        for alignment in (sequences1, sequences2)
    ]

    for idx, matrix in enumerate(count_matrices):
        panel = axes[idx]
        logomaker.Logo(matrix, ax=panel, color_scheme=custom_colors)
        panel.set_title(titles[idx], fontsize=14)

    if legend_labels:
        # Anchor the legend just outside the right edge of the second panel.
        axes[1].legend(
            handles=legend_labels,
            loc='upper left',
            bbox_to_anchor=(1.05, 0.85),
            title='Chemistry',
        )

    plt.tight_layout()
    plt.savefig(file_name)
    print(f"Sequence logo saved: {file_name}")
    plt.close()

# Colour scheme for the sequence logos: maps each single-letter code to a hex
# colour. Letters are grouped by the colour they share; letters not listed
# here have no entry in the mapping.
custom_colors = {
    letter: hex_code
    for hex_code, letters in (
        ('#FF0000', 'DE'),
        ('#000000', 'ACFHIJLMOPQZWXUV'),
        ('#0000FF', 'KR'),
        ('#00FF00', 'TSGY'),
        ('#800080', 'N'),
    )
    for letter in letters
}

# Legend handles: one coloured patch per chemistry class, using the same hex
# colours as the logo colour scheme.
legend_labels = [
    mpatches.Patch(color=hex_code, label=class_name)
    for hex_code, class_name in (
        ('#FF0000', 'Acidic'),
        ('#0000FF', 'Basic'),
        ('#000000', 'Hydrophobic'),
        ('#800080', 'Neutral'),
        ('#00FF00', 'Polar'),
    )
]

if __name__ == "__main__":
    # Script entry point: prompt for two sequence files, write a combined pie
    # chart and a combined sequence logo next to the first input file, then
    # report the similarity score and basic run information.
    # The timer starts before the prompts, so the reported runtime includes
    # the time spent waiting for user input.
    start_time = time.time()

    print("Enter the full path for human AQP0 sequence file (e.g., human aqp0.txt):")
    file1_path = input("Path 1: ").strip()
    print('Enter the full path for Sheep AQP0 sequence file (e.g., sheep aqp0.txt):')
    file2_path = input("Path 2: ").strip()
    print('''
    ''')

    # isfile() rather than exists(): a directory path would pass exists() and
    # then fail inside open(). SystemExit is raised directly because exit()
    # is a site-module convenience that is not guaranteed to be available.
    if not os.path.isfile(file1_path):
        print(f"Error: File not found -> {file1_path}")
        raise SystemExit(1)
    if not os.path.isfile(file2_path):
        print(f"Error: File not found -> {file2_path}")
        raise SystemExit(1)

    sequences1 = read_sequences(file1_path)
    sequences2 = read_sequences(file2_path)

    if not sequences1 or not sequences2:
        print("One or both sequence files are empty.")
        raise SystemExit(1)

    # Each list is padded to its own longest sequence, independently of the
    # other list.
    sequences1 = pad_sequences(sequences1)
    sequences2 = pad_sequences(sequences2)

    # Outputs are written to the directory of the first input file; the
    # timestamp keeps successive runs from overwriting each other.
    output_dir = os.path.dirname(file1_path)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    pie_chart_file = os.path.join(output_dir, f"Pie_Chart_( {timestamp} ).png")
    generate_combined_pie_chart(
        sequences1,
        sequences2,
        ["Human AQP0 Amino Acid Frequency", "Sheep AQP0 Amino Acid Frequency"],
        pie_chart_file
    )

    logo_file = os.path.join(output_dir, f"Sequence_Logo_( {timestamp} ).png")
    generate_combined_logo(
        sequences1,
        sequences2,
        ["Human AQP0 Sequence Logo", "Sheep AQP0 Sequence Logo"],
        logo_file,
        custom_colors,
        legend_labels
    )

    # The similarity check raises ValueError when the two inputs cannot be
    # compared position by position; report it instead of crashing, since
    # the charts have already been written.
    try:
        similarity = calculate_similarity(sequences1, sequences2)
        print(f"Sequence similarity: {similarity:.4f}%")
        print('''
Mission completed, view the results in the source folder now...''')
    except ValueError as error:
        print(f"Error calculating similarity: {error}")

    end_time = time.time()
    runtime = end_time - start_time
    print("\n----- Local System Information -----")
    print(f"Processor: {platform.processor()}")
    print(f"Total runtime: {runtime:.2f} seconds")
    print('''
v1.2   (Last Update:31/01/2025)''')