**ALIGNING COSMIC HUMAN GENES TO ALL DOG GENES TO IDENTIFY MATCHING GENES**

In [None]:
# Read the dog and human FASTA files and globally align each dog sequence against each human gene.
# For every human gene, save to a CSV file the gene names and sequence of the hit with the highest pidentity, to find its correspondence in dog.

import psa
import pandas as pd
from Bio import SeqIO

def read_fasta(fasta_file):
    """
    Parse a FASTA file into a list of (record id, sequence) tuples.

    Each record yielded by SeqIO.parse contributes one tuple, in file
    order; the sequence is converted to a plain string with str().
    """
    return [
        (entry.id, str(entry.seq))
        for entry in SeqIO.parse(fasta_file, "fasta")
    ]

def perform_global_alignment(human_seqs, dog_seqs):
    """
    Find, for every human sequence, the dog sequence with the highest pidentity.

    Both arguments are iterables of (id, sequence) pairs. Each human
    sequence is aligned against every dog sequence with psa.needle
    (moltype='prot'), and the hit with the strictly highest pidentity is kept,
    so the first dog sequence wins on ties.

    Returns a list with one dict per human sequence, holding the keys
    'Human Gene', 'Dog Gene', 'Pidentity' and 'Sequence'. When no alignment
    scores above 0, 'Dog Gene' and 'Sequence' stay None and 'Pidentity' stays 0.
    A progress line is printed after each human sequence is processed.
    """
    best_hits = []

    # Run the alignment of each human sequence against all dog sequences
    for completed, (human_id, human_seq) in enumerate(human_seqs, start=1):
        top_dog_id = None
        top_identity = 0
        top_dog_seq = None

        for dog_id, dog_seq in dog_seqs:
            aln = psa.needle(moltype='prot', qseq=human_seq, sseq=dog_seq)

            # Strict comparison: an equal score never replaces the current best
            if aln.pidentity > top_identity:
                top_dog_id = dog_id
                top_identity = aln.pidentity
                top_dog_seq = aln.sseq  # Dog sequence

        best_hits.append({
            'Human Gene': human_id,
            'Dog Gene': top_dog_id,
            'Pidentity': top_identity,
            'Sequence': top_dog_seq,
        })
        print(f"Alignements Completed: {completed}")

    return best_hits

def main(human_fasta, dog_fasta, output_csv):
    """
    Align the human FASTA against the dog FASTA and write the best hits to CSV.

    Reads both FASTA files, runs perform_global_alignment on them, prints
    the resulting DataFrame and saves it to output_csv without the index
    column.
    """
    # Human file is read first, then the dog file
    alignment_results = perform_global_alignment(
        read_fasta(human_fasta),
        read_fasta(dog_fasta),
    )

    df_results = pd.DataFrame(alignment_results)
    print(df_results)

    # Write the table to disk as CSV
    df_results.to_csv(output_csv, index=False)
    print(f"Results exported in: {output_csv}")

# Command-line entry point: expects exactly three arguments
# (human FASTA, dog FASTA, output CSV) after the script name.
if __name__ == "__main__":
    import sys

    if len(sys.argv) != 4:
        print("Usage: python script.py human_cosmic_genes.fasta df_dog_sequences.fasta output.csv")
        sys.exit(1)

    # The length check above guarantees exactly three values to unpack
    human_fasta, dog_fasta, output_csv = sys.argv[1:]
    main(human_fasta, dog_fasta, output_csv)
