# 🧬 BLAST Search
Purpose: Perform a BLAST-like sequence similarity search using a custom database. Includes database preparation, parameter display, and result export.

In [1]:
import pandas as pd
import numpy as np

from Database_comparator import db_compare

## ⚙️ Configuration

In [2]:
# Configuration workbook handed to DB_comparator when the comparator is created.
CONFIG_FILE = "_DefaultConfigFile.xlsx"

# Export settings for the results table.
#   OUTPUT_FORMAT - one of 'csv', 'xlsx', 'tsv' or 'md'
#   OUTPUT_FILE   - destination path of the exported results
OUTPUT_FORMAT = "csv"
OUTPUT_FILE = "Outputs/blast_out.csv"

## 🧪 Initialize Comparator (runs self-tests)

In [None]:
# Create the comparator from the configured workbook; every later cell uses `db`.
# NOTE(review): the recorded output of this cell is a table of self-tests, so
# construction presumably runs them - confirm against DB_comparator.
# NOTE(review): log_tag presumably labels this run's log entries - confirm.
db = db_compare.DB_comparator(
    CONFIG_FILE,
    log_tag="BLAST",
)

╒════════════════════════╤════════════╤═══════════════════╤══════════════════════╕
│ Test Name              │ Status     │ File Comparison   │   Execution Time (s) │
╞════════════════════════╪════════════╪═══════════════════╪══════════════════════╡
│ Initialization Test    │ ✅ Success │ N/A               │                 0.12 │
├────────────────────────┼────────────┼───────────────────┼──────────────────────┤
│ Exporting Test         │ ✅ Success │ N/A               │                 0.01 │
├────────────────────────┼────────────┼───────────────────┼──────────────────────┤
│ Exact Match Test       │ ✅ Success │ ✅ Match          │                 0.03 │
├────────────────────────┼────────────┼───────────────────┼──────────────────────┤
│ Hamming Distances Test │ ✅ Success │ ✅ Match          │                 2.00 │
├────────────────────────┼────────────┼───────────────────┼──────────────────────┤
│ Aligner Test           │ ✅ Success │ ✅ Match          │                 0.69 │
├───────────

## 🔍 Run BLAST Workflow

In [None]:
# Step 1 - report information about the BLAST database (the recorded output lists
# the inserted databases, scoring matrix, gap penalties and word/window settings).
db.blast.blast_database_info()

# Step 2 - create the BLAST database under the name "BLAST_Database".
db.blast.blast_make_database(name="BLAST_Database")

# Step 3 - search and analyze matches; the input database itself is the query.
db.blast.blast_search_and_analyze_matches_in_database()

Inserted databases:
{'path': 'Databases/test_database.csv', 'sequence_column_name': 'sequence', 'results_column': 'Databases/test_database.csv', 'identifier_of_seq': ['Value_identifier', 'Text_identifier']}
-----------------------------------------------------
Matrix: BLOSUM62
GaPenalties: Existence: 11, Extension: 1
Neighboring words threshold: 11
Window for multiple hits: 40


Unnamed: 0,sequence,info,Databases/test_database.csv
0,CPTGGAQGKHIPQSF,62,[seq: CPTGGAQGKHIPQSF identifier:(Value_identi...
1,CKASIPQGTHGGQPF,6,[seq: CKASIPQGTHGGQPF identifier:(Value_identi...
2,CPIQGHPASQGGKTF,83,[seq: CPIQGHPASQGGKTF identifier:(Value_identi...
3,CATGIHGQSQPKPGF,22,[seq: CATGIHGQSQPKPGF identifier:(Value_identi...
4,CPKTGQQSAHGGPIF,73,[seq: CPKTGQQSAHGGPIF identifier:(Value_identi...
...,...,...,...
95,CQHQTAPKIPSGGGF,38,[seq: CQHQTAPKIPSGGGF identifier:(Value_identi...
96,CQKPPGTGGHQISAF,50,[seq: CQKPPGTGGHQISAF identifier:(Value_identi...
97,CQGAIHSKGQPTGPF,50,[seq: CQGAIHSKGQPTGPF identifier:(Value_identi...
98,CGGTPQSQHAIPKGF,43,[seq: CGGTPQSQHAIPKGF identifier:(Value_identi...


## 📤 Export and Display Results

In [5]:
# Write the results table to OUTPUT_FILE in the configured OUTPUT_FORMAT.
db.export_data_frame(output_file=OUTPUT_FILE, data_format=OUTPUT_FORMAT)

# Read the exported file back as a sanity check. The reader has to match
# OUTPUT_FORMAT: an unconditional read_csv only works for 'csv' and would fail
# or mis-parse the other formats the configuration cell advertises.
if OUTPUT_FORMAT == "csv":
    display(pd.read_csv(OUTPUT_FILE))
elif OUTPUT_FORMAT == "tsv":
    # assumes the 'tsv' export is tab-delimited - TODO confirm against export_data_frame
    display(pd.read_csv(OUTPUT_FILE, sep="\t"))
elif OUTPUT_FORMAT == "xlsx":
    display(pd.read_excel(OUTPUT_FILE))
else:
    # e.g. 'md': pandas has no tabular reader for it, so show the raw text instead.
    # assumes the export is UTF-8 text - TODO confirm
    with open(OUTPUT_FILE, encoding="utf-8") as exported_file:
        print(exported_file.read())

Unnamed: 0,sequence,info,Databases/test_database.csv
0,CPTGGAQGKHIPQSF,62,[seq: CPTGGAQGKHIPQSF identifier:(Value_identi...
1,CKASIPQGTHGGQPF,6,[seq: CKASIPQGTHGGQPF identifier:(Value_identi...
2,CPIQGHPASQGGKTF,83,[seq: CPIQGHPASQGGKTF identifier:(Value_identi...
3,CATGIHGQSQPKPGF,22,[seq: CATGIHGQSQPKPGF identifier:(Value_identi...
4,CPKTGQQSAHGGPIF,73,[seq: CPKTGQQSAHGGPIF identifier:(Value_identi...
...,...,...,...
95,CQHQTAPKIPSGGGF,38,[seq: CQHQTAPKIPSGGGF identifier:(Value_identi...
96,CQKPPGTGGHQISAF,50,[seq: CQKPPGTGGHQISAF identifier:(Value_identi...
97,CQGAIHSKGQPTGPF,50,[seq: CQGAIHSKGQPTGPF identifier:(Value_identi...
98,CGGTPQSQHAIPKGF,43,[seq: CGGTPQSQHAIPKGF identifier:(Value_identi...
