# 🧬 Aligner Search
Purpose: Run an alignment-based search across all configured databases using multiprocessing, then export the results in the selected file format.

In [None]:
import pandas as pd
import numpy as np

from Database_comparator import db_compare

## ⚙️ Configuration

In [None]:
# Path to the configuration file containing database connection details.
CONFIG_FILE = "_DefaultConfigFile.xlsx"

# Output format: 'csv', 'xlsx', 'tsv' or 'md'.
OUTPUT_FORMAT = "csv"

# Path to the output file where results will be saved. The extension is derived
# from OUTPUT_FORMAT so the file name cannot drift out of sync with its contents
# when the format is changed (previously ".csv" was hard-coded).
OUTPUT_FILE = f"Outputs/aligner_out.{OUTPUT_FORMAT}"

## 🧪 Testing

In [3]:
# Build the comparator from the configuration file chosen in the configuration cell.
# NOTE(review): log_tag / log_project presumably label this run's log entries — confirm
# against the Database_comparator documentation.
db = db_compare.DB_comparator(config_file=CONFIG_FILE, log_tag="Aligner", log_project="Aligner Project")
# Run the package's built-in self-tests before the real search; the call prints a status
# table (initialization, exporting, exact match, Hamming distances, aligner, ...).
db.test.start()

╒════════════════════════╤════════════╤═══════════════════╤══════════════════════╕
│ Test Name              │ Status     │ File Comparison   │ Execution Time (s)   │
╞════════════════════════╪════════════╪═══════════════════╪══════════════════════╡
│ Initialization Test    │ ✅ Success │ N/A               │ 0.29                 │
├────────────────────────┼────────────┼───────────────────┼──────────────────────┤
│ Exporting Test         │ ✅ Success │ N/A               │ 0.08                 │
├────────────────────────┼────────────┼───────────────────┼──────────────────────┤
│ Exact Match Test       │ ✅ Success │ ✅ Match          │ 0.07                 │
├────────────────────────┼────────────┼───────────────────┼──────────────────────┤
│ Hamming Distances Test │ ✅ Success │ ✅ Match          │ 2.38                 │
├────────────────────────┼────────────┼───────────────────┼──────────────────────┤
│ Aligner Test           │ ✅ Success │ ✅ Match          │ 0.64                 │
├───────────

## 🔍 Run Aligner Search

In [4]:
# Run the aligner search against all configured databases.
# parallel=True enables multiprocessing.
db.aligner.aligner_search_in_all_databases(parallel=True)

## 📤 Export and Display Results

In [7]:
# Write the search results to disk in the format chosen in the configuration cell.
db.export_data_frame(output_file=OUTPUT_FILE, data_format=OUTPUT_FORMAT)

# Read the export back with a reader that matches OUTPUT_FORMAT. Always calling
# pd.read_csv only works for the default "csv" setting and fails (or misparses)
# for 'xlsx', 'tsv' and 'md'.
if OUTPUT_FORMAT == "csv":
    exported_results = pd.read_csv(OUTPUT_FILE)
elif OUTPUT_FORMAT == "tsv":
    exported_results = pd.read_csv(OUTPUT_FILE, sep="\t")
elif OUTPUT_FORMAT == "xlsx":
    exported_results = pd.read_excel(OUTPUT_FILE)
else:
    # Non-tabular export (e.g. 'md'): there is no DataFrame reader for it.
    exported_results = None

if exported_results is None:
    # Show the exported file verbatim instead of forcing it through a table parser.
    with open(OUTPUT_FILE, encoding="utf-8") as exported_file:
        print(exported_file.read())
else:
    # The export stores the DataFrame index as an unnamed first column, which pandas
    # reads back as a data column called "Unnamed: 0". Restore it as the index so
    # the displayed table matches the exported data.
    if len(exported_results.columns) > 0 and exported_results.columns[0] == "Unnamed: 0":
        exported_results = exported_results.set_index("Unnamed: 0").rename_axis(None)
    display(exported_results)

Unnamed: 0,sequence,info,Databases/test_database.csv
0,CPTGGAQGKHIPQSF,62,[seq: CPTGGAQGKHIPQSF (Value_identifier: 8) (T...
1,CKASIPQGTHGGQPF,6,[seq: CKASIPQGTHGGQPF (Value_identifier: 10) (...
2,CPIQGHPASQGGKTF,83,[seq: CPIQGHPASQGGKTF (Value_identifier: 16) (...
3,CATGIHGQSQPKPGF,22,[seq: CATGIHGQSQPKPGF (Value_identifier: 20) (...
4,CPKTGQQSAHGGPIF,73,[seq: CPKTGQQSAHGGPIF (Value_identifier: 16) (...
...,...,...,...
95,CQHQTAPKIPSGGGF,38,[seq: CQHQTAPKIPSGGGF (Value_identifier: 388) ...
96,CQKPPGTGGHQISAF,50,[seq: CQKPPGTGGHQISAF (Value_identifier: 392) ...
97,CQGAIHSKGQPTGPF,50,[seq: CQGAIHSKGQPTGPF (Value_identifier: 396) ...
98,CGGTPQSQHAIPKGF,43,[seq: CGGTPQSQHAIPKGF (Value_identifier: 400) ...
