# Align sequences

In [3]:
import logging
# Fetch the root logger and lower its threshold to DEBUG so that the
# request traces logged by pyeed and its HTTP stack appear in the cell output.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

# Install the default handler on the root logger; basicConfig() leaves the
# logger untouched when a handler is already attached.
logging.basicConfig()

import json
from pyeed.core import ProteinRecord


# Load the accession ids from `ids.json` (resolved against the current
# working directory). The encoding is passed explicitly: JSON is UTF-8 by
# specification, whereas open() would otherwise use the platform's locale
# encoding and could mis-decode the file on some systems.
with open("ids.json", "r", encoding="utf-8") as f:
    ids = json.load(f)

# Retrieve the records for the loaded ids.
# NOTE(review): presumably returns a list of ProteinRecord objects (it is
# indexed and handed to MSA further down) — confirm against pyeed.
sequences = ProteinRecord.get_ids(ids)

Output()

DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='/Users/max/miniconda3/envs/pyeed/lib/python3.11/site-packages/certifi/cacert.pem'
DEBUG:pyeed.fetch.requester:Creating 2 tasks
DEBUG:pyeed.fetch.requester:Sending 2 requests
DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='/Users/max/miniconda3/envs/pyeed/lib/python3.11/site-packages/certifi/cacert.pem'
DEBUG:pyeed.fetch.requester:Creating 48 tasks
DEBUG:pyeed.fetch.requester:Sending 48 requests
DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='/Users/max/miniconda3/envs/pyeed/lib/python3.11/site-packages/certifi/cacert.pem'
DEBUG:pyeed.fetch.requester:Creating 48 tasks
DEBUG:pyeed.fetch.requester:Sending 48 requests
DEBUG:pyeed.fetch.requester:Sending request to https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protei

## Multiple Sequence Alignment

A multiple sequence alignment (MSA) can be calculated by creating an `MSA` object from a list of `ProteinRecord` objects and calling its `clustalo` method. For the `clustalo` method to work, the PyEED Docker Service must be running; as the log output below shows, the client first tries `http://clustalo:5001/align` and then falls back to `http://localhost:5001/align`. The `clustalo` method returns an `AlignmentResult` containing all input `sequences` and the `aligned_sequences`.

In [None]:
from pyeed.align import MSA

# Wrap the fetched records in an MSA job, then run it through the
# ClustalOmega service to obtain the alignment result.
msa = MSA(sequences=sequences)
alignment = msa.clustalo()

# Keep the visualization as the cell's last expression so it is rendered.
alignment.visualize()

DEBUG:pyeed.tools.clustalo:Connecting to ClustalOmega service at http://clustalo:5001/align.
DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='/Users/max/miniconda3/envs/pyeed/lib/python3.11/site-packages/certifi/cacert.pem'
DEBUG:httpcore.connection:connect_tcp.started host='clustalo' port=5001 local_address=None timeout=600 socket_options=None
DEBUG:httpcore.connection:connect_tcp.failed exception=ConnectError(gaierror(8, 'nodename nor servname provided, or not known'))
DEBUG:pyeed.tools.clustalo:ClustalOmega service not reachable at http://clustalo:5001/align.
DEBUG:pyeed.tools.clustalo:Trying to connect to http://localhost:5001/align.
DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='/Users/max/miniconda3/envs/pyeed/lib/python3.11/site-packages/certifi/cacert.pem'
DEBUG:httpcore.connection:connect_tcp.started host='localhost' port=5001 local_ad

Output()

DEBUG:httpcore.http11:receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Sat, 22 Jun 2024 09:47:27 GMT'), (b'server', b'uvicorn'), (b'content-length', b'41338'), (b'content-type', b'application/json')])
INFO:httpx:HTTP Request: POST http://localhost:5001/align "HTTP/1.1 200 OK"
DEBUG:httpcore.http11:receive_response_body.started request=<Request [b'POST']>
DEBUG:httpcore.http11:receive_response_body.complete
DEBUG:httpcore.http11:response_closed.started
DEBUG:httpcore.http11:response_closed.complete
DEBUG:httpcore.connection:close.started
DEBUG:httpcore.connection:close.complete


✅ Alignment completed


DEBUG:matplotlib.axes._base:top of Axes not in the figure, so title not moved
DEBUG:matplotlib.axes._base:top of Axes not in the figure, so title not moved
DEBUG:matplotlib.axes._base:top of Axes not in the figure, so title not moved


## Create an HMM profile

To create a hidden Markov model (HMM) profile, you can use the `HMM` class. It is constructed from a profile name and an alignment — in the example below, the `alignment` returned by `clustalo`. To check whether a sequence belongs to the profile, you can use the `search` method. This method takes a `ProteinRecord` object and returns an `HMMResult` object containing the `sequence` and the `score` of the sequence in the profile.

In [None]:
from pyeed.align import HMM

# Build the profile from the alignment computed in the previous section.
model = HMM(
    name="random profile",
    alignment=alignment,
)

# Search the profile with the first fetched record.
query = sequences[0]
hits = model.search(sequence=query)