# Welcome to the `pysan` development notebook
This colab notebook is currently being tested as a workspace to develop new features. Code below is not deployment-ready, and is meant purely as a transparent way to see what kinds of features are in the works.

Note: this notebook is meant to be run on your local system after cloning the repository.

In [1]:
import sys
sys.path.insert(1,'../../')
import pysan as ps
import matplotlib.pyplot as plt

pysan ready


## Development Area
If the output of the previous cell reads `pysan ready`, then everything is working and you're ready to go. Remember to leave lots of comments (and docstrings) so that anyone reading this page can understand what's going on!

In [5]:
import pysan.core as pysan_core

# Build a small sample data set to experiment with. Judging by the cell output
# shown below, this yields 5 sequences of length 10 over the alphabet [1, 2, 3]
# -- presumably drawn at random; confirm against ps.generate_sequences.
sequences = ps.generate_sequences(5, 10, [1,2,3])



[[2, 3, 2, 2, 3, 1, 2, 2, 1, 2],
 [3, 1, 3, 3, 1, 3, 1, 3, 3, 1],
 [1, 1, 2, 3, 3, 1, 3, 1, 3, 3],
 [1, 3, 1, 2, 3, 2, 3, 1, 3, 2],
 [1, 3, 2, 2, 2, 2, 3, 3, 1, 3]]

In [3]:
import numpy as np
from string import *



def get_distance(s1, s2, match=0, mismatch=-1, gap=-1):
    """
    Computes the optimal matching score between two sequences using the `Needleman-Wunsch algorithm <https://www.sciencedirect.com/science/article/abs/pii/0022283670900574?via%3Dihub>`_ based on Devon Ryan's implementation found `here <https://www.biostars.org/p/231391/>`_.

    Despite the function's name, the returned value is an alignment *score*
    (higher means more similar), not a non-negative distance. With the default
    penalties, identical sequences score ``0.0`` and every mismatch or gap in
    the best alignment lowers the score by one.

    :param s1: first sequence; anything supporting ``len()`` and integer
        indexing whose elements can be compared with ``==`` (list, string, ...).
    :param s2: second sequence, same requirements as ``s1``.
    :param match: score added when two aligned elements are equal.
    :param mismatch: score added when two aligned elements differ.
    :param gap: score added when an element is aligned against a gap.
    :return: the optimal global alignment score, i.e. the bottom-right cell of
        the alignment matrix (a NumPy float).

    Example:

    >>> float(get_distance([1, 1, 2, 2, 3], [1, 2, 2, 3, 3]))
    -2.0
    """
    n = len(s1) + 1  # number of matrix columns (one per prefix of s1)
    m = len(s2) + 1  # number of matrix rows (one per prefix of s2)

    # al_mat[i][j] holds the best score for aligning s2[:i] with s1[:j].
    al_mat = np.zeros((m, n), dtype=float)

    # Aligning a prefix against the empty sequence costs one gap per element,
    # which fixes the first column and the first row.
    al_mat[:, 0] = gap * np.arange(m)
    al_mat[0, :] = gap * np.arange(n)

    # Only the score is returned, so no traceback/pointer matrix is kept.
    for i in range(1, m):
        for j in range(1, n):
            pair_score = match if s1[j - 1] == s2[i - 1] else mismatch
            di = al_mat[i - 1][j - 1] + pair_score  # align the two elements (diagonal)
            ho = al_mat[i][j - 1] + gap  # gap, coming from the left cell
            ve = al_mat[i - 1][j] + gap  # gap, coming from the upper cell
            al_mat[i][j] = max(di, ho, ve)

    # optimal alignment score = bottom right value in al_mat
    return al_mat[m - 1][n - 1]

# Quick sanity check of get_distance on two short integer sequences.
# get_distance only relies on len(), indexing and ==, so strings such as
# 'aaaabbbb' / 'abbccdee' can be passed in the same way.
s1 = [1,1,2,2,3]
s2 = [1,2,2,3,3]

# Left as the last expression so the notebook displays the resulting score.
get_distance(s1,s2)

-2.0