# In [None]:  (Jupyter cell marker left over from the notebook export)
import numpy as np
import pandas as pd
import os
import sys
import subprocess
import rpy2
from rpy2 import *
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
import rpy2.robjects.packages as rpackages
from rpy2.robjects.vectors import StrVector
#from rpy2.robjects import globalenv
from rpy2.robjects.numpy2ri import numpy2ri
from rpy2.robjects.packages import STAP
#numpy2ri.activate()
from rpy2.robjects import pandas2ri
import shutil
from shutil import copyfile
import math
import re
import fileinput
import tkinter
from tkinter import *
#import tkFont
#from PIL import Image
#from PIL import ImageTk
from IPython.display import display
from IPython.display import HTML
import IPython.core.display as di 

# Notebook-only convenience: emit two raw HTML snippets through IPython.
# The first is a script that toggles the ".input_area" and ".prompt" elements
# when the page body is not a "notebook_app"; the second is a button that
# toggles the same elements on click.
_code_toggle_snippets = (
    '<script>jQuery(function() {if (jQuery("body.notebook_app").length == 0) { jQuery(".input_area").toggle(); jQuery(".prompt").toggle();}});</script>',
    '''<button onclick="jQuery('.input_area').toggle(); jQuery('.prompt').toggle();">Toggle code</button>''',
)

for _snippet in _code_toggle_snippets:
    di.display_html(_snippet, raw=True)


#generating 621D features

# One entry per feature group, in output column order:
# (column prefix, number of columns, R script, R function, needs AAidx.csv).
# Columns are named "<prefix>_1" .. "<prefix>_<count>", 649 columns in total.
_FEATURE_EXTRACTORS = (
    ("aac", 20, "AAC_AVP.R", "extractAAC_revised", False),            # amino acid composition
    ("dipep", 400, "DC_AVP.R", "extractDC_revised", False),           # dipeptide composition
    ("pseudo", 25, "PAAC_AVP.R", "extractPAAC_revised", True),        # pseudo-amino acid composition
    ("amphipseudo", 30, "APAAC_AVP.R", "extractAPAAC_revised", True), # amphiphilic pseudo-amino acid composition
    ("comp", 24, "CTDC.R", "extractCTDC_revised", False),             # composition for CTD model
    ("tran", 24, "CTDT.R", "extractCTDT_revised", False),             # transition for CTD model
    ("dist", 120, "CTDD.R", "extractCTDD_revised", False),            # distribution for CTD model
    ("ss", 6, "GL_new.R", "extractSSF", False),                       # secondary structure features
)

# Feature selections keyed by ``svalue``: for each prefix, the 1-based column
# numbers that are kept. Only selection 0 is defined by this tool.
_SELECTED_FEATURE_INDICES = {
    0: (
        ("aac", (1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 15, 16, 17, 18, 19, 20)),
        ("dipep", (32, 51, 111, 211, 220, 340)),
        ("pseudo", (1, 2, 3, 4, 5, 10, 11, 12, 14, 16, 18, 20, 21, 22, 23,
                    24, 25)),
        ("amphipseudo", (21, 22, 23, 24, 25, 26, 27, 29, 30)),
        ("comp", (1, 2, 3, 4, 5, 6, 10, 11, 13, 14, 15, 16, 17, 18, 19, 21,
                  22, 23, 24)),
        ("tran", (1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 16, 17, 18, 19, 20, 21,
                  22, 23, 24)),
        ("dist", (1, 2, 3, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
                  22, 23, 24, 25, 26, 27, 28, 29, 30, 32, 34, 38, 41, 46, 47,
                  50, 52, 53, 55, 56, 61, 62, 63, 65, 67, 68, 70, 71, 72, 73,
                  76, 77, 78, 79, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 93,
                  94, 97, 99, 100, 102, 103, 105, 106, 107, 108, 109, 112,
                  113, 114, 115, 116, 117, 118, 119, 120)),
        ("ss", (1,)),
    ),
}


def _read_fasta_sequences(fasta_path):
    """Return the sequence string of every '>'-headed record in a FASTA file.

    Blank lines are skipped and the lines of one record are concatenated.
    Text that appears before the first header line is ignored.
    """
    sequences = []
    current = None  # parts of the record being read; None until a header is seen
    with open(fasta_path) as handle:
        for raw_line in handle:
            line = raw_line.rstrip('\n')
            if line == "":
                continue
            if line[0] == '>':
                if current is not None:
                    sequences.append("".join(current))
                current = []
            elif current is not None:
                current.append(line)
    if current is not None:
        sequences.append("".join(current))
    return sequences


def _run_r_extractor(script, function_name, *args, needs_protr=False):
    """Source an R script and call one of its functions with ``args``.

    When ``needs_protr`` is true, "file_with_pkgTest.R" is sourced first and
    its ``pkgTest`` is called for "base" and "protr" (package availability
    check), exactly as the original per-feature helpers did.
    """
    r = ro.r
    if needs_protr:
        r.source("file_with_pkgTest.R")  # check whether required R package is installed
        r.pkgTest("base")
        r.pkgTest("protr")
    r.source(script)
    return r[function_name](*args)


def feature_extraction(file_name,svalue):
    """Compute features for every sequence of a FASTA file and save them as CSV.

    Reads ``file_name + ".fasta"``, runs the R extractors listed in
    ``_FEATURE_EXTRACTORS`` on each sequence, keeps the columns chosen by
    ``svalue`` and writes them (with a header row, without an index column) to
    ``file_name + ".csv"``.

    Args:
        file_name: Path of the input file without the ".fasta" extension; the
            same stem is used for the output ".csv".
        svalue: Identifier of the feature selection to apply. Only 0 is
            defined.

    Raises:
        ValueError: If ``svalue`` has no feature selection defined, or if an R
            extractor returns a different number of values than its column
            group holds.
    """
    # Fail before any R work is done; previously an unknown svalue surfaced
    # only afterwards as a NameError on the undefined selection list.
    if svalue not in _SELECTED_FEATURE_INDICES:
        raise ValueError("no feature selection is defined for svalue=%r" % (svalue,))

    features_selected = [
        "%s_%s" % (prefix, index)
        for prefix, indices in _SELECTED_FEATURE_INDICES[svalue]
        for index in indices
    ]
    all_columns = [
        "%s_%s" % (prefix, index)
        for prefix, width, _script, _function, _needs_aaindex in _FEATURE_EXTRACTORS
        for index in range(1, width + 1)
    ]

    sequences = _read_fasta_sequences(file_name + ".fasta")

    # The pseudo-amino-acid extractors receive the path of AAidx.csv in the
    # current working directory, written with forward slashes.
    aaindex_path = os.getcwd().replace("\\", "/") + "/AAidx.csv"

    # Rows are collected as plain lists and turned into a frame once.
    # DataFrame.at only addresses single cells, so the former slice
    # assignments through .at fail on current pandas.
    rows = []
    for seq in sequences:
        row = []
        # NOTE(review): every script is sourced right before its function is
        # called, as before. The scripts are not visible here, so sourcing is
        # not hoisted out of the loop in case they redefine shared helpers.
        for prefix, width, script, function_name, needs_aaindex in _FEATURE_EXTRACTORS:
            if needs_aaindex:
                values = _run_r_extractor(
                    script, function_name, seq, aaindex_path, needs_protr=True
                )
            else:
                values = _run_r_extractor(script, function_name, seq)
            # assumes each extractor returns a flat vector of values - TODO confirm
            values = list(values)
            if len(values) != width:
                raise ValueError(
                    "%s returned %d values for the '%s' features, expected %d"
                    % (function_name, len(values), prefix, width)
                )
            row.extend(values)
        rows.append(row)

    all_generated_features = pd.DataFrame(rows, columns=all_columns)
    all_generated_features = all_generated_features[features_selected]
    all_generated_features.to_csv("%s.csv" %file_name, header=True, index=False)
    



#predicting antiviral peptide sequences
def predict_AVP_sequences(svalue):
    """Classify the input sequences with the R SVM script and report the hits.

    Sources "SVM_classifier_revised_new.R" and calls its ``predict_results``
    with the training CSV and "input_seq.csv" (both in the current working
    directory). The names of the records in "input_seq.fasta" whose prediction
    equals 1 are written to "predicted_AVP_sequences.fasta", preceded by a line
    stating how many there are.

    Args:
        svalue: Identifier of the feature selection / training set. Only 0 is
            defined.

    Raises:
        ValueError: If ``svalue`` is not 0 (previously this surfaced as an
            unbound ``training_file_path``).
    """
    if svalue != 0:
        raise ValueError("no training set is defined for svalue=%r" % (svalue,))

    # Absolute paths with forward slashes are handed to R.
    dir_path = os.getcwd().replace("\\", "/")
    training_file_path = dir_path + "/selected_train_test_merged_file.csv"
    testing_file_path = dir_path + "/input_seq.csv" #path for input sequences

    r = ro.r
    r.source("file_with_pkgTest.R") #check whether required R package is installed
    r.pkgTest("base")
    r.pkgTest("e1071") #install e1071 R package if not installed
    r.pkgTest("caret")
    r.source("SVM_classifier_revised_new.R") #calling R script to get prediction results from SVM
    predictions = r.predict_results(training_file_path, testing_file_path)

    # Only the record names are needed here: every non-blank line starting
    # with '>' contributes its text after the marker.
    sequences_name_input = []
    with open('input_seq.fasta') as fasta:
        for raw_line in fasta:
            line = raw_line.rstrip('\n')
            if line and line[0] == '>':
                sequences_name_input.append(line[1:])

    # Predictions are matched to record names by position.
    predicted_names = [
        str(sequences_name_input[position])
        for position, label in enumerate(predictions)
        if label == 1
    ]

    # Write the summary line and the names in one pass; the result is the same
    # as writing the names first and prepending the count afterwards.
    with open("predicted_AVP_sequences.fasta", "w") as predict_file:
        predict_file.write(
            "Total number of predicted AVP sequences = "
            + str(len(predicted_names)) + "\n" + "\n"
        )
        for name in predicted_names:
            predict_file.write(name + '\n' + '\n')


#include new sequences to the training and test datasets 

def add_new_sequences(svalue,sgval):
    """Append the features of the sequences in "input_seq.fasta" to the training set.

    Runs ``feature_extraction`` on "input_seq", labels every resulting row in
    an "Output" column, writes the labelled rows without a header to
    "seq_excld_header.csv" and appends that file's content to
    "selected_train_test_merged_file.csv".

    Args:
        svalue: Identifier of the feature selection / training set. Only 0 is
            defined.
        sgval: Class of the new sequences; 1 labels them 1, any other value
            labels them -1.

    Raises:
        ValueError: If ``svalue`` is not 0 (no training file is defined for
            other values).
    """
    if svalue != 0:
        raise ValueError("no training set is defined for svalue=%r" % (svalue,))

    training_file = "selected_train_test_merged_file.csv"

    # feature_extraction opens input_seq.fasta itself, so a missing input file
    # is still reported before anything is written.
    feature_extraction('input_seq',svalue)
    df1 = pd.read_csv('input_seq.csv')
    df1["Output"] = 1 if sgval==1 else -1

    # The header-less copy is kept on disk as before and read back in text
    # mode so that line endings are translated consistently on append.
    df1.to_csv("seq_excld_header.csv", header=False, index=False)
    with open("seq_excld_header.csv", "r") as file_new_features:
        data_new_features = file_new_features.read()

    # If the training file does not end with a line break, start a new line
    # first so the first new row is not glued onto the last existing row.
    needs_newline = False
    try:
        with open(training_file, "rb") as existing:
            existing.seek(0, os.SEEK_END)
            if existing.tell() > 0:
                existing.seek(-1, os.SEEK_END)
                needs_newline = existing.read(1) not in (b"\n", b"\r")
    except FileNotFoundError:
        pass  # append mode below creates the file

    with open(training_file, "a") as file_all_features:
        if needs_newline and data_new_features:
            file_all_features.write("\n")
        file_all_features.write(data_new_features)

# Reset the working training data set to its original contents.
def restore_training_data():
    """Overwrite the working training CSV with the "_actual" reference copy."""
    reference_copy = "selected_train_test_merged_file_actual.csv"
    working_copy = "selected_train_test_merged_file.csv"
    copyfile(reference_copy, working_copy)
    
    
    

    
# Build the graphical user interface: the main window, an (empty) canvas at
# the top and a two-line title below it. Widgets are packed in creation order.

root = tkinter.Tk()
root.title("FIRMAVP")
root.geometry("520x485")
root.configure(background="peach puff")
# Disabled: window transparency.
#root.wm_attributes('-alpha', 0.7)


# Canvas reserved for the tool logo. Nothing is drawn on it in the visible
# code because the create_image calls below are commented out.
# Disabled alternative size:
#canvas = Canvas(root, width=235, height = 139)
canvas = Canvas(root, width=159, height = 173, bg='peach puff',highlightthickness=0)
# Disabled alternative size/colour:
#canvas = Canvas(root, width=362, height = 352, bg='white smoke',highlightthickness=0)
canvas.pack(padx=1, pady=1)


# Logo of the tool (disabled; it relied on the PIL imports that are commented
# out at the top of the file).

#width=389
#height=398
#img = Image.open("AS_logo_design.gif")
#img = PhotoImage(file="PARGT_logo.gif")
#img = img.resize((width,height), Image.ANTIALIAS)
#photoImg =  ImageTk.PhotoImage(img)

#canvas.create_image(120,71, anchor=CENTER, image=img)
#canvas.create_image(140,145, anchor=CENTER, image=img)
#canvas.create_image(170,130, anchor=CENTER, image=photoImg)


# Title text shown in a borderless Text widget, two lines high.
T = Text(root, font="none 12 bold",bd=0,height=2, width=33, padx=0, pady=0)



T.pack(padx=0, pady=0)
# The leading spaces inside the string position the two lines. The text is
# tagged "center", but no tag_configure call for that tag is visible in this
# file.
T.insert(END, "         FIRM-AVP: A Tool for Antiviral\n                   Peptide Prediction","center")


#setup window

def setup_window(soption):
    """Show a small pop-up reporting the outcome of a submitted option.

    The pop-up says "Operation is successful!" when ``soption`` is one of the
    entries of ``choices`` and asks the user to select an option otherwise.
    An OK button closes it.
    """
    popup = Toplevel(root)
    popup.geometry("320x60")
    popup.configure(background="tan")

    if soption in choices:
        message = "Operation is successful!"
    else:
        message = "Please select an option"

    status_label = Label(popup, bg="tan", fg="black", text=message)
    status_label.config(font=('Helvetica', 8, 'bold'))
    status_label.pack()

    ok_button = Button(popup, text="OK", command=popup.destroy)
    ok_button.config(font=('Helvetica', 8, 'bold'))
    ok_button.pack()

    
#peform operation based on choice    
def submit():
    """Run the operation chosen in the option menu, then show the result pop-up.

    Every operation uses feature selection 0. New sequences are added with
    sign 1 for "Include new AVP sequences" and -1 for "Include new non-AVP
    sequences". When the current menu value is not one of ``choices`` no
    operation runs and the pop-up asks for a selection.
    """
    def run_prediction():
        feature_extraction('input_seq', 0)
        predict_AVP_sequences(0)

    def include_avp():
        add_new_sequences(0, 1)

    def include_non_avp():
        add_new_sequences(0, -1)

    # Map each menu entry to the operation it triggers.
    actions = {
        choices[0]: run_prediction,
        choices[1]: include_avp,
        choices[2]: include_non_avp,
        choices[3]: restore_training_data,
    }

    selection = var.get()
    action = actions.get(selection)
    if action is not None:
        action()

    setup_window("%s" % selection)


# Operations offered in the drop-down menu; submit() and setup_window() look
# the selected text up in this list.
choices = [
    'Predict AVP sequences',
    'Include new AVP sequences',
    'Include new non-AVP sequences',
    'Restore training sets',
]

# Holds the current menu selection; starts with a placeholder text that is
# not one of the choices.
var = tkinter.StringVar(root, value='< Select option>')

_ui_font = ('Helvetica', 8, 'bold')

# Drop-down menu: the same font is applied to the button face and to the
# menu entries.
option = tkinter.OptionMenu(root, var, *choices)
option.config(font=_ui_font)
option["menu"].config(font=_ui_font)
option.pack(padx=10, pady=40)

# Submit button runs the selected operation.
button = tkinter.Button(root, text="Submit", command=submit, font=_ui_font)
button.pack(padx=10, pady=10)


# Hand control to Tk; this call blocks until the main window is closed.
root.mainloop()