In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import timeit
import zipfile
from numpy import genfromtxt
import pandas as pd
import seaborn as sns

In [2]:
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import PolynomialFeatures

In [3]:
# Remember (and show) the directory the kernel was started in.
cdir = os.getcwd()
print(cdir)
# Project folder to work from. The location can be overridden with the
# AIML_PROJECT_DIR environment variable so the notebook is not tied to one
# user's home directory; the default is the originally hard-coded path.
zip_dir = os.environ.get('AIML_PROJECT_DIR', '/home/vazumah/AIML24787-Project/')
# Later cells open files with relative paths, so switch into that folder.
os.chdir(zip_dir)

/home/vazumah/AIML24787-Project


In [4]:
# Unpack the project data archive into the current working directory.
with zipfile.ZipFile('ML_Final_Project_Data.zip', mode='r') as zip_ref:
    zip_ref.extractall(path=os.getcwd())

In [5]:
def nucleotide_pore_data(name='mxene', data_dir='./ML Final Project Data'):
    """Load the four per-nucleotide CSV files for one pore and build the datasets.

    For each nucleotide prefix (``a``, ``c``, ``g``, ``t``) the file
    ``{data_dir}/{prefix}_{name}.csv`` is read with ``genfromtxt``.  The first
    row and first column of every file are discarded (presumably a header row
    and an index column -- confirm against the CSV files) and the remaining
    values are used as feature rows.  Rows are stacked in A, C, G, T order.

    Parameters
    ----------
    name : str
        Suffix used in the CSV file names (``a_{name}.csv`` and so on).
    data_dir : str
        Folder containing the CSV files.  Defaults to the folder that was
        previously hard-coded, so existing calls behave as before.

    Returns
    -------
    X : ndarray
        Stacked features, min-max scaled to the range [0, 1].
    y : ndarray of shape (n_rows, 1)
        Float class labels: 0 = A, 1 = C, 2 = G, 3 = T.
    dna_class : ndarray of shape (n_rows, 1)
        Float family flag: 0 for purines (A, G), 1 for pyrimidines (C, T).
    Xeng : ndarray
        The unscaled features with ``dna_class`` appended as the last column,
        then min-max scaled to [0, 1].

    Notes
    -----
    Both scalers are fitted on every row returned here, so a train/test split
    made afterwards shares scaling statistics between the two splits.
    """
    # (file prefix, class label, family flag); family is 0 = purine, 1 = pyrimidine.
    nucleotides = (
        ('a', 0, 0),  # A, purine
        ('c', 1, 1),  # C, pyrimidine
        ('g', 2, 0),  # G, purine
        ('t', 3, 1),  # T, pyrimidine
    )

    feature_blocks, label_blocks, family_blocks = [], [], []
    for prefix, label, family in nucleotides:
        raw = genfromtxt(f'{data_dir}/{prefix}_{name}.csv', delimiter=',')
        # Drop the first row and the first column of the parsed table.
        signal = raw[1:, 1:]
        n_rows = signal.shape[0]
        feature_blocks.append(signal)
        # Column vectors of floats, one entry per feature row.
        label_blocks.append(np.full((n_rows, 1), float(label)))
        family_blocks.append(np.full((n_rows, 1), float(family)))

    X_raw = np.concatenate(feature_blocks, axis=0)
    y = np.concatenate(label_blocks, axis=0)
    dna_class = np.concatenate(family_blocks, axis=0)
    # Engineered feature set: raw features plus the purine/pyrimidine flag.
    Xeng_raw = np.concatenate((X_raw, dna_class), axis=1)

    # Use one scaler per matrix rather than refitting a shared instance.
    X = MinMaxScaler(feature_range=(0, 1)).fit_transform(X_raw)
    Xeng = MinMaxScaler(feature_range=(0, 1)).fit_transform(Xeng_raw)

    return X, y, dna_class, Xeng

In [6]:
# Build the dataset for the 'mxene' files, then hold out 30% of the rows for
# testing. The fixed random_state keeps the split identical between runs.
Xxene, yxene, dna_xene, Xxeng = nucleotide_pore_data(name='mxene')
X_train, X_test, y_train, y_test = train_test_split(
    Xxene,
    yxene.ravel(),  # flatten the (n_rows, 1) label column to 1-D
    test_size=0.3,
    random_state=300,
)

In [7]:
# One support-vector classifier per kernel under comparison.
svm_model_l = svm.SVC(kernel='linear')           # linear kernel
svm_model_r = svm.SVC(kernel='rbf')              # RBF kernel (SVC's default)
svm_model_s = svm.SVC(kernel='sigmoid')          # sigmoid kernel
svm_model_p = svm.SVC(kernel='poly', degree=5)   # 5th-degree polynomial kernel

In [8]:
def _fit_and_score(model, X_fit, y_fit, X_eval, y_eval):
    """Fit ``model`` on one split and score its predictions on another.

    Returns a tuple ``(fitted, predictions, accuracy)`` where ``fitted`` is
    whatever ``model.fit`` returns, ``predictions`` is ``model.predict(X_eval)``
    and ``accuracy`` is ``accuracy_score(y_eval, predictions)``.
    """
    fitted = model.fit(X_fit, y_fit)
    predictions = model.predict(X_eval)
    return fitted, predictions, accuracy_score(y_eval, predictions)


# Train each kernel variant on the training split and evaluate it on the test
# split. Every name the cell originally defined is still assigned.
svm_model_lf, svm_model_l_res_pred, svm_model_l_score = _fit_and_score(
    svm_model_l, X_train, y_train, X_test, y_test)

svm_model_rf, svm_model_r_res_pred, svm_model_r_score = _fit_and_score(
    svm_model_r, X_train, y_train, X_test, y_test)

svm_model_sf, svm_model_s_res_pred, svm_model_s_score = _fit_and_score(
    svm_model_s, X_train, y_train, X_test, y_test)

svm_model_pf, svm_model_p_res_pred, svm_model_p_score = _fit_and_score(
    svm_model_p, X_train, y_train, X_test, y_test)