# In [None]:
# Display the chemical features of each compound in two dimensions using the GTM (Generative Topographic Mapping) method
# -*- coding: utf-8 -*- 
import pandas as pd
import matplotlib.figure as figure
import matplotlib.pyplot as plt
from dcekit.generative_model import GTM
import numpy as np

# --- GTM settings ---
# Size of the two-dimensional GTM map
shape_of_map = [15, 15]
# Number of RBF centers along each map axis
shape_of_rbf_centers = [5, 5]
# Variance of the RBFs
variance_of_rbfs = 5
# Lambda value used in the EM algorithm
lambda_in_em_algorithm = 1e-2
# Number of iterations of the algorithm
number_of_iterations = 100
# Flag controlling whether the learning process is displayed
display_flag = 1

# Path of the input CSV file, without its extension
name = "./data/drug_data"
# Read the comma-separated file; the first row supplies the column names
data = pd.read_csv(f"{name}.csv", sep=",", header=0)

# Feature data: every column except "quality" and "lnROS"
data_set = data.drop(columns=["quality", "lnROS"])

# Class labels, kept as a one-column frame whose column is labelled "0".
# The relabelling was introduced as a workaround for matplotlib's
# "'c' argument must be a color" ValueError.
data_target = data["quality"].to_frame(name="0")

# Autoscale each feature (subtract the column mean, divide by the sample
# standard deviation), then replace any resulting NaN with 0
data_set = (
    data_set
    .sub(data_set.mean(axis=0))
    .div(data_set.std(axis=0, ddof=1))
    .fillna(0)
)

# Build the GTM model from the configured settings and fit it to the scaled features
model = GTM(
    shape_of_map,
    shape_of_rbf_centers,
    variance_of_rbfs,
    lambda_in_em_algorithm,
    number_of_iterations,
    display_flag,
)
model.fit(data_set)

if model.success_flag:
    # Responsibilities computed by the fitted model for the input samples
    responsibilities = model.responsibility(data_set)

    # Mean position of each sample on the map: the responsibilities
    # multiplied by the coordinates of the map grid points
    means = responsibilities.dot(model.map_grids)

    # scatter() needs one value per point to color the points by class.
    # A DataFrame iterates over its column labels rather than its rows, so
    # passing the frame itself makes its single label be treated as a color
    # name instead of the per-sample class values. Flatten to a 1-D array.
    class_values = np.asarray(data_target).ravel()
    if not np.issubdtype(class_values.dtype, np.number):
        # Non-numeric labels cannot be colormapped directly;
        # encode each distinct label as an integer code
        class_values = pd.factorize(class_values)[0]

    plt.figure(figsize=(30, 30))

    plt.scatter(means[:, 0], means[:, 1], c=class_values)

    plt.ylim(-1.1, 1.1)
    plt.xlim(-1.1, 1.1)
    plt.xlabel("x-coordinate")
    plt.ylabel("y-coordinate")

    # Save before show(): the figure may be cleared once the window is closed
    plt.savefig('GTM_chemical_space.jpg')

    plt.show()
    # Display the map coordinates in the same order as the input data
    print("means")
    # Output z1, z2
    print(means)
    np.savetxt('GTM_chemical_space.csv', means, delimiter=',')
else:
    # Without this branch a failed fit ends the script with no output at all
    print("GTM fitting did not succeed; no map was plotted or saved.")
