# Imports & settings

In [45]:
import numpy as np

In [46]:
def process_training_dataset(file_path):
    """
        Read the generated training data set for the SVM classifier and return it as numpy arrays,
        ready to be exported as input for the AMPL optimization problem solver.

        Each non-blank line of the file is split on whitespace. The first four fields are parsed as
        the coordinates of a data point and the fifth field as its label. Any trailing '*' characters
        attached to the label field are stripped before the conversion to float.
        Blank or whitespace-only lines are ignored.

        Parameters:
            file_path: path to the .dat file holding the training data.

        Returns:
            A tuple (A, y), where A is a numpy array with one row per data point and y is a numpy
            array with the corresponding labels, in file order.

        Raises:
            IndexError: if a non-blank line has fewer than five fields.
            ValueError: if a field cannot be converted to float.
    """

    # Data structures initialization.
    data = []
    labels = []

    # Reads the file and processes it.
    with open(file_path, 'r') as file:
        for line in file:
            # split() with no arguments already discards leading/trailing whitespace.
            values = line.split()

            # A blank line (e.g. an extra newline at the end of the file) carries no data;
            # skip it instead of failing on the label lookup below.
            if not values:
                continue

            # Extracts the four dimensional data point.
            data.append([float(value) for value in values[:4]])

            # Extracts the point label, dropping any '*' marker glued to it.
            labels.append(float(values[4].rstrip('*')))

    # Converts the data to numpy arrays.
    A = np.array(data)
    y = np.array(labels)

    return A, y

In [47]:
# Parse the generated data set: A receives the data points, y the labels.
A, y = process_training_dataset('data_generated.dat')
# Bare expression so that the notebook cell displays both arrays.
A, y

(array([[0.083, 0.958, 0.053, 0.068],
        [0.265, 0.453, 0.948, 0.951],
        [0.321, 0.267, 0.741, 0.406],
        ...,
        [0.688, 0.562, 0.66 , 0.628],
        [0.889, 0.786, 0.797, 0.931],
        [0.32 , 0.989, 0.137, 0.899]]),
 array([ 1.,  1., -1., -1.,  1., -1., -1.,  1., -1., -1., -1.,  1., -1.,
         1., -1., -1.,  1., -1., -1.,  1., -1.,  1.,  1.,  1., -1., -1.,
         1.,  1., -1., -1.,  1.,  1.,  1.,  1.,  1.,  1., -1.,  1.,  1.,
         1.,  1., -1., -1.,  1., -1.,  1., -1.,  1., -1., -1.,  1., -1.,
        -1.,  1.,  1., -1., -1.,  1.,  1., -1.,  1., -1.,  1.,  1.,  1.,
        -1., -1., -1., -1.,  1.,  1., -1., -1.,  1., -1.,  1., -1., -1.,
         1.,  1.,  1., -1.,  1., -1., -1.,  1., -1.,  1., -1.,  1., -1.,
        -1., -1., -1.,  1., -1.,  1., -1., -1.,  1., -1., -1., -1.,  1.,
         1., -1., -1., -1.,  1.,  1., -1.,  1., -1., -1., -1., -1.,  1.,
         1., -1.,  1., -1., -1.,  1., -1., -1.,  1.,  1.,  1.,  1.,  1.,
         1.,  1.,  1.,  1.,

In [48]:
def training_data_to_AMPL(file_path, A, y):
    """
        Write the training data (as returned by the processing function above) to a .dat file
        meant to be consumed by AMPL.

        The file contains, in this order:
            - a "param n" statement with the number of data points (len(A));
            - a "param A" section with one tab-indented, tab-separated line per row of A;
            - a "param y" section with all the labels on a single tab-indented, tab-separated line.
        Every number is written with three decimal places, and each section is closed by a line
        holding a single ';'.

        NOTE(review): the param sections carry no index columns or ':=' -- confirm that this layout
        is what the target AMPL model expects.

        Parameters:
            file_path: path of the .dat file to create (overwritten if it already exists).
            A: the points in space, one row per point.
            y: the labels, one per point.
    """
    num_points = len(A)

    def tab_line(values):
        # One output line: a leading tab, then the values with 3 decimals, tab-separated.
        return "\t" + "\t".join(f"{v:.3f}" for v in values) + "\n"

    with open(file_path, 'w') as out:
        # Number of data points.
        out.write(f"param n := {num_points};\n")

        # Points matrix, one row per line.
        out.write("param A :\n")
        for row in A:
            out.write(tab_line(row))
        out.write(";\n")

        # Labels vector, all on one line.
        out.write("param y :\n")
        out.write(tab_line(y))
        out.write(";\n")


In [49]:
# Export the processed points (A) and labels (y) to the .dat file for AMPL.
training_data_to_AMPL('svm_training_data.dat', A, y)