### ML notebook

In [1]:
# Number of random ideals generated for the data set (one sample per ideal).
n_samples = 10000
# Number of random polynomial generators drawn for each ideal.
gens_per_ideal = 3

In [2]:
# Bivariate polynomial ring over QQ with lexicographic term order. This Sage
# syntax also binds the generators to the notebook-level names `x` and `y`.
# The commented-out lines are alternative term orders that can be swapped in.
# P.<x,y> = PolynomialRing(QQ, 2, order='deglex')
P.<x,y> = PolynomialRing(QQ, 2, order='lex')
# P.<x,y> = PolynomialRing(QQ, 2, order='degrevlex')
# P.<x,y> = PolynomialRing(QQ, 2, order=TermOrder('wdegrevlex',(1,3)))
# P.<x,y> = PolynomialRing(QQ, 2, order=TermOrder('negwdegrevlex',(1,3)))

#### Functions to create features and label

In [3]:
def is_GB_worth(GB): # Receives a GB
    """Decide the label of a Groebner basis from which variables its elements use.

    Each basis element is classified by the first two generators of its own
    parent ring: it involves both variables, exactly one of them, or neither
    (constants are counted in no category).

    The variables are read from ``pol.parent().gens()`` instead of the
    notebook-level names ``x`` and ``y``. A later cell rebinds ``y`` to the
    label array, so relying on those globals makes this function fail when
    the data-generation cell is re-run afterwards.

    Parameters
    ----------
    GB : iterable of polynomials
        Basis elements; their parent ring must have at least two generators.

    Returns
    -------
    bool
        True when at least as many elements involve exactly one variable as
        involve both variables (an empty basis therefore yields True).
    """
    n_depend1 = 0  # elements involving exactly one variable
    n_depend2 = 0  # elements involving both variables
    for pol in GB:
        ring_gens = pol.parent().gens()
        var_x, var_y = ring_gens[0], ring_gens[1]

        # Check whether the element depends on each variable
        depend_x = pol.degree(var_x) > 0
        depend_y = pol.degree(var_y) > 0

        # Update the tally for the matching category
        if depend_x and depend_y:
            n_depend2 += 1
        elif depend_x or depend_y:
            n_depend1 += 1
    return n_depend1 >= n_depend2
    
def terms_on_x(gens):
    """Count, over all polynomials in ``gens``, the terms whose first exponent is positive.

    Each polynomial must provide ``exponents()``, yielding one exponent
    vector per term; position 0 of that vector is inspected.
    """
    count = 0
    for poly in gens:
        # Tally this polynomial's terms whose first exponent is positive
        count += len([e for e in poly.exponents() if e[0] > 0])
    return count
        
def terms_on_y(gens):
    """Count, over all polynomials in ``gens``, the terms whose second exponent is positive.

    Each polynomial must provide ``exponents()``, yielding one exponent
    vector per term; position 1 of that vector is inspected.
    """
    count = 0
    for poly in gens:
        # Tally this polynomial's terms whose second exponent is positive
        count += len([e for e in poly.exponents() if e[1] > 0])
    return count

In [4]:
from collections import defaultdict

def n_homogeneous_comps(pol):
    """Return the number of homogeneous components of ``pol``.

    Iterating over ``pol`` must yield ``(coefficient, monomial)`` pairs, and
    each monomial must provide ``degree()``. The result is the number of
    distinct monomial degrees, so a polynomial with no terms gives 0.
    """
    # Only the count is needed, so collect the distinct degrees directly
    # instead of summing each component's terms into a polynomial.
    return len({monom.degree() for _, monom in pol})

#### Voy a generar una lista de ideales para los polinomios generados aleatoriamente y con información adicional

In [5]:
# Each record is [ideal, #homogeneous generators, #terms involving x,
# #terms involving y, difference of the two term counts,
# total homogeneous components, label].
lstOfIdeals = []

for sample_idx in range(n_samples):
    generators = []
    n_homogeneous = 0
    total_homogeneous_comps = 0

    for gen_idx in range(gens_per_ideal):
        poly = P.random_element(degree=5)
        generators.append(poly)

        # Accumulate the per-generator statistics
        total_homogeneous_comps += n_homogeneous_comps(poly)
        if poly.is_homogeneous():
            n_homogeneous += 1

    # Term counts over all generators of this ideal
    x_terms = terms_on_x(generators)
    y_terms = terms_on_y(generators)

    # Build the ideal from the generators above and compute its Groebner basis
    I = ideal(generators)
    B = I.groebner_basis()

    record = [I,
              n_homogeneous,
              x_terms,
              y_terms,
              x_terms - y_terms,
              total_homogeneous_comps,
              is_GB_worth(B)]
    lstOfIdeals.append(record)

# Show the first few randomly generated records
lstOfIdeals[:3]

[[Ideal (-13*x^4 - 1/3*x*y^3 + 81*y^3, 3*x^4*y + 8/3*x^2*y^3 + 1, 10/3*x^2*y - 4/3*x^2 + 1/2) of Multivariate Polynomial Ring in x, y over Rational Field,
  0,
  6,
  5,
  1,
  7,
  True],
 [Ideal (-1/5*x^4*y - 3/4*y^3 + 1/2, x^3 + 1/2*x^2*y^3 - 1/3*x^2*y - 1/238*y^4, x^4*y + 1/7*x - y^5 - 1/2*y - 1) of Multivariate Polynomial Ring in x, y over Rational Field,
  0,
  6,
  8,
  -2,
  9,
  True],
 [Ideal (-x^4*y + x^3 - x - 1, x*y^4 - y^5 - 11*y^2, 1/18*x^2*y^2) of Multivariate Polynomial Ring in x, y over Rational Field,
  1,
  5,
  5,
  0,
  7,
  True]]

In [6]:
# Install the third-party packages into Sage's own Python environment.
# The PyPI distribution that provides `import sklearn` is named `scikit-learn`.
!sage --pip install numpy scipy scikit-learn pandas



#### Definimos las features

In [7]:
import numpy as np

# Feature matrix: entries 1..5 of every record (entry 0 is the ideal itself).
X = np.array([record[1:6] for record in lstOfIdeals])
# Label vector: entry 6 of every record.
# NOTE: this rebinds the notebook-level name `y`, which until this cell
# referred to the polynomial ring generator.
y = np.array([record[6] for record in lstOfIdeals])

In [8]:
# Preview the first five feature rows.
X[:5]

array([[ 0,  6,  5,  1,  7],
       [ 0,  6,  8, -2,  9],
       [ 1,  5,  5,  0,  7],
       [ 0,  9, 10, -1, 13],
       [ 0,  7,  6,  1, 10]])

In [9]:
# Preview the first five labels.
y[:5]

array([ True,  True,  True,  True,  True])

In [10]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the samples for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [11]:
from sklearn import svm

# Earlier candidates, kept for reference:
# classifier = svm.SVC(C=57.55191991686398,
#                      class_weight='balanced',
#                      gamma=0.005838467078070335,
#                      kernel='rbf')  # not needed: 'rbf' is already the default
# classifier = svm.SVC()

# RBF-kernel support vector classifier with fixed hyper-parameters.
classifier = svm.SVC(
    C=4096,
    class_weight=None,
    gamma=1/8192,
    kernel='rbf',
)

In [12]:
import scipy

# distributions = {'C': [2**(x) for x in range(-5, 16)],
#                  'gamma': [2**(x) for x in range(-15, 4)],
#                  'kernel': ['rbf'],
#                  'class_weight':['balanced', None]}

# Parameter distributions to use in the randomized search
#distributions={'C': scipy.stats.expon(scale=100), 'gamma': scipy.stats.expon(scale=.1),
# 'kernel': ['rbf', 'linear'], 'class_weight':['balanced', None]}

In [13]:
# Random search is far more efficient than grid search
# from sklearn.model_selection import RandomizedSearchCV

# clf = RandomizedSearchCV(classifier, distributions, random_state=0)
# search = clf.fit(X_train, y_train)
# search.best_params_

In [14]:
# Fit the classifier on the training split.
classifier.fit(X_train, y_train)

SVC(C=4096, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=1/8192, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [15]:
# Predict labels for the held-out test split.
preds = classifier.predict(X_test)

In [16]:
# Count how many test predictions match the true labels and how many do not.
hits = 0
fails = 0
for expected, predicted in zip(y_test, preds):
    if expected == predicted:
        hits += 1
    else:
        fails += 1

In [17]:
# Build one string so the message reads the same whether `print` is a
# statement (where the two-argument form prints a tuple) or a function.
print('Hits: ' + str(hits) + ' Fails: ' + str(fails))

('Hits: 3116', 'Fails: 184')
