### ML notebook

In [58]:
# Number of random ideals generated for the data set.
n_samples = 10000
# Number of random polynomial generators drawn for each ideal.
gens_per_ideal = 2

In [59]:
# Polynomial ring in x, y over QQ. The active line selects the lexicographic
# ('lex') term order; the commented-out lines are alternative term orders.
# P.<x,y> = PolynomialRing(QQ, 2, order='deglex')
P.<x,y> = PolynomialRing(QQ, 2, order='lex')
# P.<x,y> = PolynomialRing(QQ, 2, order='degrevlex')
# P.<x,y> = PolynomialRing(QQ, 2, order=TermOrder('wdegrevlex',(1,3)))
# P.<x,y> = PolynomialRing(QQ, 2, order=TermOrder('negwdegrevlex',(1,3)))

#### Functions to create features and label each sample

In [60]:
def is_GB_worth(GB): # Receives a GB
    """Label a Groebner basis by how its elements use the ring variables.

    Every polynomial in ``GB`` is classified by the number of variables in
    which it has positive degree:

    * exactly one variable   -> counted in ``n_depend1``
    * two or more variables  -> counted in ``n_depend2``
    * no variable (constant) -> not counted

    The per-variable degrees are read from each polynomial with
    ``pol.degrees()`` instead of the notebook-level names ``x`` and ``y``.
    The name ``y`` is rebound to the label array further down the notebook,
    so relying on those globals makes this function fail when it is called
    again after that cell has run.

    :param GB: iterable of multivariate polynomials (each must provide
        ``degrees()``, returning one degree per ring variable).
    :returns: ``True`` when at least as many elements involve a single
        variable as involve several (an empty basis gives ``True``),
        ``False`` otherwise.
    """
    n_depend1 = 0
    n_depend2 = 0
    for pol in GB:
        # Number of variables that really occur in this polynomial.
        n_vars = len([deg for deg in pol.degrees() if deg > 0])

        if n_vars >= 2:
            n_depend2 += 1
        elif n_vars == 1:
            n_depend1 += 1
    return n_depend1 >= n_depend2

In [61]:
def max_deg_on_x(gens):
    """Return the largest exponent of the first variable over all terms of ``gens``.

    Every exponent tuple reported by ``exponents()`` is inspected at index 0.
    Returns -1 when there is no term at all (no generators, or only
    generators without terms).
    """
    best = -1
    first_var_exponents = (term[0] for poly in gens for term in poly.exponents())
    for value in first_var_exponents:
        best = max(best, value)
    return best
        
def max_deg_on_y(gens):
    """Return the largest exponent of the second variable over all terms of ``gens``.

    Every exponent tuple reported by ``exponents()`` is inspected at index 1.
    Returns -1 when there is no term at all (no generators, or only
    generators without terms).
    """
    best = -1
    second_var_exponents = (term[1] for poly in gens for term in poly.exponents())
    for value in second_var_exponents:
        best = max(best, value)
    return best

def max_total_deg(gens):
    """Return the largest value of ``degree(std_grading=True)`` among ``gens``.

    Raises ``ValueError`` (from ``max``) when ``gens`` is empty.
    """
    degrees = [poly.degree(std_grading=True) for poly in gens]
    return max(degrees)

def sum_total_deg(gens):
    """Return the sum of ``degree(std_grading=True)`` over all of ``gens``.

    An empty ``gens`` gives 0.
    """
    degrees = [poly.degree(std_grading=True) for poly in gens]
    return sum(degrees)

def proportion_on_x(gens):
    """Return the number of terms containing the first variable, per generator.

    A term counts when index 0 of its exponent tuple is positive. The total
    is divided by the number of generators (not by the number of terms), so
    despite the name the result can exceed 1.

    Raises ``ZeroDivisionError`` when ``gens`` is empty.
    """
    hits = 0
    for poly in gens:
        hits += len([term for term in poly.exponents() if term[0] > 0])
    return hits/len(gens)
    
def proportion_on_y(gens):
    """Return the number of terms containing the second variable, per generator.

    A term counts when index 1 of its exponent tuple is positive. The total
    is divided by the number of generators (not by the number of terms), so
    despite the name the result can exceed 1.

    Raises ``ZeroDivisionError`` when ``gens`` is empty.
    """
    hits = 0
    for poly in gens:
        hits += len([term for term in poly.exponents() if term[1] > 0])
    return hits/len(gens)

def terms_on_x(gens):
    """Count the terms, over all of ``gens``, that contain the first variable.

    A term counts when index 0 of its exponent tuple is positive.
    """
    total = 0
    for poly in gens:
        for term in poly.exponents():
            total += 1 if term[0] > 0 else 0
    return total
        
def terms_on_y(gens):
    """Count the terms, over all of ``gens``, that contain the second variable.

    A term counts when index 1 of its exponent tuple is positive.
    """
    total = 0
    for poly in gens:
        for term in poly.exponents():
            total += 1 if term[1] > 0 else 0
    return total

In [62]:
from collections import defaultdict

def n_homogeneous_comps(pol):
    """Count the homogeneous components of ``pol``.

    Iterating ``pol`` yields ``(coefficient, monomial)`` pairs; the result is
    the number of distinct values of ``monomial.degree()`` among them. Only
    the degrees are collected: the component polynomials themselves are not
    needed for the count, so they are no longer built.

    :param pol: polynomial (any iterable of ``(coeff, monom)`` pairs whose
        monomials provide ``degree()``).
    :returns: number of distinct monomial degrees; 0 when there are no terms.
    """
    return len(set(monom.degree() for _coeff, monom in pol))

#### Voy a generar una lista de ideales para los polinomios generados aleatoriamente y con información adicional

In [63]:
# One record per random ideal: the ideal, its nine features, and its label.
lstOfIdeals = []

for i in range(n_samples):
    generators = []
    n_homogeneous = 0
    total_homogeneous_comps=0
    
    for j in range(gens_per_ideal):
        # Random generator, requested with degree=5.
        p = P.random_element(degree=5)
        
        # Accumulate the homogeneous-component count of this polynomial.
        total_homogeneous_comps += n_homogeneous_comps(p)
        
        generators.append(p)
        if p.is_homogeneous():
            n_homogeneous += 1
    
    # Build the ideal from the generators above and compute its Groebner basis.
    I = ideal(generators)
    B = I.groebner_basis()
    # Record layout (positional; later cells index into it by position):
    #   [0] ideal, [1]-[9] features, [10] label.
    lstOfIdeals.append([I, 
                        n_homogeneous, 
                        max_deg_on_x(generators), 
                        max_deg_on_y(generators),
                        proportion_on_x(generators),
                        proportion_on_y(generators),
                        abs(terms_on_x(generators) - terms_on_y(generators)),
                        total_homogeneous_comps,
                        max_total_deg(generators),
                        sum_total_deg(generators),
                        is_GB_worth(B)])

# Preview the first three randomly generated records.
lstOfIdeals[:3]

[[Ideal (1/2*x^4*y + y^5 + y^2 - 1/3, -3/64*x^4 + 3/2*x^2*y^3 + 1/3*x^2*y^2 + 6*x^2*y - 1/4*x*y^3) of Multivariate Polynomial Ring in x, y over Rational Field,
  0,
  4,
  5,
  3,
  7/2,
  1,
  6,
  5,
  10,
  True],
 [Ideal (-x^4 + 8*x*y^2 - y^5 + 1/18*y^2, -2*x^4 - 1/3*x^3*y^2 - x^3*y - 1/5*x*y - 1/9) of Multivariate Polynomial Ring in x, y over Rational Field,
  0,
  4,
  5,
  3,
  3,
  0,
  8,
  5,
  10,
  True],
 [Ideal (3*x^2*y^2 + 1/25*x^2*y - 9/4*x*y^4 - 2/3*x*y - 1, 2/3*x^4*y + 3*x*y - 1/6*x) of Multivariate Polynomial Ring in x, y over Rational Field,
  0,
  4,
  4,
  7/2,
  3,
  1,
  8,
  5,
  10,
  True]]

In [64]:
!sage --pip install numpy scipy sklearn pandas



#### Definimos las features

In [65]:
import numpy as np

# Feature matrix: positions 1 through 9 of every record in lstOfIdeals
# (position 0 is the ideal itself and is left out).
X = np.array([item[1:10] for item in lstOfIdeals])
# Labels: position 10 of every record.
# NOTE: this rebinds the notebook-level name `y`, which until this cell
# referred to the ring generator introduced by `P.<x,y> = ...`.
y = np.array([item[10] for item in lstOfIdeals])

In [66]:
# Preview the first five feature rows.
X[:5]

array([[ 0. ,  4. ,  5. ,  3. ,  3.5,  1. ,  6. ,  5. , 10. ],
       [ 0. ,  4. ,  5. ,  3. ,  3. ,  0. ,  8. ,  5. , 10. ],
       [ 0. ,  4. ,  4. ,  3.5,  3. ,  1. ,  8. ,  5. , 10. ],
       [ 0. ,  3. ,  3. ,  3.5,  2.5,  2. ,  6. ,  5. ,  8. ],
       [ 0. ,  4. ,  4. ,  2. ,  4. ,  4. ,  6. ,  5. ,  9. ]])

In [67]:
# Preview the first five labels.
y[:5]

array([ True,  True,  True,  True, False])

In [68]:
from sklearn.model_selection import train_test_split

# Split features and labels into train and test parts, with test_size=0.33
# and a fixed random_state.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42
)

In [69]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
# classifier = SVC(C=57.55191991686398, 
#                      class_weight='balanced', 
#                      gamma=0.005838467078070335,
#                      kernel='rbf') # It isnt needed because it uses rbf as default

# classifier = SVC() #0.641515151515
# classifier= KNeighborsClassifier() #0.603333333333
# classifier=LogisticRegression() #0.640606060606
# classifier=DecisionTreeClassifier() #0.590909090909
# classifier=GaussianNB() #0.617878787879

# Active model: SVC with an RBF kernel and 'balanced' class weights.
# NOTE(review): gamma is written as the fraction 1/1024 and the recorded
# repr shows gamma=1/1024, i.e. it is passed as an exact rational rather than
# a float; keep the expression as is unless that is confirmed unnecessary.
classifier = SVC(
    C=1024,
    class_weight='balanced',
    gamma=1/1024,
    kernel='rbf'
)

# classifier = SVC(C= 57.55191991686398, class_weight= 'balanced', gamma= 0.005838467078070335, kernel= 'rbf')
# classifier = SVC(C= 4096, class_weight= None, gamma= 1/8192, kernel= 'rbf')

In [70]:
import scipy

# distributions = {'C': [2**(x) for x in range(-5, 16)],
#                  'gamma': [2**(x) for x in range(-15, 4)],
#                  'kernel': ['rbf'],
#                  'class_weight':['balanced', None]}

# Parameter distributions to use in the randomized search
# distributions={'C': scipy.stats.expon(scale=100), 'gamma': scipy.stats.expon(scale=.1),
# 'kernel': ['rbf', 'linear'], 'class_weight':['balanced', None]}

In [71]:
# Random search is far more efficient than grid search
# from sklearn.model_selection import RandomizedSearchCV

# clf = RandomizedSearchCV(classifier, distributions, random_state=0)
# search = clf.fit(X_train, y_train)
# search.best_params_

In [72]:
# Fit the selected classifier on the training split.
classifier.fit(X_train, y_train)

SVC(C=1024, cache_size=200, class_weight='balanced', coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=1/1024, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [73]:
# Predict labels for the held-out test split.
preds = classifier.predict(X_test)

In [74]:
# Tally how many test predictions agree (hits) or disagree (fails) with the
# true labels, walking both sequences in parallel.
hits = 0
fails = 0
for expected, predicted in zip(y_test, preds):
    if expected != predicted:
        fails += 1
    else:
        hits += 1

In [75]:
# Summarise test-set performance. float(hits) forces true division whatever
# the integer type of the counters, and printing one pre-built string gives
# the same plain-text line whether print is a statement or a function
# (a parenthesised, comma-separated print shows a tuple repr under Python 2).
accuracy = float(hits) / (hits + fails)
print('Hits: ' + str(hits) + ' Fails: ' + str(fails) + ' Ratio: ' + str(accuracy))

('Hits: 2140', 'Fails: 1160', 'Ratio: 0.648484848485')
