### ML notebook

In [7]:
# Size of the generated dataset and number of generators per random ideal.
n_samples = 10000
gens_per_ideal = 2

# Sage's random generators (randint, P.random_element, ...) draw from Sage's
# own random state, which Python's `random.seed` does not touch. Calling
# set_random_seed() without an argument picks a fresh seed on every run, so an
# explicit seed is passed here to make the generated dataset reproducible.
RANDOM_SEED = 6
set_random_seed(RANDOM_SEED)

import random

# Also seed Python's global generator for any code that uses it directly.
random.seed(RANDOM_SEED)

In [8]:
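# Alternative term orders, kept for reference; the experiment cells below
# define P with order='lex'. Swap one of these in to try another ordering.
# P.<x,y> = PolynomialRing(QQ, 2, order='deglex')
# P.<x,y> = PolynomialRing(QQ, 2, order='lex')
# P.<x,y> = PolynomialRing(QQ, 2, order='degrevlex')
# P.<x,y> = PolynomialRing(QQ, 2, order=TermOrder('wdegrevlex',(1,3)))
# P.<x,y> = PolynomialRing(QQ, 2, order=TermOrder('negwdegrevlex',(1,3)))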

#### Functions to create the features and the label

In [9]:
def is_GB_worth(GB):
    """Label a Groebner basis as "worth it" or not.

    Each polynomial of ``GB`` is classified by whether it has positive degree
    in the global variables ``x`` and ``y``. The basis is worth it when the
    number of polynomials involving exactly one of the two variables is at
    least the number of polynomials involving both.

    Returns ``True`` or ``False``.
    """
    single_var = 0
    both_vars = 0
    for pol in GB:
        # Check whether the polynomial depends on x and on y.
        uses_x = pol.degree(x) > 0
        uses_y = pol.degree(y) > 0

        # Update the count of polynomials in each category; polynomials
        # involving neither variable are not counted at all.
        if uses_x and uses_y:
            both_vars += 1
        elif uses_x or uses_y:
            single_var += 1
    return single_var >= both_vars
    
def terms_on_x(gens):
    """Count, over all polynomials in ``gens``, the terms involving the first variable.

    A term is counted when the first entry of its exponent vector is positive.
    """
    count = 0
    for poly in gens:
        count += len([e for e in poly.exponents() if e[0] > 0])
    return count
        
def terms_on_y(gens):
    """Count, over all polynomials in ``gens``, the terms involving the second variable.

    A term is counted when the second entry of its exponent vector is positive.
    """
    count = 0
    for poly in gens:
        count += len([e for e in poly.exponents() if e[1] > 0])
    return count

def total_terms_diff(gens):
    """Return the absolute value of the alternating sum of term counts.

    Polynomials at even positions of ``gens`` add their number of terms and
    those at odd positions subtract it; for two generators this is the
    absolute difference of their term counts.
    """
    signed_total = 0
    for position, poly in enumerate(gens):
        term_count = len(poly.exponents())
        if position % 2:
            signed_total -= term_count
        else:
            signed_total += term_count
    return abs(signed_total)

def total_degs_diff(gens):
    """Return the absolute value of the alternating sum of total degrees.

    The total degree of each polynomial is the largest sum of the first two
    exponent entries over its terms (0 when it has no terms). Even positions
    of ``gens`` add their degree and odd positions subtract it.
    """
    signed_total = 0
    for position, poly in enumerate(gens):
        # Leading 0 reproduces the "no terms" / all-constant case.
        top_degree = max([0] + [e[0] + e[1] for e in poly.exponents()])
        if position % 2:
            signed_total -= top_degree
        else:
            signed_total += top_degree
    return abs(signed_total)
    

In [10]:
from collections import defaultdict

def n_homogeneous_comps(pol):
    """Return the number of homogeneous components of ``pol``.

    ``pol`` is iterated as ``(coefficient, monomial)`` pairs and the result is
    the number of distinct values of ``monomial.degree()`` among its terms
    (0 when the iteration yields no terms).

    Only the degrees are needed for the count, so the components themselves
    are not assembled.
    """
    return len({monom.degree() for coeff, monom in pol})

#### Voy a generar una lista de ideales para los polinomios generados aleatoriamente y con información adicional

In [11]:
def _random_ideal_dataset(ring, n_ideals, n_gens):
    """Build feature rows and labels for randomly generated ideals of ``ring``.

    For each of the ``n_ideals`` samples, ``n_gens`` random polynomials of a
    random degree bound in [2, 10] are drawn from ``ring``, the Groebner basis
    of the ideal they generate is computed, and the sample is labelled with
    ``is_GB_worth`` of that basis.

    Returns a pair ``(feature_rows, labels)`` where every feature row is
    ``[n_homogeneous, total_degs_diff, total_terms_diff, terms_on_x,
    terms_on_y, terms_on_x - terms_on_y, total_homogeneous_comps]``.
    """
    feature_rows = []
    labels = []
    for _sample in range(n_ideals):
        generators = []
        n_homogeneous = 0
        total_homogeneous_comps = 0

        for _gen in range(n_gens):
            pol_degree = randint(2, 10)
            p = ring.random_element(pol_degree)

            # Homogeneous components of the given polynomial.
            total_homogeneous_comps += n_homogeneous_comps(p)

            generators.append(p)
            if p.is_homogeneous():
                n_homogeneous += 1

        # Define the ideal from the generators above and label its basis.
        basis = ideal(generators).groebner_basis()
        labels.append(is_GB_worth(basis))

        # Computed once and reused for the difference feature.
        x_terms = terms_on_x(generators)
        y_terms = terms_on_y(generators)
        feature_rows.append([n_homogeneous,
                             total_degs_diff(generators),
                             total_terms_diff(generators),
                             x_terms,
                             y_terms,
                             x_terms - y_terms,
                             total_homogeneous_comps])

    return feature_rows, labels


def do_experiment():
    """Generate a dataset, train an RBF SVM on it and return its test accuracy.

    Reads the globals ``P`` (polynomial ring), ``n_samples`` and
    ``gens_per_ideal``. The samples are split 80/20 into train and test sets
    with a fixed ``random_state``; the number of positive labels and the
    classification report for the test set are printed.

    Returns the fraction of test samples predicted correctly (between 0 and 1).
    """
    import numpy as np
    from sklearn import svm
    from sklearn.metrics import classification_report
    from sklearn.model_selection import train_test_split

    feature_rows, labels = _random_ideal_dataset(P, n_samples, gens_per_ideal)

    isWorthCount = 0
    for label in labels:
        if label:
            isWorthCount += 1
    print("Worth it bases in the given dataset "+str(isWorthCount))

    X = np.array(feature_rows)
    y = np.array(labels)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

    classifier = svm.SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
                  decision_function_shape='ovr', degree=3, gamma=0.0100000000000000,
                  kernel='rbf', max_iter=-1, probability=False, random_state=None,
                  shrinking=True, tol=0.001, verbose=False)

    classifier.fit(X_train, y_train)

    preds = classifier.predict(X_test)

    hits = 0
    fails = 0
    for expected, predicted in zip(y_test, preds):
        if expected == predicted:
            hits += 1
        else:
            fails += 1

    print("Classification report:")
    print(classification_report(y_test,preds))

    # NOTE(review): in a Sage session the counters are Sage Integers, so this
    # is an exact division; confirm before running under plain Python 2, where
    # int / int truncates.
    return hits/(hits+fails) # Return score (between 0 and 1)

In [12]:
# Number of repetitions of the experiment to average over.
iters=1
# Running sum of the scores returned by do_experiment().
t_score=0.0

for i in range(iters):
    # Define the ring that do_experiment() reads as the global P; x and y are
    # the generators that is_GB_worth() reads as globals.
    P.<x,y> = PolynomialRing(QQ, 2, order='lex')
    print('INFO: iter '+str(i+1))
    t_score += do_experiment()
    
# Mean score over all repetitions.
avg_score=t_score/iters

INFO: iter 1
Worth it bases in the given dataset 5199
Classification report:
              precision    recall  f1-score   support

       False       0.66      0.60      0.63       973
        True       0.65      0.71      0.68      1027

   micro avg       0.65      0.65      0.65      2000
   macro avg       0.65      0.65      0.65      2000
weighted avg       0.65      0.65      0.65      2000



In [13]:
# Display the mean score computed by the experiment loop.
avg_score

0.654000000000000

### The code below performs a grid search to find the best hyperparameters

In [14]:
# Hyperparameter grid passed to GridSearchCV in the next cell: 4 values of C
# times 4 values of gamma, RBF kernel only.
param_grid = {'C': [0.1,1,10,100], 'gamma': [1,0.1,0.01,0.001],'kernel': ['rbf']}

In [15]:
# Grid search over `param_grid` on a freshly generated dataset.
# NOTE(review): the dataset-generation part below repeats the code of
# do_experiment(); consider sharing it through a single helper.
P.<x,y> = PolynomialRing(QQ, 2, order='lex')
# One entry per sample: [ideal, 7 feature values..., label].
lstOfIdeals = []
isWorthCount = 0
for i in range(n_samples):
    generators = []
    n_homogeneous = 0
    total_homogeneous_comps=0

    for j in range(gens_per_ideal):
        # Random degree bound in [2, 10] for each generator.
        pol_degree=randint(2, 10)
        p = P.random_element(pol_degree)

        # homogeneous components for the given polynomial
        total_homogeneous_comps += n_homogeneous_comps(p)

        generators.append(p)
        if p.is_homogeneous():
            n_homogeneous += 1

    # Define the ideal from the generators above
    I = ideal(generators)
    B = I.groebner_basis()
    # Label of the sample.
    isWorth = is_GB_worth(B)

    if isWorth:
        isWorthCount+=1

    lstOfIdeals.append([I, 
                        n_homogeneous,
                        total_degs_diff(generators),
                        total_terms_diff(generators),
                        terms_on_x(generators), 
                        terms_on_y(generators), 
                        terms_on_x(generators) - terms_on_y(generators),
                        total_homogeneous_comps, 
                        isWorth])

print("Worth it bases in the given dataset "+str(isWorthCount))

import numpy as np

# Columns 1-7 of each entry are the features, column 8 is the label.
X = np.array([[item[1],item[2],item[3],item[4],item[5],item[6],item[7]] for item in lstOfIdeals])
y = np.array([item[8] for item in lstOfIdeals])

from sklearn.model_selection import train_test_split

# 80/20 train/test split with a fixed random_state.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

from sklearn import svm
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV

# Search the grid on the training set only; the test set is used just for the
# final report below.
grid = GridSearchCV(svm.SVC(),param_grid,refit=True,verbose=2)
grid.fit(X_train,y_train)

print("Best estimator")
print(grid.best_estimator_)

# With refit=True, predictions come from the best estimator found.
preds = grid.predict(X_test)

print("Confunsion matrix and report:")
print(confusion_matrix(y_test,preds))
print(classification_report(y_test,preds))

Worth it bases in the given dataset 5200
Fitting 3 folds for each of 16 candidates, totalling 48 fits
[CV] kernel=rbf, C=0.100000000000000, gamma=1 ........................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] ......... kernel=rbf, C=0.100000000000000, gamma=1, total=   1.3s
[CV] kernel=rbf, C=0.100000000000000, gamma=1 ........................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.9s remaining:    0.0s


[CV] ......... kernel=rbf, C=0.100000000000000, gamma=1, total=   1.4s
[CV] kernel=rbf, C=0.100000000000000, gamma=1 ........................
[CV] ......... kernel=rbf, C=0.100000000000000, gamma=1, total=   1.3s
[CV] kernel=rbf, C=0.100000000000000, gamma=0.100000000000000 ........
[CV]  kernel=rbf, C=0.100000000000000, gamma=0.100000000000000, total=   1.2s
[CV] kernel=rbf, C=0.100000000000000, gamma=0.100000000000000 ........
[CV]  kernel=rbf, C=0.100000000000000, gamma=0.100000000000000, total=   1.2s
[CV] kernel=rbf, C=0.100000000000000, gamma=0.100000000000000 ........
[CV]  kernel=rbf, C=0.100000000000000, gamma=0.100000000000000, total=   1.2s
[CV] kernel=rbf, C=0.100000000000000, gamma=0.0100000000000000 .......
[CV]  kernel=rbf, C=0.100000000000000, gamma=0.0100000000000000, total=   1.2s
[CV] kernel=rbf, C=0.100000000000000, gamma=0.0100000000000000 .......
[CV]  kernel=rbf, C=0.100000000000000, gamma=0.0100000000000000, total=   1.2s
[CV] kernel=rbf, C=0.100000000000000, ga

[Parallel(n_jobs=1)]: Done  48 out of  48 | elapsed:  1.6min finished


Best estimator
SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.100000000000000,
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)
Confunsion matrix and report:
[[608 339]
 [305 748]]
              precision    recall  f1-score   support

       False       0.67      0.64      0.65       947
        True       0.69      0.71      0.70      1053

   micro avg       0.68      0.68      0.68      2000
   macro avg       0.68      0.68      0.68      2000
weighted avg       0.68      0.68      0.68      2000



In [16]:
# Second hyperparameter grid for the next cell: much larger C values and much
# smaller gamma values than the first grid, RBF kernel only.
param_grid = {'C': [2e5,2e7,2e8,2e9,2e10],
              'gamma': [2e-10,2e-9,2e-8,2e-7,2e-6,2e-5],
              'kernel': ['rbf']}

In [17]:
# Grid search over the current `param_grid` on a freshly generated dataset.
# NOTE(review): the dataset-generation part below repeats the code of
# do_experiment(); consider sharing it through a single helper.
P.<x,y> = PolynomialRing(QQ, 2, order='lex')
# One entry per sample: [ideal, 7 feature values..., label].
lstOfIdeals = []
isWorthCount = 0
for i in range(n_samples):
    generators = []
    n_homogeneous = 0
    total_homogeneous_comps=0

    for j in range(gens_per_ideal):
        # Random degree bound in [2, 10] for each generator.
        pol_degree=randint(2, 10)
        p = P.random_element(pol_degree)

        # homogeneous components for the given polynomial
        total_homogeneous_comps += n_homogeneous_comps(p)

        generators.append(p)
        if p.is_homogeneous():
            n_homogeneous += 1

    # Define the ideal from the generators above
    I = ideal(generators)
    B = I.groebner_basis()
    # Label of the sample.
    isWorth = is_GB_worth(B)

    if isWorth:
        isWorthCount+=1

    lstOfIdeals.append([I, 
                        n_homogeneous,
                        total_degs_diff(generators),
                        total_terms_diff(generators),
                        terms_on_x(generators), 
                        terms_on_y(generators), 
                        terms_on_x(generators) - terms_on_y(generators),
                        total_homogeneous_comps, 
                        isWorth])

print("Worth it bases in the given dataset "+str(isWorthCount))

import numpy as np

# Columns 1-7 of each entry are the features, column 8 is the label.
X = np.array([[item[1],item[2],item[3],item[4],item[5],item[6],item[7]] for item in lstOfIdeals])
y = np.array([item[8] for item in lstOfIdeals])

from sklearn.model_selection import train_test_split

# 80/20 train/test split with a fixed random_state.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

from sklearn import svm
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV

# Search the grid on the training set only; the test set is used just for the
# final report below.
grid = GridSearchCV(svm.SVC(),param_grid,refit=True,verbose=2)
grid.fit(X_train,y_train)

print("Best estimator")
print(grid.best_estimator_)

# With refit=True, predictions come from the best estimator found.
preds = grid.predict(X_test)

print("Confunsion matrix and report:")
print(confusion_matrix(y_test,preds))
print(classification_report(y_test,preds))

Worth it bases in the given dataset 5178
Fitting 3 folds for each of 30 candidates, totalling 90 fits
[CV] kernel=rbf, C=200000.000000000, gamma=2.00000000000000e-10 ......


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  kernel=rbf, C=200000.000000000, gamma=2.00000000000000e-10, total=   1.3s
[CV] kernel=rbf, C=200000.000000000, gamma=2.00000000000000e-10 ......


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.9s remaining:    0.0s


[CV]  kernel=rbf, C=200000.000000000, gamma=2.00000000000000e-10, total=   1.3s
[CV] kernel=rbf, C=200000.000000000, gamma=2.00000000000000e-10 ......
[CV]  kernel=rbf, C=200000.000000000, gamma=2.00000000000000e-10, total=   1.3s
[CV] kernel=rbf, C=200000.000000000, gamma=2.00000000000000e-9 .......
[CV]  kernel=rbf, C=200000.000000000, gamma=2.00000000000000e-9, total=   1.2s
[CV] kernel=rbf, C=200000.000000000, gamma=2.00000000000000e-9 .......
[CV]  kernel=rbf, C=200000.000000000, gamma=2.00000000000000e-9, total=   1.2s
[CV] kernel=rbf, C=200000.000000000, gamma=2.00000000000000e-9 .......
[CV]  kernel=rbf, C=200000.000000000, gamma=2.00000000000000e-9, total=   1.1s
[CV] kernel=rbf, C=200000.000000000, gamma=2.00000000000000e-8 .......
[CV]  kernel=rbf, C=200000.000000000, gamma=2.00000000000000e-8, total=   1.0s
[CV] kernel=rbf, C=200000.000000000, gamma=2.00000000000000e-8 .......
[CV]  kernel=rbf, C=200000.000000000, gamma=2.00000000000000e-8, total=   1.0s
[CV] kernel=rbf, C=

[CV]  kernel=rbf, C=2.00000000000000e9, gamma=2.00000000000000e-10, total=   1.1s
[CV] kernel=rbf, C=2.00000000000000e9, gamma=2.00000000000000e-10 ....
[CV]  kernel=rbf, C=2.00000000000000e9, gamma=2.00000000000000e-10, total=   1.0s
[CV] kernel=rbf, C=2.00000000000000e9, gamma=2.00000000000000e-9 .....
[CV]  kernel=rbf, C=2.00000000000000e9, gamma=2.00000000000000e-9, total=   0.7s
[CV] kernel=rbf, C=2.00000000000000e9, gamma=2.00000000000000e-9 .....
[CV]  kernel=rbf, C=2.00000000000000e9, gamma=2.00000000000000e-9, total=   0.6s
[CV] kernel=rbf, C=2.00000000000000e9, gamma=2.00000000000000e-9 .....
[CV]  kernel=rbf, C=2.00000000000000e9, gamma=2.00000000000000e-9, total=   0.7s
[CV] kernel=rbf, C=2.00000000000000e9, gamma=2.00000000000000e-8 .....
[CV]  kernel=rbf, C=2.00000000000000e9, gamma=2.00000000000000e-8, total=   0.7s
[CV] kernel=rbf, C=2.00000000000000e9, gamma=2.00000000000000e-8 .....
[CV]  kernel=rbf, C=2.00000000000000e9, gamma=2.00000000000000e-8, total=   0.7s
[CV] 

[Parallel(n_jobs=1)]: Done  90 out of  90 | elapsed:  3.0min finished


Best estimator
SVC(C=2.00000000000000e8, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.0000200000000000000,
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)
Confunsion matrix and report:
[[605 365]
 [335 695]]
              precision    recall  f1-score   support

       False       0.64      0.62      0.63       970
        True       0.66      0.67      0.67      1030

   micro avg       0.65      0.65      0.65      2000
   macro avg       0.65      0.65      0.65      2000
weighted avg       0.65      0.65      0.65      2000

