## Applying machine learning to a number theory problem

Sato-Tate group classification problem:

Given normalized Frobenius traces, is it possible to
- determine whether the Sato-Tate group is USp(4)?
- Extended problem: classify the specific type of Sato-Tate group in the LMF (6 classes)?

#### Some applicable machine learning techniques
- Multi-layer perceptron
- Naive Bayes classification

### Package requirements

In [1]:
import numpy as np
import json
from sklearn import preprocessing
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset
import torch
import torch.nn as nn

### Processing data

In [2]:
# Read the whole dataset into memory as a list of raw record strings,
# one per line of the file (trailing newlines are kept).
with open('./g2st.txt') as f:
    data = f.readlines()

In [13]:
# Peek at one raw record. Fields are colon-separated; the parsing code in
# split_data_train_test reads field 0 as the integer label and fields 1 and 3
# as JSON lists of numbers.
data[3]

'0:[-11648,0,-36324,0,-28647,0,-2376]:4314388037048:[0.000000,0.000000,0.000000,0.000000,0.742781,1.436842,-1.315192,2.498780,0.000000,2.917300,-1.098885,-0.520756,1.536443,-1.466033,0.474713,0.000000,1.350105,-1.317171,-0.847998,0.000000,-0.796030,-0.394132,-1.546778,1.149392,0.752577,-1.064828,1.397926,-0.341743,2.035653,1.638464,0.325515,1.276939,2.193129,0.619059,1.824686,-0.597948,0.594635,0.000000,-2.591337,-0.284988,0.850657,1.652228,1.071439,-0.265489,-0.528655,0.524097,1.034954,-1.545976,2.019822,-1.996105,1.233253,-1.219422,-1.214913,0.000000,0.238620,-0.475551,-3.037872,-0.913168,-0.226819,-2.034840,-0.898650,0.659580,-2.832620,0.000000,-0.642345,-2.554782,-0.422224,2.714378,1.656897,0.616399,1.430733,1.216848,-1.204525,0.998752,0.395575,1.172477,0.389896,-1.541386,0.576683,-1.145458,0.380091,0.566315,-2.058233,-0.931493,0.371792,0.000000,-0.365529,0.000000,1.805175,0.537194,-1.426809,2.482156,-2.453405,-1.399262,2.235655,0.000000,-0.508456,1.348639,-0.670755,1.339158]\n'

In [5]:
### Preprocessing and splitting data
def split_data_train_test(data, long_input=True, normalized_data=True, test_size=0.2):
    """Parse raw records and split them into train and test sets.

    Each record is a colon-separated string. Field 0 is parsed as the integer
    class label. Field 3 (``long_input=True``) or field 1
    (``long_input=False``) is parsed as a JSON list of numbers and used as the
    feature vector.

    Args:
        data: Iterable of raw record strings.
        long_input: Use field 3 as features when true, field 1 otherwise.
        normalized_data: When true, pass the feature matrix through
            ``sklearn.preprocessing.normalize`` before splitting.
        test_size: Forwarded to ``train_test_split``.

    Returns:
        ``(X_train, X_test, y_train, y_test)``; the ``X`` values are NumPy
        arrays and the ``y`` values hold the integer labels.
    """
    datapoints = [point.split(':') for point in data]
    labels_data = [int(point[0]) for point in datapoints]

    feature_field = 3 if long_input else 1
    input_data = [json.loads(point[feature_field]) for point in datapoints]
    input_data_pd = pd.DataFrame(input_data)

    # Short feature lists can have fewer than seven entries, which leaves NaN
    # in column 6; pad those with 0. Skip when no record has a seventh entry
    # (the column then does not exist and indexing it would raise KeyError).
    if not long_input and 6 in input_data_pd.columns:
        input_data_pd[6] = input_data_pd[6].fillna(0)

    X = input_data_pd.to_numpy()
    if normalized_data:
        # normalize() returns a plain ndarray, so keep working on the array
        # instead of reading DataFrame attributes from its result.
        X = preprocessing.normalize(X)

    # Honor the caller's test_size instead of a hard-coded 0.2.
    X_train, X_test, y_train, y_test = train_test_split(
        X, labels_data, test_size=test_size
    )
    return X_train, X_test, y_train, y_test

class PolynomialDataset(Dataset):
    """Dataset pairing each feature entry with the label at the same index."""

    def __init__(self, X, Y):
        """Keep references to the features ``X`` and the labels ``Y``."""
        self.input_data, self.labels = X, Y

    def __len__(self):
        """Return the number of samples, taken from the label container."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        features = self.input_data[idx]
        label = self.labels[idx]
        return features, label

### Creating dataset object

In [6]:
# Split using the default long (field 3) feature vectors, with the
# normalization step turned off.
X_train, X_test, y_train, y_test = split_data_train_test(data, normalized_data=False)

In [29]:
# Cast the features to float32 before wrapping them in datasets.
X_train, X_test = X_train.astype(np.float32), X_test.astype(np.float32)
train_dataset = PolynomialDataset(X_train, y_train)
test_dataset = PolynomialDataset(X_test, y_test)

# Training budget: convert a target of roughly n_iters mini-batches into a
# whole number of epochs (truncated towards zero).
batch_size = 50
n_iters = 20000
num_epochs = int(n_iters / (len(train_dataset) / batch_size))

# Training batches are reshuffled every epoch; test batches keep their order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=batch_size, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=batch_size, shuffle=False
)

In [30]:
class FeedforwardNeuralNetModel(nn.Module):
    """Fully connected network: three hidden ReLU layers and a linear output.

    The output of ``forward`` is the raw result of the last linear layer
    (no softmax is applied here).
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        """Create the layers.

        Args:
            input_dim: Number of input features.
            hidden_dim: Width of each of the three hidden layers.
            output_dim: Number of output values.
        """
        super().__init__()

        # Linear/ReLU stack used by forward()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(hidden_dim, hidden_dim)
        self.relu3 = nn.ReLU()
        self.fc4 = nn.Linear(hidden_dim, output_dim)

        # NOTE(review): the two modules below are registered on the model but
        # forward() never applies them.
        self.batch_norm = nn.BatchNorm1d(hidden_dim)
        self.dropout = nn.Dropout(0.2)  # drop probability 0.2

    def forward(self, x):
        """Run ``x`` through the three hidden layers and the output layer."""
        hidden = self.relu1(self.fc1(x))
        hidden = self.relu2(self.fc2(hidden))
        hidden = self.relu3(self.fc3(hidden))
        return self.fc4(hidden)

In [31]:
# Network dimensions.
# NOTE(review): 100 is presumably the length of each long feature vector and
# 2 the number of classes in the binary problem - confirm against the data.
input_dim, hidden_dim, output_dim = 100, 200, 2
learning_rate = 0.01

model = FeedforwardNeuralNetModel(input_dim, hidden_dim, output_dim)

# Cross-entropy on the model outputs, optimized with plain SGD.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [32]:
# Run on the CPU. GPU selection is commented out; to re-enable it, build the
# device from ("cuda:0" if torch.cuda.is_available() else "cpu") instead.
device = torch.device("cpu")
model = model.to(device)

In [33]:
def _accuracy_percent(model, loader, device):
    """Return the percentage of samples in ``loader`` that ``model`` labels correctly."""
    was_training = model.training
    model.eval()
    correct = 0
    total = 0
    # Evaluation needs no gradients; disabling autograd avoids building a
    # graph for every test batch.
    with torch.no_grad():
        for batch_inputs, batch_labels in loader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            # Predicted class is the index of the largest output value
            _, predicted = torch.max(model(batch_inputs), 1)

            total += batch_labels.size(0)
            # .item() keeps the running count a plain Python int
            correct += (predicted == batch_labels).sum().item()
    # Put the model back into whatever mode it was in before evaluation
    model.train(was_training)
    return 100 * float(correct) / float(total)


# NOTE(review): ``iter`` shadows the builtin of the same name; the name is
# kept here in case other cells read the counter.
iter = 0

for epoch in range(num_epochs):
    for inputs, labels in train_loader:
        # The loader already yields tensors; only the parameters need
        # gradients, so the batch is moved to the device as-is.
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()

        # Forward pass to get the model outputs
        outputs = model(inputs)

        # Cross-entropy loss on the outputs
        loss = criterion(outputs, labels)

        # Backpropagate and update the parameters
        loss.backward()
        optimizer.step()

        iter += 1

        # Every 500 training steps, report the last training-batch loss and
        # the accuracy over the whole test loader.
        if iter % 500 == 0:
            accuracy = _accuracy_percent(model, test_loader, device)
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.item(), accuracy))


  """


Iteration: 500. Loss: 0.5968632698059082. Accuracy: 57.4275
Iteration: 1000. Loss: 0.5118539333343506. Accuracy: 83.948
Iteration: 1500. Loss: 0.41208088397979736. Accuracy: 90.553
Iteration: 2000. Loss: 0.24629293382167816. Accuracy: 94.74
Iteration: 2500. Loss: 0.1650465726852417. Accuracy: 96.563
Iteration: 3000. Loss: 0.13303349912166595. Accuracy: 97.0475
Iteration: 3500. Loss: 0.10587829351425171. Accuracy: 97.439
Iteration: 4000. Loss: 0.05692749843001366. Accuracy: 97.5775
Iteration: 4500. Loss: 0.04783552512526512. Accuracy: 97.684
Iteration: 5000. Loss: 0.11418955773115158. Accuracy: 97.616
Iteration: 5500. Loss: 0.21382589638233185. Accuracy: 97.8205
Iteration: 6000. Loss: 0.05707945302128792. Accuracy: 97.94
Iteration: 6500. Loss: 0.1571904718875885. Accuracy: 97.866
Iteration: 7000. Loss: 0.06765960901975632. Accuracy: 98.021
Iteration: 7500. Loss: 0.03050486370921135. Accuracy: 98.041
Iteration: 8000. Loss: 0.0990460216999054. Accuracy: 98.1035
Iteration: 8500. Loss: 0.10