In [23]:
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
import os
import cv2
import pandas as pd

In [24]:
def preprocess_image(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, threshold = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    resized = cv2.resize(threshold, (28, 28))
    normalized = resized / 255.0
    reshaped = np.reshape(normalized, (28, 28, 1))
    return reshaped

In [25]:
img_path = os.listdir('../img_out/')
name_list = []

# Model / data parameters
num_classes = 4
input_shape = (28, 28, 1)

image_data = []

for img in img_path:
    name_list.append(img.split(".")[0])

    # Carregar a imagem de entrada
    image = cv2.imread(f'../img_out/{img}')

    # Pré-processar a imagem
    preprocessed_image = preprocess_image(image)

    image_data.append(preprocessed_image)

data = pd.read_csv(r"../SimpleEQ.csv",quotechar="'",sep=";",names=["latex","opr","img_name"])

df_filtered = data.loc[data['img_name'].isin(name_list)]

x = np.array(image_data)
y = np.array(pd.get_dummies(df_filtered["opr"]))


In [26]:
test_size = 0.1
x_train = x[0:int(len(x)*(1-test_size))]
x_test = x[int(len(x)*(1-test_size)+1):]

y_train = y[0:int(len(y)*(1-test_size))]
y_test = y[int(len(y)*(1-test_size)+1):]

# Scale images to the [0, 1] range
x_train = x_train.astype("float32") / 255
x_test = x_test.astype("float32") / 255
# Make sure images have shape (28, 28, 1)
x_train = np.expand_dims(x_train, -1)
x_test = np.expand_dims(x_test, -1)
print("x_train shape:", x_train.shape)
print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")

model = keras.Sequential(
    [
        keras.Input(shape=input_shape),
        layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Flatten(),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation="softmax"),
    ]
)

model.summary()

batch_size = 128
epochs = 15

model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1)

score = model.evaluate(x_test, y_test, verbose=0)
print("Test loss:", score[0])
print("Test accuracy:", score[1])

x_train shape: (91265, 28, 28, 1, 1)
91265 train samples
10140 test samples
Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_6 (Conv2D)           (None, 26, 26, 32)        320       
                                                                 
 max_pooling2d_6 (MaxPooling  (None, 13, 13, 32)       0         
 2D)                                                             
                                                                 
 conv2d_7 (Conv2D)           (None, 11, 11, 64)        18496     
                                                                 
 max_pooling2d_7 (MaxPooling  (None, 5, 5, 64)         0         
 2D)                                                             
                                                                 
 flatten_3 (Flatten)         (None, 1600)              0         
                                            

In [16]:
img_path = os.listdir('../img_out/')
name_list = []

for img in img_path:
    name_list.append(img.split(".")[0])

data = pd.read_csv(r"../SimpleEQ.csv",quotechar="'",sep=";",names=["latex","opr","img_name"])

df_filtered = data.loc[data['img_name'].isin(name_list)]
data.loc[~data['img_name'].isin(df_filtered['img_name'])].sort_index(ascending=False).to_csv(r"../RemainEQ.csv",sep=";",index=False,header=False)

In [47]:
test_size = 0.3
x_train = x[0:int(len(x)*(1-test_size))]
x_test = x[int(len(x)*(1-test_size)+1):]

y_train = y[0:int(len(y)*(1-test_size))]
y_test = y[int(len(y)*(1-test_size)+1):]

x

array([[[[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]],

        [[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]],

        [[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]],

        ...,

        [[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]],

        [[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]],

        [[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]]],


       [[[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]],

        [[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]],

        [[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]],

        ...,

        [[0.],
 

In [50]:
x_train.shape
x_test.shape

(14585, 28, 28, 1)

In [22]:
data = pd.read_csv(r"../SimpleEQ.csv",quotechar="'",sep=";",names=["latex","opr","img_name"])
data.shape

(489720, 3)

In [23]:
df_filtered = data.loc[data['img_name'].isin(name_list)]
df_filtered.shape

(48620, 3)

In [42]:
y.shape

(48620, 4)

In [43]:
np_img_data = np.array(image_data)


In [2]:
print(x_train.shape)
print(type(x_train))
print(np_img_data.shape)
print(type(np_img_data))

(60000, 28, 28, 1)
<class 'numpy.ndarray'>


NameError: name 'np_img_data' is not defined

In [73]:
y_train.shape
lista = [n for n in range(4)]
arr = np.array(lista)
np.reshape(arr,(-1))

array([0, 1, 2, 3])

In [10]:
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
print(y_train.shape)
print(y_test.shape)

(60000,)
(10000,)


In [1]:
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers

# Model / data parameters
num_classes = 10
input_shape = (28, 28, 1)

# Load the data and split it between train and test sets
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Scale images to the [0, 1] range
x_train = x_train.astype("float32") / 255
x_test = x_test.astype("float32") / 255
# Make sure images have shape (28, 28, 1)
x_train = np.expand_dims(x_train, -1)
x_test = np.expand_dims(x_test, -1)
print("x_train shape:", x_train.shape)
print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")


# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

model = keras.Sequential(
    [
        keras.Input(shape=input_shape),
        layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Flatten(),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation="softmax"),
    ]
)

model.summary()

batch_size = 128
epochs = 15

model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1)

score = model.evaluate(x_test, y_test, verbose=0)
print("Test loss:", score[0])
print("Test accuracy:", score[1])

x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 26, 26, 32)        320       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 13, 13, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 11, 11, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 5, 5, 64)         0         
 2D)                                                             
                                                                 
 flatten (Flatten)           (None, 1600)              0         
                                                 

In [37]:
print(y_train.shape)
# print(np.array(image_data).shape)

(60000, 10)


# Testes

In [None]:
import tensorflow as tf
import cv2

In [None]:
import pandas as pd
import numpy as np
import cv2
import matplotlib.pylab as plt
import os
os.add_dll_directory(r'c:\users\luiz souza\anaconda3\envs\latex\lib\site-packages')
import pytesseract
import sympy
# import tensorflow as tf
# from tensorflow import keras
# from tqdm.notebook import tqdm

In [None]:
df = pd.read_csv('teste.csv',sep=';',quotechar="'")
df.to_dict()

In [None]:
for n,v in zip(["a","B","x"],[1,2,3]):
    print(str(f"'{n}{v}+{n}';soma;soma_numero"))

In [None]:
def Tex2fig(serie:pd.Series):
    latex = serie[0]
    img_name = serie[2]

    plt.figure(dpi=25)
    fig, ax = plt.subplots()

    left, width = .25, .5
    bottom, height = .25, .5
    right = left + width
    top = bottom + height

    # Fractions and radicals
    ax.text(0.5 * (left + right), 0.5 * (bottom + top), rf"${latex}$",
            horizontalalignment='center',
            verticalalignment='center',
            transform=ax.transAxes,
            fontsize=150)

    #Adjusting the ticks size of the axes
    plt.xlim(0,2)
    plt.ylim(0,2)


    # plt.show()
    plt.grid(False)
    plt.axis('off')
    ax.set_axis_off()
    plt.savefig(f'{img_name}.png',bbox_inches='tight')

In [None]:
# Using LaTex for the rendering of the mathematical expression
plt.rcParams.update({
    'text.usetex': True,
    "font.family": "monospace",
    "font.monospace": 'Computer Modern Typewriter'
})

#Adjusting the resolution of the plot

plt.figure(dpi=25)
fig, ax = plt.subplots()

left, width = .25, .5
bottom, height = .25, .5
right = left + width
top = bottom + height

# Fractions and radicals
ax.text(0.5 * (left + right), 0.5 * (bottom + top), r'$33x+222y$',
        horizontalalignment='center',
        verticalalignment='center',
        transform=ax.transAxes,
        fontsize=150)

# plt.text(1,1,r'$\frac{x}{5}$',fontsize=200)

#Adjusting the ticks size of the axes
plt.xlim(0,2)
plt.ylim(0,2)


# plt.show()
plt.grid(False)
plt.axis('off')
ax.set_axis_off()
plt.savefig('Teste.png',bbox_inches='tight')

In [None]:
print(tf.__version__)
print(keras.__version__)

In [None]:
import cv2

import os


# Carregar a imagem
imagem = cv2.imread('../data/Emc2.png')

# Converter a imagem para escala de cinza
imagem_cinza = cv2.cvtColor(imagem, cv2.COLOR_BGR2GRAY)

# Aplicar limiarização na imagem para melhorar a qualidade do texto
_, imagem_limiarizada = cv2.threshold(imagem_cinza, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

# Aplicar OCR para extrair o texto da imagem
texto = pytesseract.image_to_string(imagem_limiarizada)

# Remover espaços em branco e quebras de linha
texto = texto.replace(' ', '').replace('\n', '')

# Analisar a expressão matemática usando SymPy
expr = sympy.sympify(texto)

# Identificar a operação matemática
operacao = ''
if isinstance(expr, sympy.Add):
    operacao = 'adição'
elif isinstance(expr, sympy.Mul):
    operacao = 'multiplicação'
elif isinstance(expr, sympy.Pow):
    operacao = 'potenciação'
elif isinstance(expr, sympy.Add):
    operacao = 'adição'
elif isinstance(expr, sympy.Add):
    operacao = 'adição'
elif isinstance(expr, sympy.Add):
    operacao = 'adição'
elif isinstance(expr, sympy.Add):
    operacao = 'adição'
elif isinstance(expr, sympy.Add):
    operacao = 'adição'
elif isinstance(expr, sympy.Add):
    operacao = 'adição'

# Gerar o código LaTeX correspondente
codigo_latex = sympy.latex(expr)

# Imprimir a operação identificada e o código LaTeX
print(f"A operação identificada é: {operacao}")
print(f"Código LaTeX: {codigo_latex}")


In [None]:
import warnings
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.exceptions import ConvergenceWarning
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split

# Load data from https://www.openml.org/d/554
X, y = fetch_openml(
    "mnist_784", version=1, return_X_y=True, as_frame=False, parser="pandas"
)
X = X / 255.0

# Split data into train partition and test partition
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, test_size=0.7)

mlp = MLPClassifier(
    hidden_layer_sizes=(40,),
    max_iter=8,
    alpha=1e-4,
    solver="sgd",
    verbose=10,
    random_state=1,
    learning_rate_init=0.2,
)

# this example won't converge because of resource usage constraints on
# our Continuous Integration infrastructure, so we catch the warning and
# ignore it here
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=ConvergenceWarning, module="sklearn")
    mlp.fit(X_train, y_train)

print("Training set score: %f" % mlp.score(X_train, y_train))
print("Test set score: %f" % mlp.score(X_test, y_test))

fig, axes = plt.subplots(4, 4)
# use global min / max to ensure all weights are shown on the same scale
vmin, vmax = mlp.coefs_[0].min(), mlp.coefs_[0].max()
for coef, ax in zip(mlp.coefs_[0].T, axes.ravel()):
    ax.matshow(coef.reshape(28, 28), cmap=plt.cm.gray, vmin=0.5 * vmin, vmax=0.5 * vmax)
    ax.set_xticks(())
    ax.set_yticks(())

plt.show()

In [None]:
def load_image(image_path):
    img = tf.io.read_file(image_path)
    img = tf.io.decode_jpeg(img, channels=3)
    img = tf.keras.layers.Resizing(299, 299)(img)
    img = tf.keras.applications.inception_v3.preprocess_input(img)
    return img, image_path

In [None]:
# train = pd.read_csv('../im2Latex_Kaggle/im2latex_train.csv',sep=',',quotechar='"')
train = pd.read_csv("../im2Latex_Kaggle/im2latex_train.csv", sep=",", quotechar='"')

In [None]:
train

In [None]:
def img2numpy(fname: str) -> np.ndarray:
    fpath = rf"../im2Latex_Kaggle/formula_images_processed/{fname}"
    img = cv2.imread(fpath, cv2.IMREAD_COLOR)
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return img_gray.shape

In [None]:
cont = 0
size = 0
for img in train['image']:
    if cont < 5000:
        h,w = img2numpy(img)
        img_size = h*w
        if img_size > size:
            size = img_size
            print(img_size,h,w)
        cont += 1
    else:
        break


In [None]:
train = np.loadtxt('../im2Latex_Kaggle/im2latex_train.csv',delimiter=',',dtype=str,quotechar='"',usecols=(0,-1))

In [None]:
train.shape

In [None]:
df_train = pd.DataFrame(train[1:],columns=train[0])

In [None]:
def img2numpy(fname:str) -> np.ndarray:
    fpath = rf"../im2Latex_Kaggle/formula_images_processed/{fname}"
    img = cv2.imread(fpath,cv2.IMREAD_COLOR)
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return img_gray

In [None]:
img_gray.reshape(-1)

In [None]:
np.stack([train[1],[img_gray.reshape(-1)]])

In [None]:
a = np.array([[1,2,3], [4,5,6],[7,8,9]])
a

In [None]:
print(np.reshape(img,-1).shape)
print(np.reshape(img_gray,-1).shape)


In [None]:
print(img_gray.reshape(-1).ndim)
print(train[1].ndim)

In [None]:
np.column_stack([train[1],[img_gray]])

In [None]:
print(type(train))
print(type(img_gray))