# Create square root data

In [2]:
import math
import random
import os

In [3]:
def truncate_to_4_digit(x):
    """Floor ``x`` to 4 decimal places and return the result as a float.

    The result is ``k / 10000`` for the largest integer ``k`` such that
    ``k / 10000 <= x``. Values that are already 4-decimal floats are
    returned unchanged, so the function is idempotent.
    """
    units = math.floor(x * 10000)
    # x * 10000 is rounded to the nearest double, so it can land just below
    # (or exactly on) an integer that it should not. Correct the floored
    # value by at most one step so the result really is the largest
    # 4-decimal value not exceeding x.
    if (units + 1) / 10000 <= x:
        units += 1
    elif units / 10000 > x:
        units -= 1
    return units / 10000

def create_sqrt_data(filename, total_num_examples=10000):
    """Write ``total_num_examples`` lines of the form ``sqrt(a)=b`` to ``filename``.

    Each argument ``a`` is drawn with ``random.uniform(1, 10)`` and truncated
    to 4 decimals; ``b`` is ``math.sqrt(a)`` truncated to 4 decimals.
    The file is opened in write mode, so existing content is replaced.
    """
    with open(filename, 'w') as out:
        for _ in range(total_num_examples):
            arg = truncate_to_4_digit(random.uniform(1, 10))
            root = truncate_to_4_digit(math.sqrt(arg))
            out.write(f'sqrt({arg})={root}\n')

In [4]:
# Generate one training file per dataset size, skipping sizes whose file is
# already on disk.
for total_num_examples in (10000, 20000, 40000):
    input_file_path = f'train_sqrt_{total_num_examples}.txt'
    if os.path.exists(input_file_path):
        print(f'File {input_file_path} already exists')
    else:
        create_sqrt_data(input_file_path, total_num_examples=total_num_examples)

File train_sqrt_10000.txt already exists


In [4]:
# create non-overlapping data
# Enumerate every 4-decimal argument from 1.0000 to 9.9999 and write only the
# examples whose line does not already appear in the training file.
total_num_examples = 10000
input_file_path = f'train_sqrt_{total_num_examples}.txt'
output_file_path = f'train_sqrt_{total_num_examples}_nonoverlap.txt'

if not os.path.exists(output_file_path):
    with open(input_file_path, 'r') as f:
        lines_to_remove = set(f)

    # Number of distinct training lines.
    print(len(lines_to_remove))

    with open(output_file_path, 'w') as f:
        for k in range(10000, 100000):
            # k / 10000 is already a 4-decimal value. Passing it through
            # truncate_to_4_digit again can floor it to (k - 1) / 10000,
            # because (k / 10000) * 10000 may round to just below k. That
            # would skip this argument and emit its neighbour twice.
            x_trunc = k / 10000
            y = math.sqrt(x_trunc)
            y_trunc = truncate_to_4_digit(y)
            line_to_add = f'sqrt({x_trunc})={y_trunc}\n'
            if line_to_add in lines_to_remove:
                lines_to_remove.remove(line_to_add)
            else:
                f.write(line_to_add)

    # Training lines the enumeration never produced; should be 0 when every
    # training argument lies in the enumerated range.
    print(len(lines_to_remove))


In [5]:
# shuffle and create a smaller version
# total_num_examples = 10000
# input_file_path = f'train_sqrt_{total_num_examples}_nonoverlap.txt'
# num_test_samples = 10000

# with open(input_file_path, 'r') as f:
#     lines = f.readlines()
#     random.shuffle(lines)
#     with open(f'test_sqrt_{num_test_samples}.txt', 'w') as f2:
#         for line in lines[:num_test_samples]:
#             f2.write(line)

In [6]:
# shuffle and create a smaller version
# total_num_examples = 10000
# input_file_path = f'train_sqrt_{total_num_examples}_nonoverlap.txt'
# num_test_samples = 1000

# with open(input_file_path, 'r') as f:
#     lines = f.readlines()
#     random.shuffle(lines)
#     with open(f'test_sqrt_{num_test_samples}.txt', 'w') as f2:
#         for line in lines[:num_test_samples]:
#             f2.write(line)

In [7]:
# shuffle and create a smaller version
# total_num_examples = 10000
# input_file_path = f'train_sqrt_{total_num_examples}_nonoverlap.txt'
# num_test_samples = 100

# with open(input_file_path, 'r') as f:
#     lines = f.readlines()
#     random.shuffle(lines)
#     with open(f'test_sqrt_{num_test_samples}.txt', 'w') as f2:
#         for line in lines[:num_test_samples]:
#             f2.write(line)

In [1]:
import random
import os
def get_subset_train_data(num_samples):
    """Write a random ``num_samples``-line subset of ``train_sqrt_10000.txt``.

    The subset is written to ``train_sqrt_{num_samples}.txt``. If that file
    already exists, ``'file exists!'`` is printed and nothing is read or
    written. If the source has fewer than ``num_samples`` lines, all of them
    are written.
    """
    output_path = f'train_sqrt_{num_samples}.txt'
    # Check before touching the source so an existing subset costs neither a
    # file read nor a shuffle of the global RNG.
    if os.path.exists(output_path):
        print('file exists!')
        return

    with open('train_sqrt_10000.txt', 'r') as f:
        lines = f.readlines()
    random.shuffle(lines)

    with open(output_path, 'w') as f:
        f.writelines(lines[:num_samples])

# Build the 1000-, 3000- and 5000-line training subsets.
for subset_size in (1000, 3000, 5000):
    get_subset_train_data(subset_size)

# Let's make an algorithmic-reasoning-like function (based on Newton's method) to calculate the square root of a number

In [8]:
import math 

def truncate_to_n_digit(x, n=4):
    """Floor ``x`` to ``n`` decimal places and return the result as a float.

    The result is ``k / 10**n`` for the largest integer ``k`` such that
    ``k / 10**n <= x``. Values that are already ``n``-decimal floats are
    returned unchanged.
    """
    scale = 10 ** n
    units = math.floor(x * scale)
    # x * scale is rounded to the nearest double, so the floor can be off by
    # one unit near an exact n-decimal value. Correct it by at most one step.
    if (units + 1) / scale <= x:
        units += 1
    elif units / scale > x:
        units -= 1
    return units / scale

# Sample integer used by the digit-list demo below.
x = 128
def list_to_string(a):
    """Return ``str(a)`` with every space character removed."""
    return ''.join(str(a).split(' '))

def num_to_list(num):
    """Return the characters of ``str(num)`` converted to ints, in order."""
    return list(map(int, str(num)))

# Show the digit list and its space-free string form, one per line.
list_x = num_to_list(x)
print(list_x, list_to_string(list_x), sep='\n')

[1, 2, 8]
[1,2,8]


In [9]:
def get_input_string(x: float, operator='sqrt'):
    """Return the prompt text for ``operator`` applied to ``x``.

    ``x`` is truncated to 4 decimals. The result has three newline-terminated
    lines: ``Input:``, ``{operator}({x})`` and ``Target:``.
    """
    truncated = truncate_to_4_digit(x)
    return f'Input:\n{operator}({truncated})\nTarget:\n'

# Preview the prompt for a sample value; the argument is truncated to 4 decimals.
print(get_input_string(1.23456789))

Input:
sqrt(1.2345)
Target:



In [10]:
# NOTE(review): this redefines get_input_string with the same behavior as the
# definition in the previous cell; one of the two could be dropped.
def get_input_string(x: float, operator='sqrt'):
    """Build the ``Input:`` / ``Target:`` prompt for ``operator(x)``.

    ``x`` is truncated to 4 decimals before formatting; every line of the
    returned string, including the last, ends with a newline.
    """
    prompt_lines = ['Input:', f'{operator}({truncate_to_4_digit(x)})', 'Target:', '']
    return '\n'.join(prompt_lines)


def get_output_string(x, y=0, n=5):
    """Build the scratchpad text for ``n`` Newton iterations towards sqrt(x).

    The starting guess ``x_0`` is the integer part of ``math.sqrt(x)``
    truncated to 4 decimals when that value is at least 1, otherwise ``0.1``.
    Each step computes ``0.5 * (x_i + x / x_i)`` truncated to 4 decimals.
    ``y`` is accepted but not used.

    The returned string holds a ``<scratch>`` line, the ``x_0=...`` line, one
    line per step (the last followed by `` , END``), a ``</scratch>`` line and
    finally the last estimate, each terminated by a newline.
    """
    root_4dp = truncate_to_n_digit(math.sqrt(x), 4)
    start = int(root_4dp) if root_4dp >= 1 else 0.1

    estimate = start
    step_lines = []
    for step in range(1, n + 1):
        previous = estimate
        estimate = truncate_to_n_digit(0.5 * (previous + x / previous), 4)
        step_lines.append(
            f'x_{step}: 1/2*({previous}+{x}/{previous})={estimate}, x_{step}={estimate}'
        )

    return ''.join([
        '<scratch>\n',
        f'x_0={start}\n',
        '\n'.join(step_lines),
        ' , END\n</scratch>\n',
        f'{estimate}\n',
    ])

In [11]:
# Compare the 4-step Newton trace against the truncated math.sqrt result on
# random inputs, printing each new worst-case discrepancy as it is found.
max_error = 0
for _ in range(1000000):
    x_trunc = truncate_to_4_digit(random.uniform(1, 10))
    y_trunc = truncate_to_4_digit(math.sqrt(x_trunc))
    # The final line of the trace is the Newton estimate.
    newton_y = float(get_output_string(x_trunc, n=4).splitlines()[-1])
    error = abs(y_trunc - newton_y)

    if error <= max_error:
        continue
    max_error = error
    print(f'x={x_trunc}, y={y_trunc}, newton_y={newton_y}, error={error}')

print(f'max error = {max_error}')

x=3.9026, y=1.9754, newton_y=1.9755, error=9.999999999998899e-05
x=1.2544, y=1.1199, newton_y=1.12, error=0.00010000000000021103
max error = 0.00010000000000021103
