In [1]:
import numpy as np
import pandas as pd
from utils import get_data_as_list
import re

In [2]:
# Load the day-5 puzzle input through the project helper and preview the first 15 entries.
# NOTE(review): get_data_as_list is defined in utils (not shown here); the preview
# suggests it returns one 'x1,y1 -> x2,y2' string per input line — confirm.
data_list = get_data_as_list(day=5)
data_list[:15]

['989,854 -> 521,854',
 '831,695 -> 402,266',
 '38,805 -> 306,537',
 '802,24 -> 802,824',
 '951,478 -> 951,758',
 '508,987 -> 508,868',
 '602,246 -> 108,246',
 '765,781 -> 76,92',
 '248,757 -> 644,361',
 '296,987 -> 296,958',
 '240,430 -> 240,36',
 '672,530 -> 156,14',
 '920,19 -> 425,514',
 '737,389 -> 859,389',
 '762,364 -> 468,364']

In [3]:
def get_df_from_list(data_list):
    """Parse line descriptions into a DataFrame of integer endpoints.

    Parameters
    ----------
    data_list : list of str
        Entries of the form ``'x1,y1 -> x2,y2'``.

    Returns
    -------
    pandas.DataFrame
        One row per entry with integer columns ``x1``, ``y1``, ``x2``, ``y2``.
        An empty input gives an empty frame that still has these four columns.

    Raises
    ------
    ValueError
        If an entry does not contain exactly four integer coordinates in the
        expected layout (previously such entries could silently produce a
        frame with missing columns).
    """
    # Named groups become the column names of the extracted frame.
    pattern = (
        r'^\s*(?P<x1>[+-]?\d+)\s*,\s*(?P<y1>[+-]?\d+)'
        r'\s*->\s*'
        r'(?P<x2>[+-]?\d+)\s*,\s*(?P<y2>[+-]?\d+)\s*$'
    )
    # Force object dtype so the .str accessor also works for an empty input.
    lines = pd.Series(list(data_list), dtype=object)
    parsed = lines.str.extract(pattern)

    # Entries that do not match the pattern come back as all-NaN rows.
    malformed = parsed.isna().any(axis=1)
    if malformed.any():
        raise ValueError(
            "line is not of the form 'x1,y1 -> x2,y2': {!r}".format(lines[malformed].iloc[0])
        )

    return parsed.astype(int)

In [4]:
# Parse the raw input lines into integer endpoint columns x1, y1, x2, y2.
df = get_df_from_list(data_list)

In [5]:
# Display the parsed endpoints, one row per input line.
df

Unnamed: 0,x1,y1,x2,y2
0,989,854,521,854
1,831,695,402,266
2,38,805,306,537
3,802,24,802,824
4,951,478,951,758
...,...,...,...,...
495,949,681,350,681
496,256,561,746,561
497,242,119,608,119
498,916,883,410,377


In [6]:
# Largest value in each coordinate column; this bounds the grid size needed later.
df.max()

x1    989
y1    989
x2    988
y2    987
dtype: int32

functions for finding vertical and horizontal lines given the start and end points

In [7]:
def find_direction(two_values):
    """Return the step sign needed to walk from ``two_values[0]`` to ``two_values[1]``.

    The result is -1 when the second value is smaller than the first and +1
    otherwise; equal values also give +1.
    """
    origin, target = two_values[0], two_values[1]
    return -1 if target < origin else 1

In [8]:
def find_line_hor_ver_only(start_end_coordinates):
    """List the points of a horizontal or vertical segment, endpoints included.

    ``start_end_coordinates`` is indexed with the keys 'x1', 'y1', 'x2', 'y2'.
    The result is a DataFrame with columns 'x' and 'y', ordered from the start
    point to the end point. Diagonal segments are ignored: for those the
    function returns None.
    """
    x1, y1 = start_end_coordinates['x1'], start_end_coordinates['y1']
    x2, y2 = start_end_coordinates['x2'], start_end_coordinates['y2']

    vertical = x1 == x2
    horizontal = y1 == y2
    if not (vertical or horizontal):
        # both coordinates change -> diagonal, skipped here
        return None

    if vertical:
        # x is fixed, walk along y
        step = find_direction([y1, y2])
        ys = np.arange(y1, y2 + step, step)
        xs = [x1] * len(ys)
    else:
        # y is fixed, walk along x
        step = find_direction([x1, x2])
        xs = np.arange(x1, x2 + step, step)
        ys = [y1] * len(xs)

    return pd.DataFrame({'x': xs, 'y': ys})

find all the points of all the vertical and horizontal lines

In [9]:
# Collect the points of every horizontal/vertical line and concatenate them once.
# DataFrame.append was removed in pandas 2.0 and copied the growing frame on
# every iteration, so the per-line frames are gathered in a list instead.
segments = [find_line_hor_ver_only(row) for _, row in df.iterrows()]
segments = [seg for seg in segments if seg is not None]  # diagonal lines yield None
if segments:
    points_df = pd.concat(segments, ignore_index=True).astype(int)
else:
    # no usable line: keep the expected (empty) x/y layout
    points_df = pd.DataFrame(columns=['x', 'y']).astype(int)
points_df.head(10)

Unnamed: 0,x,y
0,989,854
1,988,854
2,987,854
3,986,854
4,985,854
5,984,854
6,983,854
7,982,854
8,981,854
9,980,854


Now make a 'field' and mark all the points we found above.
The coordinate values go up to 989, so assume a field of 1000 by 1000, with indices from 0 to 999.

In [10]:
# Overlap counter indexed as field[x, y]; 1000 x 1000 gives indices 0..999 on both axes.
field = np.zeros(shape=(1000, 1000), dtype=float)

In [11]:
# Add 1 to every marked cell in a single vectorised call instead of a per-row
# Python loop. np.add.at accumulates repeated (x, y) pairs, whereas
# ``field[xs, ys] += 1`` would count each duplicated coordinate only once.
# Like the loop it replaces, this adds to the existing counts, so re-create
# ``field`` before re-running this cell.
np.add.at(
    field,
    (points_df['x'].to_numpy(dtype=int), points_df['y'].to_numpy(dtype=int)),
    1,
)

In [12]:
# Number of grid cells covered by two or more lines.
solution = np.where(field > 1)[0].size
solution

5169

# second part


In [13]:
def find_line(start_end_coordinates):
    """Return every grid point on a horizontal, vertical or 45-degree segment.

    Parameters
    ----------
    start_end_coordinates : mapping or pandas.Series
        Indexed with the keys 'x1', 'y1' (start point) and 'x2', 'y2'
        (end point).

    Returns
    -------
    pandas.DataFrame
        Columns 'x' and 'y', one row per point, ordered from the start point
        to the end point with both endpoints included.

    Raises
    ------
    ValueError
        If the segment is neither axis-aligned nor exactly diagonal, since the
        x and y ranges would then have different lengths.
    """
    x1, y1 = start_end_coordinates['x1'], start_end_coordinates['y1']
    x2, y2 = start_end_coordinates['x2'], start_end_coordinates['y2']

    span_x, span_y = abs(x2 - x1), abs(y2 - y1)
    if span_x and span_y and span_x != span_y:
        raise ValueError(
            'segment ({}, {}) -> ({}, {}) is neither horizontal, vertical '
            'nor at 45 degrees'.format(x1, y1, x2, y2)
        )

    # Per-axis direction: -1, 0 (axis does not move) or +1.
    step_x = int(x2 > x1) - int(x2 < x1)
    step_y = int(y2 > y1) - int(y2 < y1)

    # The longer span decides how many points there are; a fixed axis has
    # step 0 and therefore repeats its coordinate.
    offsets = np.arange(max(span_x, span_y) + 1)
    return pd.DataFrame({'x': x1 + step_x * offsets, 'y': y1 + step_y * offsets})

In [14]:
# Collect the points of every line (diagonals included) and concatenate them once.
# DataFrame.append was removed in pandas 2.0 and copied the growing frame on
# every iteration, so the per-line frames are gathered in a list instead.
segments = [find_line(row) for _, row in df.iterrows()]
if segments:
    points_df = pd.concat(segments, ignore_index=True).astype(int)
else:
    # no input lines: keep the expected (empty) x/y layout
    points_df = pd.DataFrame(columns=['x', 'y']).astype(int)
points_df.head(10)

Unnamed: 0,x,y
0,989,854
1,988,854
2,987,854
3,986,854
4,985,854
5,984,854
6,983,854
7,982,854
8,981,854
9,980,854


In [15]:
# Fresh overlap counter for part two, indexed as field2[x, y]; indices 0..999 on both axes.
field2 = np.zeros(shape=(1000, 1000), dtype=float)

In [16]:
# Add 1 to every marked cell in a single vectorised call instead of a per-row
# Python loop. np.add.at accumulates repeated (x, y) pairs, whereas
# ``field2[xs, ys] += 1`` would count each duplicated coordinate only once.
# Like the loop it replaces, this adds to the existing counts, so re-create
# ``field2`` before re-running this cell.
np.add.at(
    field2,
    (points_df['x'].to_numpy(dtype=int), points_df['y'].to_numpy(dtype=int)),
    1,
)

In [17]:
# Number of grid cells covered by two or more lines, diagonals included.
solution = np.where(field2 > 1)[0].size
solution

22083

# test data

In [18]:
# Small sample input: ten lines whose coordinates all lie in 0..9.
test_data = [
    '0,9 -> 5,9',
    '8,0 -> 0,8',
    '9,4 -> 3,4',
    '2,2 -> 2,1',
    '7,0 -> 7,4',
    '6,4 -> 2,0',
    '0,9 -> 2,9',
    '3,4 -> 1,4',
    '0,0 -> 8,8',
    '5,5 -> 8,2',
]

In [19]:
# Parse the sample lines with the same helper used for the real input and preview the first rows.
test_df = get_df_from_list(test_data)
test_df.head()

Unnamed: 0,x1,y1,x2,y2
0,0,9,5,9
1,8,0,0,8
2,9,4,3,4
3,2,2,2,1
4,7,0,7,4


### part 1

In [20]:
# Collect the points of every horizontal/vertical sample line and concatenate them once.
# DataFrame.append was removed in pandas 2.0 and copied the growing frame on
# every iteration, so the per-line frames are gathered in a list instead.
test_segments = [find_line_hor_ver_only(row) for _, row in test_df.iterrows()]
test_segments = [seg for seg in test_segments if seg is not None]  # diagonal lines yield None
if test_segments:
    test_points_df = pd.concat(test_segments, ignore_index=True).astype(int)
else:
    # no usable line: keep the expected (empty) x/y layout
    test_points_df = pd.DataFrame(columns=['x', 'y']).astype(int)
test_points_df.head(10)

Unnamed: 0,x,y
0,0,9
1,1,9
2,2,9
3,3,9
4,4,9
5,5,9
6,9,4
7,8,4
8,7,4
9,6,4


In [21]:
# Overlap counter for the sample, indexed as test_array[x, y]; indices 0..9 on both axes.
test_array = np.zeros(shape=(10, 10), dtype=float)

In [22]:
# Add 1 to every marked cell in a single vectorised call instead of a per-row
# Python loop. np.add.at accumulates repeated (x, y) pairs, whereas
# ``test_array[xs, ys] += 1`` would count each duplicated coordinate only once.
# Like the loop it replaces, this adds to the existing counts, so re-create
# ``test_array`` before re-running this cell.
np.add.at(
    test_array,
    (test_points_df['x'].to_numpy(dtype=int), test_points_df['y'].to_numpy(dtype=int)),
    1,
)

In [23]:
# The array is indexed as [x, y]; transposing displays y as rows and x as columns.
test_array.T

array([[0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 1., 1., 2., 1., 1., 1., 2., 1., 1.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [2., 2., 2., 1., 1., 1., 0., 0., 0., 0.]])

### part 2

In [24]:
# Collect the points of every sample line (diagonals included) and concatenate them once.
# DataFrame.append was removed in pandas 2.0 and copied the growing frame on
# every iteration, so the per-line frames are gathered in a list instead.
test_segments = [find_line(row) for _, row in test_df.iterrows()]
if test_segments:
    test_points_df = pd.concat(test_segments, ignore_index=True)
else:
    # no input lines: keep the expected (empty) integer x/y layout
    test_points_df = pd.DataFrame(columns=['x', 'y'], dtype=int)
test_points_df.head(10)

Unnamed: 0,x,y
0,0,9
1,1,9
2,2,9
3,3,9
4,4,9
5,5,9
6,8,0
7,7,1
8,6,2
9,5,3


In [25]:
# Reset the sample overlap counter for part two; indexed as test_array[x, y], indices 0..9.
test_array = np.zeros(shape=(10, 10), dtype=float)

In [26]:
# Add 1 to every marked cell in a single vectorised call instead of a per-row
# Python loop. np.add.at accumulates repeated (x, y) pairs, whereas
# ``test_array[xs, ys] += 1`` would count each duplicated coordinate only once.
# Like the loop it replaces, this adds to the existing counts, so re-create
# ``test_array`` before re-running this cell.
np.add.at(
    test_array,
    (test_points_df['x'].to_numpy(dtype=int), test_points_df['y'].to_numpy(dtype=int)),
    1,
)

In [27]:
# The array is indexed as [x, y]; transposing displays y as rows and x as columns.
test_array.T

array([[1., 0., 1., 0., 0., 0., 0., 1., 1., 0.],
       [0., 1., 1., 1., 0., 0., 0., 2., 0., 0.],
       [0., 0., 2., 0., 1., 0., 1., 1., 1., 0.],
       [0., 0., 0., 1., 0., 2., 0., 2., 0., 0.],
       [0., 1., 1., 2., 3., 1., 3., 2., 1., 1.],
       [0., 0., 0., 1., 0., 2., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 1., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 1., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [2., 2., 2., 1., 1., 1., 0., 0., 0., 0.]])