# ***Advent of Code 2024 - Day 01***

## ***Day 1A***

### Step 1 - Input data

In [40]:
# Input the dataset from a text file
from collections import Counter

import numpy as np
import pandas as pd

# Read every line into a single string column (column 0). A tab separator is
# requested; judging by the preview, the lines contain no tabs, so the two
# numbers presumably stay together and are split in a later step.
df = pd.read_csv(
    'input.txt',
    sep='\t',
    header=None,
)
df.head()

Unnamed: 0,0
0,76569 66648
1,38663 66530
2,60350 60777
3,35330 13469
4,88681 66648


***Observation***

Each row is a single string containing two numbers separated by two blank spaces. I need a function that:
- takes a string as input,
- splits it into two values and converts each value to an integer,
- returns a list with the two values.

### Step 2 - Split data

In [41]:
# split and convert function
def split_convert(x):
    """Split a whitespace-separated string into a list of integers.

    Parameters
    ----------
    x : str
        One input line, e.g. ``"76569  66648"``.

    Returns
    -------
    list[int]
        The integers found in ``x``, in their original order.

    Raises
    ------
    ValueError
        If any whitespace-separated token is not a valid integer literal.
    """
    # str.split() with no argument splits on any run of whitespace, so the
    # parser no longer depends on the separator being exactly two spaces.
    return [int(token) for token in x.split()]

In [None]:
# Parse each raw line of column 0 into a pair of integers, then store the
# pairs as two new integer columns, col1 (left list) and col2 (right list).
parsed_rows = [split_convert(line) for line in df[0]]
df[['col1', 'col2']] = parsed_rows
df.head()

Unnamed: 0,0,col1,col2
0,76569 66648,76569,66648
1,38663 66530,38663,66530
2,60350 60777,60350,60777
3,35330 13469,35330,13469
4,88681 66648,88681,66648


In [44]:
# Sort col1 and col2 independently in ascending order (the sort_values
# default). Converting to a NumPy array drops the shuffled index, so the
# assignment is positional and row i holds the i-th smallest of each list.
for column in ('col1', 'col2'):
    df[column] = df[column].sort_values().to_numpy()
df.head()

Unnamed: 0,0,col1,col2
0,76569 66648,10053,10004
1,38663 66530,10073,10076
2,60350 60777,10269,10099
3,35330 13469,10341,10197
4,88681 66648,10463,10480


In [45]:
# Per-row distance: absolute difference between the paired col1 and col2 values
pair_difference = df['col1'] - df['col2']
df['distance'] = pair_difference.abs()
df.head()

Unnamed: 0,0,col1,col2,distance
0,76569 66648,10053,10004,49
1,38663 66530,10073,10076,3
2,60350 60777,10269,10099,170
3,35330 13469,10341,10197,144
4,88681 66648,10463,10480,17


In [None]:
# Total distance: the sum of all per-row distances
total_distance = df['distance'].sum()
total_distance

3246517

## ***Day 1B***

### Step 1 - Input data

In [47]:
# Reload the raw input for part B: one string per row in column 0.
df2 = pd.read_csv(
    'input.txt',
    sep='\t',
    header=None,
)
df2.head()

Unnamed: 0,0
0,76569 66648
1,38663 66530
2,60350 60777
3,35330 13469
4,88681 66648


***Observation***:
> This time, you'll need to figure out exactly how often each number from the left list appears in the right list

This sounds very much like a mapping task (a `dict` in Python). You could brute-force it by scanning the right-hand list once for every item in the left-hand list, but that is very slow.
A more efficient approach would be:
- create a counting dict from the right-hand list,
- then use it to look up how many times each item in the left-hand list appears in the right-hand list.

In [48]:
# Parse each raw line of column 0 into a pair of integers, then store the
# pairs as two new integer columns, col1 (left list) and col2 (right list).
parsed_rows_b = [split_convert(line) for line in df2[0]]
df2[['col1', 'col2']] = parsed_rows_b
df2.head()

Unnamed: 0,0,col1,col2
0,76569 66648,76569,66648
1,38663 66530,38663,66530
2,60350 60777,60350,60777
3,35330 13469,35330,13469
4,88681 66648,88681,66648


In [51]:
# Distinct values that occur in the left-hand list (col1)
unique_values_col1 = {left_value for left_value in df2['col1']}
unique_values_col1

{24576,
 57349,
 49159,
 16391,
 73737,
 53260,
 75791,
 67600,
 94225,
 20498,
 43026,
 45075,
 94223,
 51222,
 98326,
 86041,
 12314,
 20507,
 32794,
 43037,
 75803,
 10269,
 61475,
 51238,
 86055,
 59431,
 55335,
 59434,
 61483,
 63535,
 41008,
 63537,
 59442,
 73779,
 57396,
 71736,
 86080,
 90177,
 75842,
 67653,
 28744,
 59467,
 41035,
 36941,
 88144,
 77904,
 28755,
 18516,
 63575,
 45144,
 67674,
 36955,
 22622,
 32863,
 61539,
 16483,
 10341,
 55399,
 34920,
 14441,
 53351,
 16487,
 24684,
 82029,
 47213,
 82030,
 65644,
 26741,
 45174,
 96383,
 27705,
 94339,
 39046,
 30857,
 94345,
 61579,
 57495,
 82072,
 37019,
 32926,
 55455,
 94368,
 34977,
 61602,
 45219,
 75941,
 45223,
 39081,
 43179,
 86197,
 12474,
 47296,
 67778,
 59588,
 12487,
 78026,
 75979,
 59594,
 84175,
 37077,
 24790,
 49368,
 16602,
 78044,
 92382,
 10463,
 32994,
 90339,
 10468,
 76009,
 57577,
 30957,
 14573,
 59629,
 26868,
 33020,
 14589,
 16638,
 26879,
 84224,
 71937,
 84226,
 47358,
 43262,
 78085,


In [53]:
# Count how often each value occurs in the right-hand list (col2).
# Counter does the tallying in a single pass; converting back to a plain dict
# keeps the same type and first-seen key order for the cells that follow.
count_col2 = dict(Counter(df2['col2']))
count_col2

{66648: 16,
 66530: 12,
 60777: 1,
 13469: 11,
 83262: 1,
 40350: 1,
 61801: 1,
 80612: 1,
 47207: 1,
 73737: 3,
 35371: 1,
 20896: 17,
 77147: 11,
 32690: 1,
 45373: 20,
 96652: 13,
 53803: 1,
 62925: 20,
 64433: 19,
 96929: 1,
 87748: 17,
 75803: 17,
 82976: 1,
 50513: 1,
 25876: 1,
 27282: 18,
 52482: 1,
 98326: 2,
 60692: 1,
 78044: 12,
 28829: 1,
 24022: 1,
 76616: 1,
 78368: 1,
 59182: 1,
 10920: 1,
 94982: 8,
 70248: 9,
 67267: 1,
 40092: 20,
 26048: 1,
 97947: 15,
 31426: 15,
 75016: 1,
 72913: 11,
 74888: 10,
 26337: 1,
 21184: 16,
 65978: 1,
 62436: 1,
 89076: 1,
 27190: 1,
 84666: 1,
 60277: 1,
 65172: 18,
 50401: 1,
 13015: 1,
 21521: 1,
 32013: 1,
 67456: 1,
 60117: 1,
 77235: 18,
 55336: 1,
 59405: 1,
 76671: 1,
 78380: 1,
 64287: 1,
 61870: 10,
 19133: 7,
 79653: 1,
 54621: 1,
 40810: 1,
 41746: 1,
 21471: 1,
 21359: 1,
 18921: 1,
 80604: 1,
 48458: 1,
 67895: 19,
 32324: 14,
 27128: 1,
 89974: 1,
 30915: 1,
 74017: 1,
 85558: 1,
 68395: 1,
 67206: 1,
 96114: 1,
 66153: 

In [None]:
# For every distinct left-hand value, look up how many times it occurs in the
# right-hand list (0 when it never appears there).
similar_count_of_col1 = dict(
    (left_value, count_col2.get(left_value, 0))
    for left_value in unique_values_col1
)
# Keep only the left-hand values that actually occur in the right-hand list.
filtered_similar_count_of_col1 = {
    left_value: occurrences
    for left_value, occurrences in similar_count_of_col1.items()
    if occurrences > 0
}

{73737: 3,
 98326: 2,
 75803: 17,
 26741: 2,
 34977: 9,
 78044: 12,
 92382: 6,
 67895: 19,
 67896: 8,
 45373: 20,
 96652: 13,
 96669: 5,
 20896: 17,
 61870: 10,
 70248: 9,
 27282: 18,
 84666: 1,
 19133: 7,
 21184: 16,
 31426: 15,
 94982: 8,
 64433: 19,
 78775: 3,
 35769: 4,
 78795: 10,
 66530: 12,
 66648: 16,
 21593: 8,
 74888: 10,
 40092: 20,
 13469: 11,
 33966: 13,
 72913: 11,
 29942: 1,
 77147: 11,
 77235: 18,
 62925: 20,
 87515: 1,
 77312: 13,
 32324: 14,
 65172: 18,
 97947: 15,
 87748: 17,
 20178: 2,
 92102: 9}

In [61]:
# Similarity score: every entry of the left list contributes
# value * (number of occurrences of that value in the right list).
# Iterate over the full col1 column rather than the de-duplicated lookup so a
# value repeated in the left list is counted once per occurrence.
total = sum(value * count_col2.get(value, 0) for value in df2['col1'])
total

29379307