# Advent of Code Day 2

## Part 1

In [169]:
import pandas as pd
import numpy as np

Paste input data into csv file and read into Pandas dataframe

In [None]:
df = pd.read_csv('input-day2.csv', sep=' ', header=None, engine='python',)

Unnamed: 0,0,1,2,3,4,5,6,7
0,22,25,27,28,30,31.0,32.0,29.0
1,72,74,75,77,80,81.0,81.0,
2,52,53,55,58,59,63.0,,
3,14,17,19,22,27,,,
4,65,68,67,68,71,73.0,76.0,77.0
...,...,...,...,...,...,...,...,...
995,43,46,49,52,55,56.0,57.0,58.0
996,41,44,47,48,50,53.0,,
997,33,31,28,27,24,22.0,19.0,
998,36,35,32,31,28,,,


Iterate through rows and drop NaN values then use .is_monotonic_increasing and .is_monotonic_decreasing with .is_unique to check for strict increasing/decreasing to check the first rule.
If the series passes the first test, check if all absolute values in the series are less than 4. (The strict monotonic check already filtered out any values with a difference of less than 1.)

*I feel like there may be a way to do this in a vectorized way, maybe with apply, but I wasn't sure how to do the extra data cleaning for the processing.*

In [210]:
clean_list = []
for idx, row in df.iterrows():
    p1 = row.dropna()
    if (p1.is_monotonic_increasing and p1.is_unique) or (p1.is_monotonic_decreasing and p1.is_unique):
        clean = p1.diff().dropna().astype('int').abs()
        if clean.lt(4).all():
            clean_list.append(clean)

solution_p1 = len(clean_list)
solution_p1

639

## Part 2

The Problem Dampener is a reactor-mounted module that lets the reactor safety systems tolerate a single bad level in what would otherwise be a safe report. It's like the bad level never happened! Now, the same rules apply as before, except if removing a single level from an unsafe report would make it safe, the report instead counts as safe. Update your analysis by handling situations where the Problem Dampener can remove a single level from unsafe reports. How many reports are now safe?

Run the same steps as above, but append all the unsafe reports to a list for this part. 

Try:
- [X] Groupby then counter to count how many groups are >= 2 counts, if counter is more than 1, it fails
- [ ] Create df of series, column of diffs, and if diff is +/-/0 
- [ ] monotonic without unique

In [228]:
# Reconfigure Part 1 solution
def check_strict_safe(input_df):
    ''' Strict check on safe reports ignoring the Problem Dampener.
        Input is Pandas df made from csv.
        Checks for strict increasing or decreasing numbers and the difference between all values is less than 4.
        Returns 2 lists of Pandas series, Strictly Safe and Possibly Unsafe.
    '''
    strict_safe_list = []
    unsafe_list = []
    for idx, row in input_df.iterrows():
        p1 = row.dropna()
        # Checks for strict increasing or decreasing values in series and writes to associated list
        if (p1.is_monotonic_increasing and p1.is_unique) or (p1.is_monotonic_decreasing and p1.is_unique):
            clean = p1.diff().dropna().astype('int').abs()
            # Only add series where all diff values are under 4 to safe list
            if clean.lt(4).all():
                strict_safe_list.append(clean)
            else:
                unsafe_list.append(p1)
        else:
            unsafe_list.append(p1)
    return strict_safe_list , unsafe_list

strict_safe_list, unsafe_list = check_strict_safe(df)
print(len(unsafe_list)) # Checking I captured all the unsafe reports (Should be 361 (1000 - 639)

361


### Checks Toolbox

In [None]:
# Checks Strict safety - Run after any cleaning operation, and if False, add it to the rejected list
def is_strict_safe(input_series):
    clean = input_series.dropna()
    if ((clean.is_monotonic_increasing and clean.is_unique) or (clean.is_monotonic_decreasing and clean.is_unique)):
        clean2 = clean.diff().dropna().astype('int').abs()
        if clean2.lt(4).all():
            return True
        else:
            return False
    return False

# Checks for Duplicate Values greater or equal to 3
def num_dup_values_check(input_series):
    grouped = input_series.groupby(input_series).count()
    if grouped.ge(3).any():
        return False
    return True

# Checks for multiple sets of Duplicate Values
def mult_dup_values_check(input_series):
    grouped = input_series.groupby(input_series).count()
    counter = 0
    for i in grouped:
        if i == 2:
            counter += 1
    if counter > 1:
        return False
    else:
        return True
    
# Check for more than a single abs(diff) value over 3
def num_diff_values(input_series):
    diffed = input_series.diff().dropna().astype('int').abs()
    counter = 0
    for i in diffed:
        if i > 3:
            counter += 1
    if counter > 1:
        return False
    else:
        return True

# Check for multiple changes in trend slope direction
def slope_direction_changes(input_series):
    df = pd.DataFrame({'values' : input_series, 'diffs' : input_series.diff(), 'signs' : np.sign(input_series.diff())})
    signs = df['signs'].groupby(df['signs']).count()
    length = len(signs)
    if length > 2:
        return False
    else:
        return True

In [None]:
check_1 = []
rejected = []
for record in unsafe_list:
    if num_dup_values_check(record):
        check_1.append(record)
    else:
        rejected.append(record)

print(len(check_1))

332


In [None]:
check_2 = []
for record in check_1:
    if mult_dup_values_check(record):
        check_2.append(record)
    else:
        rejected.append(record)

print(len(check_2))

282


In [None]:
check_3 = []
for record in check_2:
    if num_diff_values(record):
        check_3.append(record)
    else:
        rejected.append(record)

print(len(check_3))

154


In [None]:
check_4 = []
for record in check_3:
    if slope_direction_changes(record):
        check_4.append(record)
    else:
        rejected.append(record)
print(len(check_4))

127


Situations to check

Immediate disqualification
- Duplicate value groups > 2
- Duplicate values over 2. (Groupby('value').any() count is >= 3)
- More than 1 diff value abs(>= 4)
- 3 groups after groupby('signs')

Requires additional checks
- Single set of duplicated values - Increase error count for df
- Single diff value >= 4 - Increase error count (Will need to remove value and rerun diff)
