# Advent of Code Day 2

## Part 1

In [269]:
import pandas as pd

# Set display options to prevent wrapping
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.expand_frame_repr', False)

Paste input data into csv file and read into Pandas dataframe

In [270]:
df = pd.read_csv('input-day2.csv', sep=' ', header=None, engine='python',)
df

Unnamed: 0,0,1,2,3,4,5,6,7
0,22,25,27,28,30,31.0,32.0,29.0
1,72,74,75,77,80,81.0,81.0,
2,52,53,55,58,59,63.0,,
3,14,17,19,22,27,,,
4,65,68,67,68,71,73.0,76.0,77.0
...,...,...,...,...,...,...,...,...
995,43,46,49,52,55,56.0,57.0,58.0
996,41,44,47,48,50,53.0,,
997,33,31,28,27,24,22.0,19.0,
998,36,35,32,31,28,,,


Iterate through rows and drop NaN values then use .is_monotonic_increasing and .is_monotonic_decreasing with .is_unique to check for strict increasing/decreasing to check the first rule.
If the series passes the first test, check if all absolute values in the series are less than 4. (The strict monotonic check already filtered out any values with a difference of less than 1.)

*I feel like there may be a way to do this in a vectorized way, maybe with apply, but I wasn't sure how to do the extra data cleaning for the processing.*

In [271]:
clean_list = []
for idx, row in df.iterrows():
    p1 = row.dropna()
    if (p1.is_monotonic_increasing and p1.is_unique) or (p1.is_monotonic_decreasing and p1.is_unique):
        clean = p1.diff().dropna().astype('int').abs()
        if clean.lt(4).all():
            clean_list.append(clean)

solution_p1 = len(clean_list)
solution_p1

639

## Part 2

Run the same steps as above, but append all the unsafe reports to a list for this part. 

In [272]:
import numpy as np

In [273]:
# Reconfigure Part 1 solution
def check_strict_safe(input_df):
    ''' Strict check on safe reports ignoring the Problem Dampener.
        Input is Pandas df made from csv.
        Checks for strict increasing or decreasing numbers and the difference between all values is less than 4.
        Returns 2 lists of Pandas series, Strictly Safe and Possibly Unsafe.
    '''
    strict_safe_list = []
    unsafe_list = []
    for idx, row in input_df.iterrows():
        p1 = row.dropna()
        # Checks for strict increasing or decreasing values in series and writes to associated list
        if (p1.is_monotonic_increasing and p1.is_unique) or (p1.is_monotonic_decreasing and p1.is_unique):
            clean = p1.diff().dropna().astype('int').abs()
            # Only add series where all diff values are under 4 to safe list
            if clean.lt(4).all():
                strict_safe_list.append(p1)
            else:
                unsafe_list.append(p1)
        else:
            unsafe_list.append(p1)
    return strict_safe_list , unsafe_list

strict_safe_list, unsafe_list = check_strict_safe(df)
print(len(unsafe_list)) # Checking I captured all the unsafe reports (Should be 361 (1000 - 639)

361


### Checks Toolbox

In [274]:
def make_dfs(input_list):
    '''Creates list of dataframes from original input dataframe with calculated columns for analysis'''
    df_list =[]
    for record in input_list:
        df = pd.DataFrame({'values' : record, 
                           'diffs' : record.diff(), 
                           'signs' : np.sign(record.diff()),
                           'direction' : None,
                           'dir_error' : None,
                           'duplicates' : record.duplicated(keep=False), 
                           'diffs_gt_3' : record.diff().abs().gt(3),
                           'error_types': None,
                           'errors' : 0,
                           })
        # Set direction
        df['direction'] = df.apply(lambda row: 1 if df['diffs'].sum() > 0 else -1 if df['diffs'].sum() < 0 else 0, axis=1)
        df['dir_error'] = df.apply(lambda row: True if row['signs'] != row['direction'] and pd.notna(row['signs']) else False, axis=1)
        # Add 1 to errors column value if error column == True
        # df['errors'] = df.apply(lambda row: row['errors'] + 1 if row['duplicates'] or row['diffs_gt_3'] or row['dir_error'] else row['errors'], axis=1)
        df_list.append(df)
    return df_list

def set_errors(df):
    df['error_types'] = [[] for _ in range(len(df))]
    df['error_types'] = np.where(df['dir_error'], df['error_types'].apply(lambda x: x + ['dir']), df['error_types'])
    df['error_types'] = np.where(df['duplicates'], df['error_types'].apply(lambda x: x + ['dup']), df['error_types'])
    df['error_types'] = np.where(df['diffs_gt_3'], df['error_types'].apply(lambda x: x + ['diff']), df['error_types'])
    df['errors'] = df['dir_error'].astype('int') + df['duplicates'].astype('int') + df['diffs_gt_3'].astype('int')

In [275]:
########################################################## 

def is_strict_safe(input_df):
    '''Checks Strict safety. True is safe, False is unsafe'''
    clean = input_df['values'].dropna()
    if ((clean.is_monotonic_increasing and clean.is_unique) or (clean.is_monotonic_decreasing and clean.is_unique)):
        clean2 = clean.diff().dropna().astype('int').abs()
        if clean2.lt(4).all():
            return True
        else:
            return False
    return False

def slope_change_gt_threshold(input_df, threshold):
    '''Check slope direction changes with a variable threshold (True indicates value > threshold)'''
    length = len(input_df['signs'].groupby(input_df['signs']).count())
    
    if length > threshold:
        return True
    else:
        return False

def duplicate_values_ge_threshold(input_df, threshold):
    '''Checks in grouped values column for any values greater or equal to threshold (True indicates any value >= threshold)'''
    grouped = input_df['values'].groupby(input_df['values']).count()
    if grouped.ge(threshold).any():
        return True
    else:
        return False
    
def sum_errors_gt_threshold(input_df, threshold):
    '''Check sum of the errors column and applies threshold (True indicates value > threshold) '''
    summed = input_df['errors'].sum()
    if summed > threshold:
        return True
    else:
        return False  


Situations to check

Immediate disqualification
- Duplicate value groups > 2
- Duplicate values over 2. (Groupby('value').any() count is >= 3)
- More than 1 diff value abs(>= 4)
- 3 groups after groupby('signs') - slope_change_gt_threshold()

Requires additional checks
- Single set of duplicated values - Increase error count for df
- Single diff value >= 4 - Increase error count (Will need to remove value and rerun diff)

if 1 error, remove row and run strict test
if 2 error and both duplicate, drop first dupe then run strict, if fails, drop 2nd dupe then test with strict. If both fail, fails

In [306]:
unsafe_df_list = make_dfs(unsafe_list)

for df in unsafe_df_list:
    set_errors(df)

In [307]:
for df in unsafe_df_list:
    print(df)

   values  diffs  signs  direction  dir_error  duplicates  diffs_gt_3 error_types  errors
0    22.0    NaN    NaN          1      False       False       False          []       0
1    25.0    3.0    1.0          1      False       False       False          []       0
2    27.0    2.0    1.0          1      False       False       False          []       0
3    28.0    1.0    1.0          1      False       False       False          []       0
4    30.0    2.0    1.0          1      False       False       False          []       0
5    31.0    1.0    1.0          1      False       False       False          []       0
6    32.0    1.0    1.0          1      False       False       False          []       0
7    29.0   -3.0   -1.0          1       True       False       False       [dir]       1
   values  diffs  signs  direction  dir_error  duplicates  diffs_gt_3 error_types  errors
0    72.0    NaN    NaN          1      False       False       False          []       0
1    74.0 

In [308]:
def multi_error_check(input_df):
    num_dir = input_df['dir_error'].sum()
    num_diff = input_df['diffs_gt_3'].sum()

    if num_dir > 1 or num_diff > 1 or num_diff + num_dir > 1:
        return True
    else:
        return False

# for df in unsafe_df_list:
#     multi_error_check(df)
    


In [None]:

rejected_df_list = []
clean_df_list = []
for df in unsafe_df_list:
    conditions = [slope_change_gt_threshold(df, 2),
              duplicate_values_ge_threshold(df, 3),
              sum_errors_gt_threshold(df, 2),
              multi_error_check(df), # FIX THIS, TOO LOW after Answer is somewhere between 80 and 25
]

    if any(conditions):
        # print(True)
        rejected_df_list.append(df)
    else:
        # print(False)
        clean_df_list.append(df)

# for df in clean_df_list:
#     print(df)
print(len(clean_df_list))

25


In [310]:
for df in clean_df_list:
    print(df)

   values  diffs  signs  direction  dir_error  duplicates  diffs_gt_3 error_types  errors
0    22.0    NaN    NaN          1      False       False       False          []       0
1    25.0    3.0    1.0          1      False       False       False          []       0
2    27.0    2.0    1.0          1      False       False       False          []       0
3    28.0    1.0    1.0          1      False       False       False          []       0
4    30.0    2.0    1.0          1      False       False       False          []       0
5    31.0    1.0    1.0          1      False       False       False          []       0
6    32.0    1.0    1.0          1      False       False       False          []       0
7    29.0   -3.0   -1.0          1       True       False       False       [dir]       1
   values  diffs  signs  direction  dir_error  duplicates  diffs_gt_3 error_types  errors
0    52.0    NaN    NaN          1      False       False       False          []       0
1    53.0 