# IMPORTING DATA

In [9]:
# Import reader module from csv Library
from csv import reader

# read the CSV file
def load_csv(filename, encoding="utf-8-sig"):
    """
    Read a CSV file and return its rows as a list of lists of strings.

    Parameters:
        filename (str): Path of the CSV file to read.
        encoding (str): Text encoding used to decode the file. The default
            'utf-8-sig' reads plain UTF-8 and also strips a leading byte-order
            mark, so the first header cell does not come back as '\\ufeffDay'.

    Returns:
        list[list[str]]: One inner list per CSV row, in file order (the header
            row, if any, is included as the first element).
    """
    # The context manager closes the file even if parsing fails.
    # newline="" lets the csv module handle line endings inside quoted fields itself.
    with open(filename, "r", encoding=encoding, newline="") as file:
        # Materialise the rows before the file is closed
        return list(reader(file))

if __name__ == "__main__":
    # Path of the dataset
    file_path = "Pool_Chem_Analysis.csv"
    data = load_csv(file_path)

    # Kept inside the guard: `data` only exists when this branch runs, so printing
    # it at module level would raise NameError if the file were imported.
    print(data)

[['\ufeffDay', '9AM', '1PM'], ['1', '2.8', '1.6'], ['2', '3.1', '2.1'], ['3', '3.0', '0.4'], ['4', '1.8', '0.9'], ['5', '1.2', '0.1'], ['6', '2.0', '0.7'], ['7', '1.8', '0.2'], ['8', '1.9', '1.1'], ['9', '3.0', '0.6'], ['10', '1.5', '1.8'], ['11', '1.5', '1.0'], ['12', '2.1', '2.5'], ['13', '1.6', '0.3'], ['14', '1.7', '0.7'], ['15', '2.6', '1.8'], ['16', '2.9', '1.6'], ['17', '2.9', '0.7'], ['18', '2.4', '0.7'], ['19', '1.4', '1.4'], ['20', '1.3', '1.5'], ['21', '2.9', '1.8'], ['22', '2.8', '2.0'], ['23', '1.4', '3.0'], ['24', '1.0', '0.0'], ['25', '2.6', '0.2'], ['26', '2.8', '1.4'], ['27', '1.7', '2.1'], ['28', '1.1', '1.3'], ['29', '3.0', '0.7'], ['30', '1.1', '0.7'], ['31', '1.5', '0.1'], ['32', '1.7', '0.7'], ['33', '1.7', '0.2'], ['34', '2.0', '1.4'], ['35', '1.8', '1.0'], ['36', '2.3', '0.5'], ['37', '1.7', '0.7'], ['38', '2.9', '0.4'], ['39', '0.1', '0.9'], ['40', '2.2', '0.8'], ['41', '2.0', '1.2'], ['42', '1.2', '0.5'], ['43', '1.0', '0.9'], ['44', '1.5', '0.3'], ['45', '2.9

# DATA WRANGLING

In [10]:
import pandas as pd
# Build the chlorine table from the raw CSV rows:
#   - drop rows 0 and 1 (row 0 is the header row: Day / 9AM / 1PM)
#   - drop the first column (the day number)
#   - renumber the rows from 0 and give the two remaining columns readable names
# NOTE(review): row 1 is the first data row (day 1), not a header -- confirm that
# discarding it is intended.
df = (
    pd.DataFrame(data)
    .drop(index=[0, 1])
    .iloc[:, 1:]
    .reset_index(drop=True)
    .rename(columns={1: '9AM_Cl2', 2: '1PM_Cl2'})
)
print(df)

# The CSV reader yields strings; convert both chlorine columns to float
df = df.astype({'9AM_Cl2': float, '1PM_Cl2': float})

# Inspect the resulting data types
data_types = df.dtypes
print(data_types)


   9AM_Cl2 1PM_Cl2
0      3.1     2.1
1      3.0     0.4
2      1.8     0.9
3      1.2     0.1
4      2.0     0.7
..     ...     ...
63     1.9     1.6
64     3.0     1.5
65     1.4     0.1
66     2.8     2.0
67     1.1     0.5

[68 rows x 2 columns]
9AM_Cl2    float64
1PM_Cl2    float64
dtype: object


# CREATING FUNCTION TO CHECK RANGES

In [11]:
def check_range(data_frame, column, min_val, max_val, copy=False):
    """
    Flags each entry of a column as inside or outside a numerical range (bounds inclusive).

    Parameters:
        data_frame (pd.DataFrame): The input data frame.
        column (str): The name of the column to check.
        min_val (float): The minimum value of the range (inclusive).
        max_val (float): The maximum value of the range (inclusive).
        copy (bool): If False (default), the 'within_range' column is added to
                     `data_frame` itself and that same object is returned, replacing
                     any 'within_range' column from an earlier call. If True, the
                     input is left untouched and a flagged copy is returned.

    Returns:
        pd.DataFrame: The data frame (the input itself, or a copy when copy=True) with all
                      of its columns plus a boolean column 'within_range' that is True where
                      min_val <= value <= max_val.
    """
    # Work on a copy only when asked to; the default mutates the caller's frame,
    # which existing callers rely on.
    target = data_frame.copy() if copy else data_frame
    within_range = (target[column] >= min_val) & (target[column] <= max_val)
    target['within_range'] = within_range
    return target


# Range used by the range checks in the cells below;
# both bounds are inclusive (see check_range)
min_val, max_val = 0.5, 4.0

# NOT WITHIN RANGE- 9AM Cl2 LEVELS

In [12]:
# Show the original column values next to True/False values.
# check_range writes 'within_range' into df itself and returns that same frame, so take
# a copy here: otherwise a later check on another column would overwrite these 9AM flags.
df_result_9AM = check_range(df, '9AM_Cl2', min_val, max_val).copy()
print(df_result_9AM[['9AM_Cl2', 'within_range']])

# Filter rows where within_range is False, for Chlorine levels at 9am
# (filter the 9AM snapshot itself rather than the shared df)
filtered_df_9AM = df_result_9AM[~df_result_9AM['within_range']]
print("Filtered DataFrame for within_range=False:")
print(filtered_df_9AM[['9AM_Cl2', 'within_range']])

    9AM_Cl2  within_range
0       3.1          True
1       3.0          True
2       1.8          True
3       1.2          True
4       2.0          True
..      ...           ...
63      1.9          True
64      3.0          True
65      1.4          True
66      2.8          True
67      1.1          True

[68 rows x 2 columns]
Filtered DataFrame for within_range=False:
    9AM_Cl2  within_range
37      0.1         False


# NOT WITHIN RANGE- 1PM Cl2 LEVELS

In [13]:
# Show the original column values next to True/False values.
# check_range writes 'within_range' into df itself and returns that same frame, so take
# a copy here: otherwise a later check on another column would overwrite these 1PM flags.
df_result_1PM = check_range(df, '1PM_Cl2', min_val, max_val).copy()
print(df_result_1PM[['1PM_Cl2', 'within_range']])

# Filter rows where within_range is False, for Chlorine levels at 1pm
# (filter the 1PM snapshot itself rather than the shared df)
filtered_df_1PM = df_result_1PM[~df_result_1PM['within_range']]
print("Filtered DataFrame for within_range=False:")
print(filtered_df_1PM[['1PM_Cl2', 'within_range']])
    

    1PM_Cl2  within_range
0       2.1          True
1       0.4         False
2       0.9          True
3       0.1         False
4       0.7          True
..      ...           ...
63      1.6          True
64      1.5          True
65      0.1         False
66      2.0          True
67      0.5          True

[68 rows x 2 columns]
Filtered DataFrame for within_range=False:
    1PM_Cl2  within_range
1       0.4         False
3       0.1         False
5       0.2         False
11      0.3         False
22      0.0         False
23      0.2         False
29      0.1         False
31      0.2         False
36      0.4         False
42      0.3         False
51      0.2         False
54      0.4         False
55      0.1         False
65      0.1         False


Next steps:
- Figure out the difference between each reading and the ideal value.
- Make a formula that tells you how much Cl2 to add depending on the difference:
  - diff = 0: add nothing
  - diff > 0: add xxx thiosulfate
  - diff < 0: add xxx Cl2