In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Load the training logs.
df = pd.read_csv('../datasets/portableWaterQualityLogsTrain.csv')

# The CSV is expected to provide the feature columns 'pH', 'Conductivity' and
# 'Chlorine' plus the label column 'Status' (a missing column raises KeyError).
# Selecting the columns directly replaces the former round trip through a dict
# of Python lists while leaving `df` with exactly these four columns.
df = df[['pH', 'Conductivity', 'Chlorine', 'Status']].copy()

# Feature and label separation
X = df[['pH', 'Conductivity', 'Chlorine']]
y = df['Status']

# Check the number of unique classes
num_classes = y.nunique()

# Fraction of the rows held out for evaluation (25%).
test_size = 0.25

# A stratified split needs a test set with room for at least one sample of
# every class. The previous guard compared len(X) with num_classes, which can
# never fail because the number of distinct labels is at most the row count.
n_test = int(np.ceil(test_size * len(X)))
if n_test < num_classes:
    raise ValueError(f"Not enough samples to create a test set with each class present. Total samples: {len(X)}, Classes: {num_classes}")

# Stratification also requires every class to appear at least twice so that it
# can be represented on both sides of the split.
class_counts = y.value_counts()
if len(class_counts) and class_counts.min() < 2:
    raise ValueError(f"Every class needs at least 2 samples for a stratified split. Smallest class has {class_counts.min()} sample(s)")

# Stratified train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42, stratify=y)

# Train the model. The forest is seeded like the split so that repeated runs
# produce the same model and the same report.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Evaluate the model on the held-out rows; zero_division=0 reports 0 instead
# of warning when a class receives no predictions.
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred, zero_division=0))

# Function to classify one reading based on 'pH', 'Conductivity' and 'Chlorine'
def check_value(pH, conductivity, chlorine, classifier=None):
    """Return the label predicted for a single water-quality reading.

    Args:
        pH: Value placed in the 'pH' feature column.
        conductivity: Value placed in the 'Conductivity' feature column.
        chlorine: Value placed in the 'Chlorine' feature column.
        classifier: Optional object with a ``predict(DataFrame)`` method.
            When omitted, the module-level ``model`` is used, which keeps
            existing three-argument calls working unchanged.

    Returns:
        The first element of the classifier's prediction for the one-row input.
    """
    if classifier is None:
        classifier = model
    # Build the row with the same column names used for the training features.
    input_df = pd.DataFrame([[pH, conductivity, chlorine]], columns=['pH', 'Conductivity', 'Chlorine'])
    prediction = classifier.predict(input_df)
    return prediction[0]

# Load test data
tdf = pd.read_csv('../datasets/portableWaterQualityLogsTest.csv')

# Classify every row of the test CSV and print one line per reading.
# All rows are predicted in a single batched call instead of building a
# one-row DataFrame and invoking the model once per reading; the printed
# text is unchanged.
features = tdf[['pH', 'Conductivity', 'Chlorine']]
new_data = features.values.tolist()
# Skip the model call for an empty file so that, as before, nothing is printed.
predictions = model.predict(features) if new_data else []
for value, result in zip(new_data, predictions):
    print(f'Input: pH = {value[0]}, Conductivity = {value[1]}, Chlorine = {value[2]}, Classification: {result}')

# joblib is imported at this point rather than with the other imports; the
# later joblib.load call relies on the name bound here.
import joblib
# Persist the fitted classifier to the current working directory.
joblib.dump(model, 'potable_water_quality.pkl')

              precision    recall  f1-score   support

   ANOMALOUS       1.00      0.75      0.86         8
      NORMAL       0.98      1.00      0.99       117

    accuracy                           0.98       125
   macro avg       0.99      0.88      0.92       125
weighted avg       0.98      0.98      0.98       125

Input: pH = 7.5, Conductivity = 200.0, Chlorine = 1.0, Classification: NORMAL
Input: pH = 7.51, Conductivity = 201.99, Chlorine = 1.01, Classification: NORMAL
Input: pH = 7.52, Conductivity = 243.31, Chlorine = 1.01, Classification: ANOMALOUS
Input: pH = 7.53, Conductivity = 205.65, Chlorine = 1.02, Classification: NORMAL
Input: pH = 7.54, Conductivity = 207.17, Chlorine = 1.03, Classification: NORMAL
Input: pH = 7.55, Conductivity = 208.41, Chlorine = 1.03, Classification: NORMAL
Input: pH = 7.56, Conductivity = 209.32, Chlorine = 1.04, Classification: NORMAL
Input: pH = 7.56, Conductivity = 209.85, Chlorine = 1.04, Classification: NORMAL
Input: pH = 7.57, Conduct

Input: pH = 7.59, Conductivity = 192.01, Chlorine = 1.05, Classification: NORMAL
Input: pH = 7.58, Conductivity = 190.97, Chlorine = 1.04, Classification: NORMAL
Input: pH = 7.58, Conductivity = 190.3, Chlorine = 1.04, Classification: NORMAL
Input: pH = 7.57, Conductivity = 190.01, Chlorine = 1.03, Classification: NORMAL
Input: pH = 7.57, Conductivity = 190.12, Chlorine = 1.32, Classification: NORMAL
Input: pH = 7.56, Conductivity = 190.62, Chlorine = 1.02, Classification: NORMAL
Input: pH = 7.55, Conductivity = 191.5, Chlorine = 1.01, Classification: NORMAL
Input: pH = 7.54, Conductivity = 192.72, Chlorine = 1.01, Classification: NORMAL
Input: pH = 7.53, Conductivity = 194.22, Chlorine = 1.0, Classification: NORMAL
Input: pH = 7.52, Conductivity = 195.96, Chlorine = 0.99, Classification: NORMAL
Input: pH = 7.51, Conductivity = 197.86, Chlorine = 0.99, Classification: NORMAL
Input: pH = 7.5, Conductivity = 199.84, Chlorine = 0.61, Classification: ANOMALOUS
Input: pH = 7.49, Conductivit

Input: pH = 7.41, Conductivity = 208.87, Chlorine = 0.96, Classification: NORMAL
Input: pH = 7.41, Conductivity = 207.77, Chlorine = 0.95, Classification: NORMAL
Input: pH = 7.41, Conductivity = 206.37, Chlorine = 0.95, Classification: NORMAL
Input: pH = 7.4, Conductivity = 204.71, Chlorine = 0.95, Classification: NORMAL
Input: pH = 7.4, Conductivity = 202.86, Chlorine = 0.95, Classification: NORMAL
Input: pH = 7.4, Conductivity = 200.9, Chlorine = 0.95, Classification: NORMAL
Input: pH = 7.4, Conductivity = 198.9, Chlorine = 1.35, Classification: NORMAL
Input: pH = 7.4, Conductivity = 196.95, Chlorine = 0.95, Classification: NORMAL
Input: pH = 7.4, Conductivity = 195.12, Chlorine = 0.96, Classification: NORMAL
Input: pH = 7.41, Conductivity = 193.48, Chlorine = 0.96, Classification: NORMAL
Input: pH = 7.41, Conductivity = 178.89, Chlorine = 0.97, Classification: ANOMALOUS
Input: pH = 7.42, Conductivity = 191.04, Chlorine = 0.97, Classification: NORMAL
Input: pH = 7.42, Conductivity = 

Input: pH = 7.49, Conductivity = 198.36, Chlorine = 1.29, Classification: NORMAL
Input: pH = 7.5, Conductivity = 200.35, Chlorine = 1.0, Classification: NORMAL
Input: pH = 7.51, Conductivity = 202.33, Chlorine = 1.01, Classification: NORMAL
Input: pH = 7.52, Conductivity = 204.22, Chlorine = 1.02, Classification: NORMAL
Input: pH = 7.53, Conductivity = 205.94, Chlorine = 1.02, Classification: NORMAL
Input: pH = 7.54, Conductivity = 207.42, Chlorine = 1.03, Classification: NORMAL
Input: pH = 7.55, Conductivity = 208.6, Chlorine = 1.03, Classification: NORMAL
Input: pH = 7.56, Conductivity = 209.44, Chlorine = 1.04, Classification: NORMAL
Input: pH = 7.57, Conductivity = 209.91, Chlorine = 1.04, Classification: NORMAL
Input: pH = 7.57, Conductivity = 209.98, Chlorine = 1.05, Classification: NORMAL
Input: pH = 7.58, Conductivity = 209.65, Chlorine = 1.05, Classification: NORMAL
Input: pH = 7.59, Conductivity = 208.94, Chlorine = 1.05, Classification: NORMAL
Input: pH = 7.59, Conductivity 

['potable_water_quality.pkl']

In [2]:
# Reload the classifier written by the joblib.dump call in the previous cell.
# NOTE(review): joblib files are pickle-based, so only load files from a
# trusted source.
model2 = joblib.load('potable_water_quality.pkl')

In [3]:
def ck_value(pH, conductivity, chlorine, classifier=None):
    """Return the label predicted for a single water-quality reading.

    Args:
        pH: Value placed in the 'pH' feature column.
        conductivity: Value placed in the 'Conductivity' feature column.
        chlorine: Value placed in the 'Chlorine' feature column.
        classifier: Optional object with a ``predict(DataFrame)`` method.
            When omitted, the module-level ``model2`` (the classifier
            reloaded from disk) is used, so existing three-argument calls
            behave as before.

    Returns:
        The first element of the classifier's prediction for the one-row input.
    """
    if classifier is None:
        classifier = model2
    # Create a DataFrame with the input values and proper column names
    input_df = pd.DataFrame([[pH, conductivity, chlorine]], columns=['pH', 'Conductivity', 'Chlorine'])
    prediction = classifier.predict(input_df)
    return prediction[0]

# Load test data
tdf = pd.read_csv('../datasets/portableWaterQualityLogsTest.csv')

# Classify every row of the test CSV with the reloaded classifier and print
# one line per reading. All rows are predicted in a single batched call
# instead of building a one-row DataFrame and invoking the model once per
# reading; the printed text is unchanged.
features = tdf[['pH', 'Conductivity', 'Chlorine']]
new_data = features.values.tolist()
# Skip the model call for an empty file so that, as before, nothing is printed.
predictions = model2.predict(features) if new_data else []
for value, result in zip(new_data, predictions):
    print(f'Input: pH = {value[0]}, Conductivity = {value[1]}, Chlorine = {value[2]}, Classification: {result}')

Input: pH = 7.5, Conductivity = 200.0, Chlorine = 1.0, Classification: NORMAL
Input: pH = 7.51, Conductivity = 201.99, Chlorine = 1.01, Classification: NORMAL
Input: pH = 7.52, Conductivity = 243.31, Chlorine = 1.01, Classification: ANOMALOUS
Input: pH = 7.53, Conductivity = 205.65, Chlorine = 1.02, Classification: NORMAL
Input: pH = 7.54, Conductivity = 207.17, Chlorine = 1.03, Classification: NORMAL
Input: pH = 7.55, Conductivity = 208.41, Chlorine = 1.03, Classification: NORMAL
Input: pH = 7.56, Conductivity = 209.32, Chlorine = 1.04, Classification: NORMAL
Input: pH = 7.56, Conductivity = 209.85, Chlorine = 1.04, Classification: NORMAL
Input: pH = 7.57, Conductivity = 210.0, Chlorine = 1.05, Classification: NORMAL
Input: pH = 7.58, Conductivity = 209.74, Chlorine = 1.05, Classification: NORMAL
Input: pH = 7.58, Conductivity = 209.09, Chlorine = 1.05, Classification: NORMAL
Input: pH = 7.59, Conductivity = 208.08, Chlorine = 1.05, Classification: NORMAL
Input: pH = 7.59, Conductivit

Input: pH = 7.42, Conductivity = 209.16, Chlorine = 0.95, Classification: NORMAL
Input: pH = 7.41, Conductivity = 208.18, Chlorine = 0.95, Classification: NORMAL
Input: pH = 7.41, Conductivity = 206.87, Chlorine = 0.96, Classification: NORMAL
Input: pH = 7.4, Conductivity = 205.29, Chlorine = 0.96, Classification: NORMAL
Input: pH = 7.4, Conductivity = 203.5, Chlorine = 0.97, Classification: NORMAL
Input: pH = 7.4, Conductivity = 201.57, Chlorine = 0.97, Classification: NORMAL
Input: pH = 7.4, Conductivity = 199.58, Chlorine = 0.98, Classification: NORMAL
Input: pH = 7.4, Conductivity = 197.6, Chlorine = 0.99, Classification: NORMAL
Input: pH = 7.4, Conductivity = 195.72, Chlorine = 0.99, Classification: NORMAL
Input: pH = 7.41, Conductivity = 164.96, Chlorine = 1.0, Classification: ANOMALOUS
Input: pH = 7.41, Conductivity = 192.54, Chlorine = 1.01, Classification: NORMAL
Input: pH = 7.41, Conductivity = 191.36, Chlorine = 1.01, Classification: NORMAL
Input: pH = 7.42, Conductivity = 2

Input: pH = 7.59, Conductivity = 206.99, Chlorine = 1.03, Classification: NORMAL
Input: pH = 7.6, Conductivity = 205.43, Chlorine = 1.02, Classification: NORMAL
Input: pH = 7.6, Conductivity = 203.65, Chlorine = 0.84, Classification: NORMAL
Input: pH = 7.6, Conductivity = 201.73, Chlorine = 1.01, Classification: NORMAL
Input: pH = 7.6, Conductivity = 199.73, Chlorine = 1.0, Classification: NORMAL
Input: pH = 7.6, Conductivity = 197.75, Chlorine = 0.99, Classification: NORMAL
Input: pH = 7.6, Conductivity = 195.86, Chlorine = 0.99, Classification: NORMAL
Input: pH = 7.6, Conductivity = 194.14, Chlorine = 0.98, Classification: NORMAL
Input: pH = 7.59, Conductivity = 192.64, Chlorine = 0.97, Classification: NORMAL
Input: pH = 7.59, Conductivity = 191.44, Chlorine = 0.97, Classification: NORMAL
Input: pH = 7.58, Conductivity = 190.59, Chlorine = 0.96, Classification: NORMAL
Input: pH = 7.58, Conductivity = 222.1, Chlorine = 0.96, Classification: NORMAL
Input: pH = 7.57, Conductivity = 190.

Input: pH = 7.6, Conductivity = 195.26, Chlorine = 1.03, Classification: NORMAL
Input: pH = 7.59, Conductivity = 193.61, Chlorine = 1.02, Classification: NORMAL
Input: pH = 7.59, Conductivity = 192.21, Chlorine = 1.01, Classification: NORMAL
Input: pH = 7.59, Conductivity = 191.12, Chlorine = 1.01, Classification: NORMAL
Input: pH = 7.58, Conductivity = 190.38, Chlorine = 1.0, Classification: NORMAL
Input: pH = 7.57, Conductivity = 190.03, Chlorine = 0.99, Classification: NORMAL
Input: pH = 7.57, Conductivity = 190.08, Chlorine = 0.98, Classification: NORMAL
Input: pH = 7.56, Conductivity = 190.52, Chlorine = 0.98, Classification: NORMAL
Input: pH = 7.55, Conductivity = 191.34, Chlorine = 0.97, Classification: NORMAL
Input: pH = 7.54, Conductivity = 192.5, Chlorine = 0.97, Classification: NORMAL
Input: pH = 7.53, Conductivity = 193.97, Chlorine = 0.96, Classification: NORMAL
Input: pH = 7.52, Conductivity = 195.67, Chlorine = 0.96, Classification: NORMAL
Input: pH = 7.51, Conductivity 