In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Load the dataset
df = pd.read_csv('logsTrain.csv')

# The CSV is expected to provide the feature columns 'Contamination' and
# 'Filter Efficiency' plus the label column 'Status'. Selecting them directly
# raises a KeyError if one is missing and drops any unrelated columns.
feature_columns = ['Contamination', 'Filter Efficiency']
label_column = 'Status'
df = df[feature_columns + [label_column]].copy()

# Feature and label separation
X = df[feature_columns]
y = df[label_column]

# Adjusted test size
test_size = 0.25  # 25% test size

# Validate that a stratified split is possible before attempting it.
# (Comparing len(X) with the number of classes alone can never fail, because
# the number of distinct labels is at most the number of rows.)
num_classes = y.nunique()
class_counts = y.value_counts()
n_samples = len(X)
# train_test_split rounds a fractional test_size up to whole samples.
n_test = int(np.ceil(test_size * n_samples))
n_train = n_samples - n_test

if class_counts.empty:
    raise ValueError("No labelled samples found in 'logsTrain.csv'.")

# Stratification needs every class on both sides of the split.
if class_counts.min() < 2:
    raise ValueError(
        f"Stratified splitting needs at least 2 samples per class; "
        f"the least populated class has {class_counts.min()}."
    )

if n_test < num_classes or n_train < num_classes:
    raise ValueError(f"Not enough samples to create a test set with each class present. Total samples: {len(X)}, Classes: {num_classes}")

# Stratified train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42, stratify=y)

# Train the model. The forest is seeded like the split so that re-running the
# cell reproduces the same model and the same report.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Evaluate the model
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred, zero_division=0))

# Classify a single reading with the module-level `model` trained above
def check_value(water_volume, flow_rate):
    """Return the predicted 'Status' label for one reading.

    Despite their names, the arguments are used as the two model features:
    ``water_volume`` fills the 'Contamination' column and ``flow_rate`` fills
    the 'Filter Efficiency' column.
    """
    feature_names = ['Contamination', 'Filter Efficiency']
    # One-row frame carrying the same column names the model was fitted on.
    sample = pd.DataFrame([[water_volume, flow_rate]], columns=feature_names)
    return model.predict(sample)[0]

# Read the separate log file whose rows are to be classified
tdf = pd.read_csv('logsTest.csv')

# Run every (Contamination, Filter Efficiency) row of 'logsTest.csv' through
# check_value and print the predicted label next to the inputs
feature_frame = tdf[['Contamination', 'Filter Efficiency']]
new_data = feature_frame.to_numpy().tolist()
for value in new_data:
    result = check_value(*value)
    print(f'Input: Contamination = {value[0]}, Filter Efficiency = {value[1]}, Classification: {result}')

# Persist the trained classifier to disk; kept as the final expression so the
# notebook still displays the list of written files
import joblib
joblib.dump(model, 'water_contamination.pkl')


              precision    recall  f1-score   support

   ANOMALOUS       1.00      0.83      0.91         6
      NORMAL       0.99      1.00      1.00       119

    accuracy                           0.99       125
   macro avg       1.00      0.92      0.95       125
weighted avg       0.99      0.99      0.99       125

Input: Contamination = 2.0, Filter Efficiency = 98.0, Classification: NORMAL
Input: Contamination = 2.08, Filter Efficiency = 98.06, Classification: NORMAL
Input: Contamination = 2.17, Filter Efficiency = 98.12, Classification: NORMAL
Input: Contamination = 2.25, Filter Efficiency = 98.18, Classification: NORMAL
Input: Contamination = 2.33, Filter Efficiency = 98.24, Classification: NORMAL
Input: Contamination = 1.66, Filter Efficiency = 98.29, Classification: NORMAL
Input: Contamination = 2.48, Filter Efficiency = 98.34, Classification: NORMAL
Input: Contamination = 1.56, Filter Efficiency = 98.38, Classification: NORMAL
Input: Contamination = 2.62, Filter Efficie

Input: Contamination = 1.16, Filter Efficiency = 98.04, Classification: NORMAL
Input: Contamination = 1.58, Filter Efficiency = 97.98, Classification: NORMAL
Input: Contamination = 0.71, Filter Efficiency = 97.92, Classification: NORMAL
Input: Contamination = 1.05, Filter Efficiency = 97.86, Classification: NORMAL
Input: Contamination = 1.03, Filter Efficiency = 97.8, Classification: NORMAL
Input: Contamination = 1.01, Filter Efficiency = 97.74, Classification: NORMAL
Input: Contamination = 1.0, Filter Efficiency = 97.69, Classification: NORMAL
Input: Contamination = 1.0, Filter Efficiency = 97.64, Classification: NORMAL
Input: Contamination = 1.0, Filter Efficiency = 97.6, Classification: NORMAL
Input: Contamination = 1.01, Filter Efficiency = 97.57, Classification: NORMAL
Input: Contamination = 1.03, Filter Efficiency = 71.92, Classification: ANOMALOUS
Input: Contamination = 1.06, Filter Efficiency = 97.52, Classification: NORMAL
Input: Contamination = 1.09, Filter Efficiency = 97.51

Input: Contamination = 1.99, Filter Efficiency = 98.5, Classification: NORMAL
Input: Contamination = 1.91, Filter Efficiency = 98.5, Classification: NORMAL
Input: Contamination = 1.83, Filter Efficiency = 98.48, Classification: NORMAL
Input: Contamination = 1.74, Filter Efficiency = 98.46, Classification: NORMAL
Input: Contamination = 1.66, Filter Efficiency = 98.44, Classification: NORMAL
Input: Contamination = 1.59, Filter Efficiency = 98.4, Classification: NORMAL
Input: Contamination = 1.51, Filter Efficiency = 98.36, Classification: NORMAL
Input: Contamination = 1.11, Filter Efficiency = 98.32, Classification: NORMAL
Input: Contamination = 1.37, Filter Efficiency = 98.26, Classification: NORMAL
Input: Contamination = 1.31, Filter Efficiency = 98.21, Classification: NORMAL
Input: Contamination = 1.25, Filter Efficiency = 98.15, Classification: NORMAL
Input: Contamination = 1.2, Filter Efficiency = 98.09, Classification: NORMAL
Input: Contamination = 1.15, Filter Efficiency = 98.03, 

Input: Contamination = 2.95, Filter Efficiency = 97.52, Classification: NORMAL
Input: Contamination = 2.97, Filter Efficiency = 97.55, Classification: NORMAL
Input: Contamination = 2.99, Filter Efficiency = 97.58, Classification: NORMAL
Input: Contamination = 3.0, Filter Efficiency = 97.61, Classification: NORMAL
Input: Contamination = 3.0, Filter Efficiency = 97.65, Classification: NORMAL
Input: Contamination = 3.0, Filter Efficiency = 97.7, Classification: NORMAL
Input: Contamination = 2.98, Filter Efficiency = 97.75, Classification: NORMAL
Input: Contamination = 2.97, Filter Efficiency = 97.81, Classification: NORMAL
Input: Contamination = 2.94, Filter Efficiency = 97.87, Classification: NORMAL
Input: Contamination = 2.91, Filter Efficiency = 97.93, Classification: NORMAL
Input: Contamination = 2.87, Filter Efficiency = 97.99, Classification: NORMAL
Input: Contamination = 2.83, Filter Efficiency = 98.05, Classification: NORMAL
Input: Contamination = 2.78, Filter Efficiency = 98.12, 

['water_contamination.pkl']

In [5]:
# Reload the classifier that the training cell wrote with joblib.dump.
# Relies on `joblib` having been imported by that earlier cell.
# NOTE: joblib files are pickle-based -- only load files from a trusted source.
model2 = joblib.load('water_contamination.pkl')

In [6]:
def ck_value(water_volume, flow_rate):
    """Return the 'Status' label predicted by the reloaded ``model2``.

    Despite their names, ``water_volume`` is used as the 'Contamination'
    feature and ``flow_rate`` as the 'Filter Efficiency' feature.
    """
    # Single-row frame with the feature columns named explicitly.
    reading = pd.DataFrame(
        [[water_volume, flow_rate]],
        columns=['Contamination', 'Filter Efficiency'],
    )
    labels = model2.predict(reading)
    return labels[0]

# Read the test log again for this cell
tdf = pd.read_csv('logsTest.csv')

# Classify each row with the reloaded model (via ck_value) and print the result
test_features = tdf[['Contamination', 'Filter Efficiency']]
new_data = test_features.to_numpy().tolist()
for value in new_data:
    result = ck_value(*value)
    print(f'Input: Contamination = {value[0]}, Filter Efficiency = {value[1]}, Classification: {result}')

Input: Contamination = 2.0, Filter Efficiency = 98.0, Classification: NORMAL
Input: Contamination = 2.08, Filter Efficiency = 98.06, Classification: NORMAL
Input: Contamination = 2.17, Filter Efficiency = 98.12, Classification: NORMAL
Input: Contamination = 2.25, Filter Efficiency = 98.18, Classification: NORMAL
Input: Contamination = 2.33, Filter Efficiency = 98.24, Classification: NORMAL
Input: Contamination = 1.66, Filter Efficiency = 98.29, Classification: NORMAL
Input: Contamination = 2.48, Filter Efficiency = 98.34, Classification: NORMAL
Input: Contamination = 1.56, Filter Efficiency = 98.38, Classification: NORMAL
Input: Contamination = 2.62, Filter Efficiency = 98.42, Classification: NORMAL
Input: Contamination = 2.68, Filter Efficiency = 98.45, Classification: NORMAL
Input: Contamination = 2.74, Filter Efficiency = 98.47, Classification: NORMAL
Input: Contamination = 2.79, Filter Efficiency = 98.49, Classification: NORMAL
Input: Contamination = 2.84, Filter Efficiency = 98.5,

Input: Contamination = 1.53, Filter Efficiency = 98.37, Classification: NORMAL
Input: Contamination = 1.46, Filter Efficiency = 98.33, Classification: NORMAL
Input: Contamination = 1.39, Filter Efficiency = 98.28, Classification: NORMAL
Input: Contamination = 1.32, Filter Efficiency = 98.22, Classification: NORMAL
Input: Contamination = 1.27, Filter Efficiency = 98.16, Classification: NORMAL
Input: Contamination = 1.21, Filter Efficiency = 98.1, Classification: NORMAL
Input: Contamination = 1.16, Filter Efficiency = 98.04, Classification: NORMAL
Input: Contamination = 1.58, Filter Efficiency = 97.98, Classification: NORMAL
Input: Contamination = 0.71, Filter Efficiency = 97.92, Classification: NORMAL
Input: Contamination = 1.05, Filter Efficiency = 97.86, Classification: NORMAL
Input: Contamination = 1.03, Filter Efficiency = 97.8, Classification: NORMAL
Input: Contamination = 1.01, Filter Efficiency = 97.74, Classification: NORMAL
Input: Contamination = 1.0, Filter Efficiency = 97.69,

Input: Contamination = 2.97, Filter Efficiency = 97.8, Classification: NORMAL
Input: Contamination = 2.95, Filter Efficiency = 97.86, Classification: NORMAL
Input: Contamination = 2.92, Filter Efficiency = 97.92, Classification: NORMAL
Input: Contamination = 2.88, Filter Efficiency = 97.98, Classification: NORMAL
Input: Contamination = 2.84, Filter Efficiency = 98.04, Classification: NORMAL
Input: Contamination = 2.79, Filter Efficiency = 98.1, Classification: NORMAL
Input: Contamination = 2.73, Filter Efficiency = 98.16, Classification: NORMAL
Input: Contamination = 2.68, Filter Efficiency = 88.98, Classification: ANOMALOUS
Input: Contamination = 2.61, Filter Efficiency = 98.28, Classification: NORMAL
Input: Contamination = 2.54, Filter Efficiency = 98.33, Classification: NORMAL
Input: Contamination = 2.47, Filter Efficiency = 98.37, Classification: NORMAL
Input: Contamination = 2.4, Filter Efficiency = 98.41, Classification: NORMAL
Input: Contamination = 2.32, Filter Efficiency = 98.

Input: Contamination = 1.75, Filter Efficiency = 98.18, Classification: NORMAL
Input: Contamination = 1.83, Filter Efficiency = 116.53, Classification: ANOMALOUS
Input: Contamination = 1.92, Filter Efficiency = 98.06, Classification: NORMAL
Input: Contamination = 2.0, Filter Efficiency = 98.0, Classification: NORMAL
Input: Contamination = 2.08, Filter Efficiency = 97.94, Classification: NORMAL
Input: Contamination = 2.17, Filter Efficiency = 97.88, Classification: NORMAL
Input: Contamination = 2.25, Filter Efficiency = 97.82, Classification: NORMAL
Input: Contamination = 2.33, Filter Efficiency = 97.76, Classification: NORMAL
Input: Contamination = 2.41, Filter Efficiency = 97.71, Classification: NORMAL
Input: Contamination = 2.48, Filter Efficiency = 97.66, Classification: NORMAL
Input: Contamination = 2.55, Filter Efficiency = 97.62, Classification: NORMAL
Input: Contamination = 2.62, Filter Efficiency = 97.58, Classification: NORMAL
Input: Contamination = 2.68, Filter Efficiency = 9