In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Load the training log. The CSV is expected to contain the columns
# 'Water Volume', 'Flow Rate' and 'Status'.
df = pd.read_csv('../datasets/waterQuantityLogsTrain.csv')

# Keep only the columns used for modelling. Selecting them directly avoids a
# round trip through Python lists; a missing column still raises KeyError.
df = df[['Water Volume', 'Flow Rate', 'Status']].copy()

# Feature and label separation
X = df[['Water Volume', 'Flow Rate']]
y = df['Status']

# Number of distinct classes and how many samples each one has
num_classes = y.nunique()
class_counts = y.value_counts()

# A stratified split needs at least two samples of every class (one for each
# side of the split). Comparing len(X) with num_classes can never fail because
# nunique() is bounded by the number of rows, so check the per-class counts.
if class_counts.empty or class_counts.min() < 2:
    raise ValueError(
        "Not enough samples per class for a stratified train/test split. "
        f"Total samples: {len(X)}, Classes: {num_classes}, "
        f"Samples per class: {class_counts.to_dict()}"
    )

# Adjusted test size
test_size = 0.25  # 25% test size

# Stratified train-test split (seeded so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=test_size, random_state=42, stratify=y
)

# Train the model. The forest is seeded with the same value as the split so
# that re-running the cell reproduces the same model and report.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Evaluate the model on the held-out part of the training log
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred, zero_division=0))

# Function to classify new input data based on 'Water Volume' and 'Flow Rate'
def check_value(water_volume, flow_rate, classifier=None):
    """Classify a single (water volume, flow rate) reading.

    Parameters
    ----------
    water_volume : value placed in the 'Water Volume' feature column.
    flow_rate : value placed in the 'Flow Rate' feature column.
    classifier : optional object exposing ``predict(DataFrame)``. When omitted,
        the notebook-level ``model`` is used, which keeps existing two-argument
        calls working unchanged.

    Returns
    -------
    The first (and only) element of the classifier's prediction for the row.
    """
    estimator = model if classifier is None else classifier
    # Build a one-row frame with the same column names used for training so
    # the estimator receives the features under the names it was fitted with.
    input_df = pd.DataFrame([[water_volume, flow_rate]], columns=['Water Volume', 'Flow Rate'])
    prediction = estimator.predict(input_df)
    return prediction[0]

# Load test data
tdf = pd.read_csv('../datasets/waterQuantityLogsTest.csv')

# Feature columns, in the positional order check_value expects them
labels = ['Water Volume', 'Flow Rate']
new_values = tdf[labels].values.tolist()  # One [water_volume, flow_rate] row per reading
status_values = tdf['Status'].tolist()  # Recorded 'Status' of each reading, for comparison

# Iterate through the inputs and make predictions
for values, status in zip(new_values, status_values):
    # Classify this reading; without this call `result` is undefined and the
    # print below raises NameError on a fresh kernel.
    result = check_value(*values)
    # Dynamically create the Input label-output string
    input_details = ', '.join(f"{label}: {value}" for label, value in zip(labels, values))
    print(f'Input: ({input_details}), Classification: {result}, Real Result: {status}')
    
import joblib

# Persist the fitted classifier to disk so it can be reloaded without retraining.
joblib.dump(value=model, filename='../models/water_quantity.pkl')


              precision    recall  f1-score   support

   ANOMALOUS       1.00      1.00      1.00         4
      NORMAL       1.00      1.00      1.00       121

    accuracy                           1.00       125
   macro avg       1.00      1.00      1.00       125
weighted avg       1.00      1.00      1.00       125

Input: Water Volume = 997.5, Flow Rate = 10.0, Classification: NORMAL
Input: Water Volume = 994.98, Flow Rate = 10.08, Classification: NORMAL
Input: Water Volume = 992.44, Flow Rate = 10.16, Classification: NORMAL
Input: Water Volume = 989.88, Flow Rate = 10.24, Classification: NORMAL
Input: Water Volume = 987.3, Flow Rate = 10.31, Classification: NORMAL
Input: Water Volume = 984.71, Flow Rate = 10.37, Classification: NORMAL
Input: Water Volume = 982.1, Flow Rate = 10.42, Classification: NORMAL
Input: Water Volume = 979.49, Flow Rate = 10.46, Classification: NORMAL
Input: Water Volume = 976.87, Flow Rate = 10.49, Classification: NORMAL
Input: Water Volume = 974.24,

Input: Water Volume = 624.99, Flow Rate = 9.85, Classification: NORMAL
Input: Water Volume = 622.5, Flow Rate = 9.93, Classification: NORMAL
Input: Water Volume = 620.0, Flow Rate = 8.04, Classification: NORMAL
Input: Water Volume = 617.47, Flow Rate = 10.1, Classification: NORMAL
Input: Water Volume = 614.93, Flow Rate = 10.18, Classification: NORMAL
Input: Water Volume = 612.36, Flow Rate = 10.25, Classification: NORMAL
Input: Water Volume = 609.78, Flow Rate = 10.32, Classification: NORMAL
Input: Water Volume = 607.19, Flow Rate = 10.38, Classification: NORMAL
Input: Water Volume = 604.58, Flow Rate = 10.43, Classification: NORMAL
Input: Water Volume = 601.96, Flow Rate = 10.47, Classification: NORMAL
Input: Water Volume = 599.34, Flow Rate = 10.49, Classification: NORMAL
Input: Water Volume = 596.72, Flow Rate = 10.5, Classification: NORMAL
Input: Water Volume = 594.09, Flow Rate = 10.5, Classification: NORMAL
Input: Water Volume = 591.47, Flow Rate = 10.48, Classification: NORMAL


Input: Water Volume = 269.17, Flow Rate = 9.51, Classification: NORMAL
Input: Water Volume = 266.79, Flow Rate = 9.5, Classification: NORMAL
Input: Water Volume = 264.42, Flow Rate = 9.5, Classification: NORMAL
Input: Water Volume = 262.04, Flow Rate = 9.52, Classification: NORMAL
Input: Water Volume = 259.65, Flow Rate = 9.55, Classification: NORMAL
Input: Water Volume = 257.25, Flow Rate = 9.6, Classification: NORMAL
Input: Water Volume = 254.83, Flow Rate = 9.65, Classification: NORMAL
Input: Water Volume = 252.41, Flow Rate = 9.72, Classification: NORMAL
Input: Water Volume = 249.96, Flow Rate = 9.79, Classification: NORMAL
Input: Water Volume = 247.49, Flow Rate = 9.87, Classification: NORMAL
Input: Water Volume = 245.0, Flow Rate = 9.95, Classification: NORMAL
Input: Water Volume = 242.49, Flow Rate = 10.03, Classification: NORMAL
Input: Water Volume = 239.96, Flow Rate = 10.12, Classification: NORMAL
Input: Water Volume = 237.42, Flow Rate = 10.2, Classification: NORMAL
Input: W

Input: Water Volume = 0.0, Flow Rate = 10.13, Classification: NORMAL
Input: Water Volume = 0.0, Flow Rate = 10.04, Classification: NORMAL
Input: Water Volume = 0.0, Flow Rate = 9.96, Classification: NORMAL
Input: Water Volume = 0.0, Flow Rate = 9.88, Classification: NORMAL
Input: Water Volume = 0.0, Flow Rate = 9.8, Classification: NORMAL
Input: Water Volume = 0.0, Flow Rate = 9.73, Classification: NORMAL
Input: Water Volume = 0.0, Flow Rate = 9.66, Classification: NORMAL
Input: Water Volume = 0.0, Flow Rate = 9.61, Classification: NORMAL
Input: Water Volume = 0.0, Flow Rate = 9.56, Classification: NORMAL
Input: Water Volume = 0.0, Flow Rate = 9.53, Classification: NORMAL
Input: Water Volume = 0.0, Flow Rate = 9.51, Classification: NORMAL
Input: Water Volume = 0.0, Flow Rate = 9.5, Classification: NORMAL
Input: Water Volume = 0.0, Flow Rate = 9.51, Classification: NORMAL
Input: Water Volume = 0.0, Flow Rate = 9.53, Classification: NORMAL
Input: Water Volume = 0.0, Flow Rate = 9.56, Cla

['../models/water_quantity.pkl']

In [4]:
# Reload the persisted classifier from disk under a separate name.
# NOTE: joblib files are pickle-based; only load files from a trusted source.
model2 = joblib.load(filename='../models/water_quantity.pkl')

In [6]:
def ck_value(water_volume, flow_rate, classifier=None):
    """Classify a single reading with the classifier reloaded from disk.

    Parameters
    ----------
    water_volume : value placed in the 'Water Volume' feature column.
    flow_rate : value placed in the 'Flow Rate' feature column.
    classifier : optional object exposing ``predict(DataFrame)``. When omitted,
        the reloaded ``model2`` is used, so this cell exercises the persisted
        model rather than the in-memory ``model`` from the training cell.

    Returns
    -------
    The first (and only) element of the classifier's prediction for the row.
    """
    estimator = model2 if classifier is None else classifier
    # Create a one-row DataFrame with the input values and the feature column names
    input_df = pd.DataFrame([[water_volume, flow_rate]], columns=['Water Volume', 'Flow Rate'])
    prediction = estimator.predict(input_df)
    return prediction[0]

# Load the test log used to compare predictions with the recorded status
tdf = pd.read_csv('../datasets/waterQuantityLogsTest.csv')

# Feature columns, in the positional order ck_value expects them
labels = ['Water Volume', 'Flow Rate']
new_values = tdf[labels].values.tolist()  # One [water_volume, flow_rate] row per reading
status_values = tdf['Status'].tolist()  # Recorded 'Status' of each reading, for comparison

# Iterate through the inputs and make predictions
for values, status in zip(new_values, status_values):
    # Classify this reading; previously `result` was never assigned here, so
    # the loop relied on a stale kernel variable or raised NameError.
    result = ck_value(*values)
    # Dynamically create the Input label-output string
    input_details = ', '.join(f"{label}: {value}" for label, value in zip(labels, values))
    print(f'Input: ({input_details}), Classification: {result}, Real Result: {status}')

Input: (Water Volume: 997.5, Flow Rate: 10.0), Classification: NORMAL, Real Result: NORMAL
Input: (Water Volume: 994.98, Flow Rate: 10.08), Classification: NORMAL, Real Result: NORMAL
Input: (Water Volume: 992.44, Flow Rate: 10.16), Classification: NORMAL, Real Result: NORMAL
Input: (Water Volume: 989.88, Flow Rate: 10.24), Classification: NORMAL, Real Result: NORMAL
Input: (Water Volume: 987.3, Flow Rate: 10.31), Classification: NORMAL, Real Result: NORMAL
Input: (Water Volume: 984.71, Flow Rate: 10.37), Classification: NORMAL, Real Result: NORMAL
Input: (Water Volume: 982.1, Flow Rate: 10.42), Classification: NORMAL, Real Result: NORMAL
Input: (Water Volume: 979.49, Flow Rate: 10.46), Classification: NORMAL, Real Result: NORMAL
Input: (Water Volume: 976.87, Flow Rate: 10.49), Classification: NORMAL, Real Result: NORMAL
Input: (Water Volume: 974.24, Flow Rate: 10.5), Classification: NORMAL, Real Result: NORMAL
Input: (Water Volume: 971.62, Flow Rate: 10.5), Classification: NORMAL, Rea