<a href="https://colab.research.google.com/github/masengeshoseverain/Noise-app-monitor-system-/blob/IT/noise.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Imports and synthetic data generation

In [None]:
import pandas as pd
import numpy as np
import datetime

# Number of rows to synthesise for the training and testing datasets
n_train = 10000
n_test = 2000

# Vocabularies the categorical columns are sampled from (uniformly at random)
location_types = ['Indoor', 'Outdoor']
genders = ['Male', 'Female', 'Other']
hearing_sensitivity = ['Normal', 'Mild', 'Moderate', 'Severe']
health_issues = ['Healthy', 'Diabetes', 'Hypertension', 'Heart Disease', 'Other']
environments = ['Residential', 'Workplace', 'Recreational', 'Traffic', 'Industrial']

# Seed NumPy's global generator so repeated runs draw the same samples
np.random.seed(42)


def generate_data(num_samples):
    """Build a synthetic noise-exposure dataset.

    Every column is drawn independently from NumPy's global random generator,
    so the result depends on the generator's current state (seeded above).

    Args:
        num_samples: Number of rows to generate.

    Returns:
        pandas.DataFrame with one row per sample and the columns Timestamp,
        User_ID, Location_Type, Environment, Noise_Level_dB, Duration_Minutes,
        Age, Gender, Hearing_Protection_Used, Hearing_Sensitivity,
        Health_Issues and Hearing_Damage_Risk (0 or 1).
    """
    # Read the clock once: calling now() per row lets the base time drift,
    # so consecutive rows would not be exactly 15 minutes apart.
    now = datetime.datetime.now()
    timestamps = [now - datetime.timedelta(minutes=15 * i) for i in range(num_samples)]

    # The draw order below is kept stable so seeded output stays comparable.
    user_ids = np.random.randint(1000, 9999, num_samples)  # upper bound is exclusive
    noise_levels = np.random.uniform(0, 100, num_samples)  # Uniform over [0, 100) dB
    # Mean = 60 minutes, Std = 20 minutes. A normal draw can be negative, which
    # is not a valid exposure time, so clamp at zero.
    durations = np.clip(np.random.normal(60, 20, num_samples), 0, None)
    ages = np.random.randint(18, 65, num_samples)  # upper bound is exclusive

    data = {
        'Timestamp': [ts.strftime('%Y-%m-%d %H:%M:%S') for ts in timestamps],
        'User_ID': user_ids,
        'Location_Type': np.random.choice(location_types, num_samples),
        'Environment': np.random.choice(environments, num_samples),
        'Noise_Level_dB': noise_levels,
        'Duration_Minutes': durations,
        'Age': ages,
        'Gender': np.random.choice(genders, num_samples),
        'Hearing_Protection_Used': np.random.choice(['Yes', 'No'], num_samples),
        'Hearing_Sensitivity': np.random.choice(hearing_sensitivity, num_samples),
        'Health_Issues': np.random.choice(health_issues, num_samples)
    }

    # Label rule: at risk when noise exceeds 90 dB or exposure exceeds 8 hours.
    # NOTE(review): with durations drawn around 60 +/- 20 minutes the 480-minute
    # branch is practically never hit, so the label is driven by noise level
    # alone -- confirm whether a wider duration range was intended.
    at_risk = (noise_levels > 90) | (durations > 480)
    data['Hearing_Damage_Risk'] = at_risk.astype(int)  # Convert to binary 0/1
    return pd.DataFrame(data)

# Generate training and testing datasets
train_data = generate_data(n_train)
test_data = generate_data(n_test)

# Save to CSV files. The training cell reads these back by the same names, so
# the paths are kept in variables and reused in the status message below.
train_csv_path = "train.csv"
test_csv_path = "test.csv"
train_data.to_csv(train_csv_path, index=False)
test_data.to_csv(test_csv_path, index=False)

# Report the files that were actually written (the old message named
# different, non-existent files).
print(f"Data generation complete. Files saved as '{train_csv_path}' and '{test_csv_path}'.")


Data generation complete. Files saved as 'simulated_train_data_extended.csv' and 'simulated_test_data_extended.csv'.


In [None]:
!pip install joblib



In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Load the datasets written by the data-generation cell
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

# Drop identifier columns that are not used as predictors
train = train.drop(['Timestamp', 'User_ID'], axis=1)
test = test.drop(['Timestamp', 'User_ID'], axis=1)

# Pool both files; the train/test split is redone below.
# ignore_index gives the pooled frame unique row labels instead of two
# overlapping 0..n ranges.
data = pd.concat([train, test], ignore_index=True)

# Handle missing values (if any)
data = data.dropna()

# Define categorical and numerical columns
categorical_columns = ['Location_Type', 'Environment', 'Gender', 'Hearing_Protection_Used', 'Hearing_Sensitivity', 'Health_Issues']
numerical_columns = ['Noise_Level_dB', 'Duration_Minutes', 'Age']

# One-hot encode categorical variables and scale numerical features.
# - handle_unknown='ignore': a category unseen during fit encodes as all
#   zeros at inference time instead of raising.
# - sparse_threshold=0: always return a dense array, which is what Keras
#   consumes, rather than depending on the density of the encoded matrix.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_columns),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_columns)
    ],
    sparse_threshold=0,
)

# Split data into features and target
X = data.drop('Hearing_Damage_Risk', axis=1)
y = data['Hearing_Damage_Risk']

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the preprocessing on the training rows only, then apply it to the test rows
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

# Seed TensorFlow so weight initialisation and dropout are repeatable across runs
tf.random.set_seed(42)

# Define the model: a small fully connected binary classifier
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.5),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model.
# NOTE(review): the held-out test split doubles as validation data here, so
# the accuracy printed below is not an independent estimate if these
# validation curves are ever used for tuning.
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

# Evaluate the model (optional)
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Accuracy: {accuracy}')


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Accuracy: 0.9925000071525574


In [None]:

import tf2onnx

# Path of the exported ONNX file; defined once and reused by later cells
output_path = 'nihl.onnx'

# Convert the Keras model to ONNX. Passing output_path makes tf2onnx write the
# serialized model to disk, so no separate save step is needed.
onnx_model, _ = tf2onnx.convert.from_keras(model, opset=13, output_path=output_path)

# Names of the graph outputs, needed when running the model with onnxruntime
output_names = [n.name for n in onnx_model.graph.output]
print(output_names)

['dense_5']


In [None]:


import onnxruntime as rt

# Load the ONNX model. The execution provider is named explicitly, matching
# the other ONNX cell in this notebook.
sess = rt.InferenceSession("nihl.onnx", providers=['CPUExecutionProvider'])

# Get the input and output names from the graph instead of hard-coding them
input_name = sess.get_inputs()[0].name
output_name = sess.get_outputs()[0].name

sample_data = {
    'Location_Type': 'Indoor',
    'Environment': 'Traffic',
    'Age': 30,  # numeric, like the Age column the preprocessor was fitted on
    'Gender': 'Male',
    'Hearing_Protection_Used': 'No',
    'Hearing_Sensitivity': 'Severe',
    'Health_Issues': 'Diabetes',
    'Noise_Level_dB': 76,
    'Duration_Minutes': 6
}

# Convert the sample data to a DataFrame
sample_df = pd.DataFrame([sample_data])

# onnxruntime only accepts numeric arrays, and the exported network expects
# the scaled / one-hot encoded matrix it was trained on. Run the raw row
# through the fitted preprocessor before feeding it to the session.
input_data = preprocessor.transform(sample_df)
if hasattr(input_data, "toarray"):
    # Densify if the transformer returned a sparse matrix
    input_data = input_data.toarray()
# assumes the exported graph takes float32 (the Keras default) -- TODO confirm
input_data = np.asarray(input_data, dtype=np.float32)

# Run inference
result = sess.run([output_name], {input_name: input_data})

# Print the prediction
print(result)


RuntimeError: Input must be a list of dictionaries or a single numpy array for input 'dense_3_input'.

In [None]:
sample_data = {
    'Location_Type': 'Indoor',
    'Environment': 'Traffic',
    'Age': 30,  # numeric, like the Age column the preprocessor was fitted on
    'Gender': 'Male',
    'Hearing_Protection_Used': 'No',
    'Hearing_Sensitivity': 'Severe',
    'Health_Issues': 'Diabetes',
    'Noise_Level_dB': 76,
    'Duration_Minutes': 6
}

# Convert the sample data to a DataFrame
sd = pd.DataFrame([sample_data])

# Encode the raw row with the fitted preprocessor; the ONNX graph takes the
# same numeric feature matrix as the Keras model, not a DataFrame.
features = preprocessor.transform(sd)
if hasattr(features, "toarray"):
    # Densify if the transformer returned a sparse matrix
    features = features.toarray()
# assumes the exported graph takes float32 (the Keras default) -- TODO confirm
features = np.asarray(features, dtype=np.float32)

providers = ['CPUExecutionProvider']
m = rt.InferenceSession(output_path, providers=providers)

# Look the input name up from the graph: it is generated from the first layer's
# name (e.g. 'dense_3_input'), so a hard-coded "input" key is rejected.
onnx_input_name = m.get_inputs()[0].name
onnx_pred = m.run(output_names, {onnx_input_name: features})

print('ONNX Predicted:', onnx_pred[0][0])

# make sure ONNX and keras have the same results; atol covers sigmoid outputs
# that are numerically zero, where a purely relative tolerance is meaningless
keras_pred = model.predict(features)
np.testing.assert_allclose(keras_pred, onnx_pred[0], rtol=1e-3, atol=1e-5)

ValueError: Required inputs (['dense_3_input']) are missing from input feed (['input']).

# Saving and reloading the model and preprocessor

In [None]:
# Persist the trained Keras model as a single HDF5 file; a later cell
# reloads it from this same path.
model.save(filepath="my_model.h5")


  saving_api.save_model(


In [None]:
import joblib

# Persist the fitted ColumnTransformer so inference can reuse the exact
# scaling and encoding learned during training.
joblib.dump(value=preprocessor, filename='preprocessor.pkl')


['preprocessor.pkl']

In [None]:
import tensorflow as tf
import pandas as pd
import joblib

# Load the saved model
loaded_model = tf.keras.models.load_model("my_model.h5")

# Load the preprocessor.
# joblib.load unpickles the file, so only load files this notebook wrote itself.
loaded_preprocessor = joblib.load('preprocessor.pkl')

sample_data = {
    'Location_Type': 'Indoor',
    'Environment': 'Traffic',
    # Age is numeric in the training data; passing the string '30' only worked
    # through implicit conversion during scaling.
    'Age': 30,
    'Gender': 'Male',
    'Hearing_Protection_Used': 'No',
    'Hearing_Sensitivity': 'Severe',
    'Health_Issues': 'Diabetes',
    'Noise_Level_dB': 76,
    'Duration_Minutes': 6
}

# Convert the sample data to a DataFrame
sample_df = pd.DataFrame([sample_data])

# Preprocess the sample data using the loaded preprocessor
sample_df_processed = loaded_preprocessor.transform(sample_df)

# Make prediction; [0] selects the single row of the batch
prediction = loaded_model.predict(sample_df_processed)[0]

# The prediction will be a probability between 0 and 1.
# You can interpret it as the likelihood of hearing damage risk.
print(prediction)




[5.537884e-23]


In [None]:
import pandas as pd
sample_data = {
    'Location_Type': 'Indoor',
    'Environment': 'Traffic',
    # Age is numeric in the training data; passing the string '30' only worked
    # through implicit conversion during scaling.
    'Age': 30,
    'Gender': 'Male',
    'Hearing_Protection_Used': 'Yes',
    'Hearing_Sensitivity': 'Normal',
    'Health_Issues': 'Healthy',
    'Noise_Level_dB': 94,
    'Duration_Minutes': 480
}

# Convert the sample data to a DataFrame
sample_df = pd.DataFrame([sample_data])

# Preprocess the sample data with the preprocessor fitted during training
sample_transformed = preprocessor.transform(sample_df)

# Make a prediction; [0] selects the single row of the batch
prediction = model.predict(sample_transformed)[0]

# NOTE(review): the output recorded for this input is ~0 even though 94 dB is
# above the 90 dB labelling threshold. Duration_Minutes=480 lies far outside
# the generated training range (mean 60, std 20), so this is presumably an
# extrapolation artefact -- confirm, and consider widening the simulated
# durations.
print(prediction[0])


2.6889239e-27
