<a href="https://colab.research.google.com/github/niyobern/Google-Colab-notebooks/blob/main/noise.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Imports

In [None]:
import pandas as pd
import numpy as np
import datetime

In [None]:
n_train = 10000
n_test = 2000

# Category vocabularies; each simulated record samples one value per list, uniformly.
location_types = ['Indoor', 'Outdoor']
genders = ['Male', 'Female', 'Other']
hearing_sensitivity = ['Normal', 'Mild', 'Moderate', 'Severe']
health_issues = ['Healthy', 'Diabetes', 'Hypertension', 'Heart Disease', 'Other']
environments = ['Residential', 'Workplace', 'Recreational', 'Traffic', 'Industrial']

np.random.seed(42)  # For reproducibility

def generate_data(num_samples):
    """Simulate noise-exposure records with a rule-based hearing-damage label.

    Args:
        num_samples: Number of rows to generate.

    Returns:
        pandas.DataFrame with one row per simulated exposure. Columns:
        Timestamp (string, 15-minute steps going back from the call time),
        User_ID, Location_Type, Environment, Noise_Level_dB, Duration_Minutes,
        Age, Gender, Hearing_Protection_Used, Hearing_Sensitivity,
        Health_Issues and the binary target Hearing_Damage_Risk.

    Hearing_Damage_Risk is 1 when Noise_Level_dB > 90 or Duration_Minutes > 480,
    otherwise 0. Random values come from NumPy's global generator, so results
    depend on the seed set above and on how many draws preceded the call.
    """
    # Read the clock once so consecutive timestamps are exactly 15 minutes
    # apart instead of drifting with per-row clock reads.
    reference_time = datetime.datetime.now()
    timestamps = [
        reference_time - datetime.timedelta(minutes=15 * i)
        for i in range(num_samples)
    ]

    user_ids = np.random.randint(1000, 9999, num_samples)  # 1000..9998 (upper bound exclusive)
    noise_levels = np.random.uniform(0, 100, num_samples)  # Uniform over 0-100 dB
    # Mean = 60 minutes, Std = 20 minutes; the normal tail can go below zero,
    # which is not a valid duration, so clamp at 0.
    durations = np.clip(np.random.normal(60, 20, num_samples), 0, None)
    ages = np.random.randint(18, 65, num_samples)  # 18..64 (upper bound exclusive)

    data = {
        'Timestamp': [ts.strftime('%Y-%m-%d %H:%M:%S') for ts in timestamps],
        'User_ID': user_ids,
        'Location_Type': np.random.choice(location_types, num_samples),
        'Environment': np.random.choice(environments, num_samples),
        'Noise_Level_dB': noise_levels,
        'Duration_Minutes': durations,
        'Age': ages,
        'Gender': np.random.choice(genders, num_samples),
        'Hearing_Protection_Used': np.random.choice(['Yes', 'No'], num_samples),
        'Hearing_Sensitivity': np.random.choice(hearing_sensitivity, num_samples),
        'Health_Issues': np.random.choice(health_issues, num_samples)
    }

    # Rule-based label: loud exposure (> 90 dB) or long exposure (> 8 hours).
    # NOTE(review): with durations centred on 60 minutes the 480-minute branch
    # is effectively never hit, so the label is driven by the noise threshold.
    at_risk = (noise_levels > 90) | (durations > 480)
    data['Hearing_Damage_Risk'] = at_risk.astype(int)  # Convert to binary
    return pd.DataFrame(data)

# Build the two synthetic splits and write them to the CSV files that the
# training cell reads back.
train_data = generate_data(n_train)
test_data = generate_data(n_test)

train_path = "train.csv"
test_path = "test.csv"

train_data.to_csv(train_path, index=False)
test_data.to_csv(test_path, index=False)

# Report the paths actually written (the previous message named files that
# are never created).
print(f"Data generation complete. Files saved as '{train_path}' and '{test_path}'.")


Data generation complete. Files saved as 'simulated_train_data_extended.csv' and 'simulated_test_data_extended.csv'.


In [None]:
!pip install joblib



In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Seed Python, NumPy and TensorFlow so weight initialisation, dropout masks and
# batch shuffling repeat from run to run.
tf.keras.utils.set_random_seed(42)

train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

# Timestamp and User_ID identify a record; they are not used as features.
train = train.drop(['Timestamp', 'User_ID'], axis=1)
test = test.drop(['Timestamp', 'User_ID'], axis=1)

# Both files are pooled and re-split below; ignore_index avoids duplicate row
# labels in the combined frame.
data = pd.concat([train, test], ignore_index=True)

data = data.dropna()

categorical_columns = ['Location_Type', 'Environment', 'Gender', 'Hearing_Protection_Used', 'Hearing_Sensitivity', 'Health_Issues']
numerical_columns = ['Noise_Level_dB', 'Duration_Minutes', 'Age']

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_columns),
        # Unseen categories at inference time encode as all-zeros instead of raising.
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_columns)
    ],
    # Always return a dense array, which is what the Keras model consumes,
    # rather than relying on the density heuristic.
    sparse_threshold=0,
)

X = data.drop('Hearing_Damage_Risk', axis=1)
y = data['Hearing_Damage_Risk']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit scaling/encoding on the training split only, then reuse it on the test split.
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

# the model
# An explicit Input layer replaces the deprecated `input_shape` argument on the
# first Dense layer, which triggered a Keras warning.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Training
# NOTE(review): the held-out split doubles as validation data here, so the
# val_* metrics and the final evaluation are measured on the same rows.
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

loss, accuracy = model.evaluate(X_test, y_test)
print(f'Accuracy: {accuracy}')


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - accuracy: 0.8696 - loss: 0.3404 - val_accuracy: 0.9413 - val_loss: 0.1192
Epoch 2/50
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9417 - loss: 0.1282 - val_accuracy: 0.9754 - val_loss: 0.0759
Epoch 3/50
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.9571 - loss: 0.0972 - val_accuracy: 0.9750 - val_loss: 0.0633
Epoch 4/50
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9714 - loss: 0.0729 - val_accuracy: 0.9829 - val_loss: 0.0480
Epoch 5/50
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9709 - loss: 0.0636 - val_accuracy: 0.9883 - val_loss: 0.0396
Epoch 6/50
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9801 - loss: 0.0498 - val_accuracy: 0.9917 - val_loss: 0.0329
Epoch 7/50
[1m300/300[0m 

In [None]:

import tf2onnx

# Single definition of the export location; it is used for the conversion
# here and by the later cell that reopens the file via `output_path`.
output_path = 'nihl.onnx'

# Name the graph input explicitly so inference code can feed it as "input",
# and pin its shape to (batch, n_features) of the preprocessed matrix.
input_signature = (
    tf.TensorSpec((None, X_train.shape[1]), tf.float32, name='input'),
)

# from_keras writes the serialized model to `output_path` itself, so no
# separate file write is needed.
onnx_model, _ = tf2onnx.convert.from_keras(
    model,
    input_signature=input_signature,
    opset=13,
    output_path=output_path,
)
output_names = [n.name for n in onnx_model.graph.output]
print(output_names)

ModuleNotFoundError: No module named 'tf2onnx'

In [None]:


import onnxruntime as rt

sess = rt.InferenceSession("nihl.onnx", providers=['CPUExecutionProvider'])

input_name = sess.get_inputs()[0].name
output_name = sess.get_outputs()[0].name
sample_data = {
    'Location_Type': 'Indoor',
    'Environment': 'Traffic',
    'Age': 30,  # numeric feature: pass a number, not the string '30'
    'Gender': 'Male',
    'Hearing_Protection_Used': 'No',
    'Hearing_Sensitivity': 'Severe',
    'Health_Issues': 'Diabetes',
    'Noise_Level_dB': 76,
    'Duration_Minutes': 6
}

sample_df = pd.DataFrame([sample_data])

# The exported graph takes the scaled/one-hot-encoded float matrix the network
# was trained on, not the raw DataFrame, so apply the fitted preprocessor first.
input_data = preprocessor.transform(sample_df)
if hasattr(input_data, 'toarray'):
    # ColumnTransformer may return a sparse matrix; ONNX Runtime needs a dense array.
    input_data = input_data.toarray()
input_data = input_data.astype('float32')

result = sess.run([output_name], {input_name: input_data})

print(result)


In [None]:
sample_data = {
    'Location_Type': 'Indoor',
    'Environment': 'Traffic',
    'Age': 30,  # numeric feature: pass a number, not the string '30'
    'Gender': 'Male',
    'Hearing_Protection_Used': 'No',
    'Hearing_Sensitivity': 'Severe',
    'Health_Issues': 'Diabetes',
    'Noise_Level_dB': 76,
    'Duration_Minutes': 6
}

sd = pd.DataFrame([sample_data])

# Encode the raw record with the fitted preprocessor; the ONNX graph consumes
# the resulting float32 matrix, not the DataFrame itself.
sd_features = preprocessor.transform(sd)
if hasattr(sd_features, 'toarray'):
    sd_features = sd_features.toarray()
sd_features = sd_features.astype('float32')

providers = ['CPUExecutionProvider']
m = rt.InferenceSession(output_path, providers=providers)
# Ask the session for its input name instead of assuming it is "input".
onnx_input_name = m.get_inputs()[0].name
onnx_pred = m.run(output_names, {onnx_input_name: sd_features})

print('ONNX Predicted:', onnx_pred[0][0])

# Parity check: the exported graph should reproduce the Keras prediction up to
# float32 rounding.
preds = model.predict(sd_features)
np.testing.assert_allclose(preds, onnx_pred[0], rtol=1e-4, atol=1e-5)

# Save and reload the model and preprocessor

In [None]:
# Persist the trained Keras model to disk; the reload cell reads this exact
# filename, so keep the two in sync.
model.save("my_model.h5")


In [None]:
import joblib

# Persist the fitted ColumnTransformer so inference can apply the same scaling
# and one-hot encoding that was used for training.
joblib.dump(preprocessor, 'preprocessor.pkl')


In [None]:
import tensorflow as tf
import pandas as pd
import joblib

loaded_model = tf.keras.models.load_model("my_model.h5")

# joblib.load unpickles arbitrary objects: only load files this notebook wrote.
loaded_preprocessor = joblib.load('preprocessor.pkl')

sample_data = {
    'Location_Type': 'Indoor',
    'Environment': 'Traffic',
    'Age': 30,  # numeric feature: pass a number, not the string '30'
    'Gender': 'Male',
    'Hearing_Protection_Used': 'No',
    'Hearing_Sensitivity': 'Severe',
    'Health_Issues': 'Diabetes',
    'Noise_Level_dB': 76,
    'Duration_Minutes': 6
}

sample_df = pd.DataFrame([sample_data])

# Apply the same scaling/encoding that was fitted on the training split.
sample_df_processed = loaded_preprocessor.transform(sample_df)

# predict returns one row per input record; take the row for the single sample.
prediction = loaded_model.predict(sample_df_processed)[0]

print(prediction)


In [None]:
import pandas as pd
# A record above the 90 dB labelling threshold, scored with the in-memory
# model and preprocessor.
sample_data = {
    'Location_Type': 'Indoor',
    'Environment': 'Traffic',
    'Age': 30,  # numeric feature: pass a number, not the string '30'
    'Gender': 'Male',
    'Hearing_Protection_Used': 'Yes',
    'Hearing_Sensitivity': 'Normal',
    'Health_Issues': 'Healthy',
    'Noise_Level_dB': 94,
    'Duration_Minutes': 480
}

sample_df = pd.DataFrame([sample_data])

# Encode the raw record exactly as the training data was encoded.
sample_transformed = preprocessor.transform(sample_df)

# predict returns one row per input record; take the row for the single sample.
prediction = model.predict(sample_transformed)[0]

# The single sigmoid output is the predicted probability of the risk class.
print(prediction[0])
