In [None]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
import keras
from keras import layers
import os
# Print the full path of every file found anywhere below the Kaggle input directory.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(root, name)
        print(full_path)

In [None]:
# Load the training data (first CSV column becomes the index), drop incomplete rows,
# cast `Acidity` to float and encode the `Quality` label as 1 ("good") / 0 ("bad").
# Built as one non-mutating chain so re-running the cell always starts from the raw CSV;
# `assign` overwrites the existing columns in place, so the column order is unchanged.
train_df = (
    pd.read_csv('/kaggle/input/aids-data-quest-society/train_ds.csv', index_col=0)
    .dropna()
    .assign(
        Acidity=lambda frame: frame['Acidity'].astype(float),
        Quality=lambda frame: frame['Quality'].map({"good": 1, "bad": 0}),
    )
)

# One histogram per column as a quick look at the distributions.
plt.rcParams['figure.figsize']=[20,10]
train_df.hist()
# `plt.show` must be *called*; the bare attribute reference was a no-op that only
# echoed the function object as the cell output.
plt.show()

In [None]:
# Kernel-density estimate for every column, one subplot each, on a shared x axis.
plt.rcParams.update({'figure.figsize': [20, 30]})
train_df.plot.density(subplots=True, sharex=True)
plt.show()

In [None]:
# Scale every feature to [0, 1]. The scaler is fitted on the training features here
# and reused later to transform the test set.
scaler = MinMaxScaler(feature_range=(0,1))
# Select the features by dropping the label by name rather than slicing off
# "the last column": the positional slice silently depends on column order and would
# leak the label into the inputs if `Quality` were ever not last.
scaled_train = scaler.fit_transform(train_df.drop(columns=['Quality']))
x_train = scaled_train
y_train = train_df['Quality'].values
input_shape = [x_train.shape[1]]

# Binary classifier: two hidden Dense layers (128 and 64 units, ReLU), each followed by
# batch normalisation and 50% dropout, ending in a single sigmoid unit.
# The input is declared with an explicit `keras.Input` instead of passing `input_shape=`
# to the first layer, which is deprecated for Sequential models in current Keras.
model = keras.Sequential([
    keras.Input(shape=tuple(input_shape)),
    layers.BatchNormalization(),
    layers.Dense(units=128, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(units=64, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(units=1, activation='sigmoid')
])

In [None]:
# Print the layer-by-layer architecture, then configure training.
model.summary()
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    # `metrics` is documented as a list; a bare string is rejected by Keras 3
    # ("Expected `metrics` argument to be a list, tuple, or dict").
    metrics=['binary_accuracy']
)

In [None]:
# Train with early stopping.
# EarlyStopping monitors `val_loss` by default, so validation data is required: without
# it the callback only warns that the metric is unavailable, training always runs all
# 200 epochs and `restore_best_weights` has nothing to restore.
# NOTE: `validation_split` holds out the *last* 20% of the rows (taken before shuffling).
history = model.fit(
    x_train, y_train,
    validation_split=0.2,
    batch_size=128,
    epochs=200,
    callbacks=[
        keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=5,
            min_delta=0.001,
            restore_best_weights=True,
        )
    ]
)

In [None]:
# Score the competition test file and write the submission CSV.
test_df = pd.read_csv('/kaggle/input/aids-data-quest-society/test_ds.csv')
test_df.dropna(inplace=True)

# Every column after the first is passed to the already-fitted scaler as a feature.
test_features = test_df.iloc[:, 1:]
scaled_test = scaler.transform(test_features)

# Round the sigmoid outputs to hard 0/1 classes, then translate them back to text labels.
predicted_classes = np.round(model.predict(scaled_test)).astype(int)
submission_labels = {1: 'good', 0: 'bad'}
submission_predictions = [submission_labels[row[0]] for row in predicted_classes]

submission_df = pd.DataFrame({'ID': test_df['ID'], 'Quality': submission_predictions})
submission_df.to_csv('submission.csv', index=False)
submission_df

In [None]:
from sklearn.metrics import accuracy_score, mean_absolute_error, precision_score, classification_report
# Metrics computed on the training data itself (not on held-out data).
train_probabilities = model.predict(x_train)
y_pred_train = np.round(train_probabilities).astype(int)  # hard 0/1 predictions

acc = accuracy_score(y_true=y_train, y_pred=y_pred_train)
print('accuracy : ', acc)

prec = precision_score(y_true=y_train, y_pred=y_pred_train)
print('precision : ', prec)

mae = mean_absolute_error(y_true=y_train, y_pred=y_pred_train)
print('MAE : ', mae)

report = classification_report(y_true=y_train, y_pred=y_pred_train)
print('report : ', report)

In [None]:
# Save the metrics computed in the evaluation cell (`acc`, `mae`, `report`) as plain text.
output_file_path = '/kaggle/working/classification_report.csv'
with open(output_file_path, 'w') as f:
    f.write("Accuracy: {}\n".format(acc))
    # The trailing newline was missing here, which glued the "Classification Report:"
    # header onto the end of the MAE line.
    f.write("Mean Absolute Error: {}\n".format(mae))
    f.write("Classification Report:\n")
    f.write(report)
output_file_path