Unzip the archive and load the extracted CSV file into a DataFrame.

In [None]:
import zipfile
import pandas as pd

# Unpack every member of data.zip into the data/ directory.
with zipfile.ZipFile('data.zip', mode='r') as z:
    z.extractall(path='data/')

# The archive handle is closed at this point; the extracted CSV is read
# from disk into the DataFrame used by the rest of the pipeline.
df = pd.read_csv('data/data.csv')

Preprocess the data to clean and prepare it for modeling.

In [None]:
def preprocess(data):
    """Return *data* untouched; a stand-in for real cleaning steps.

    No transformation is implemented yet, so the very same object that
    was passed in is handed back (no copy is made).
    """
    cleaned = data  # placeholder: nothing is modified
    return cleaned

# Run the loaded DataFrame through preprocess (currently a pass-through).
df_cleaned = preprocess(df)

Split the data into training and testing sets.

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the cleaned data for testing; the fixed random_state
# makes the split reproducible from run to run.
train_df, test_df = train_test_split(
    df_cleaned,
    test_size=0.2,
    random_state=42,
)

Load the pre-trained BERT model and tokenizer.

In [None]:
from transformers import BertModel, BertTokenizer

# The encoder weights and the tokenizer are fetched from the same checkpoint.
# NOTE(review): `model` is rebound to a Keras Sequential classifier in a
# later cell, so this BERT handle does not survive past that point.
BERT_CHECKPOINT = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(BERT_CHECKPOINT)
model = BertModel.from_pretrained(BERT_CHECKPOINT)

Add a classifier and compile the model.

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Two-layer dense classifier: 768-wide input -> 256 ReLU units -> a single
# sigmoid unit. This rebinds `model`, replacing the BERT encoder that was
# assigned to the same name in the earlier cell.
model = Sequential([
    Dense(256, activation='relu', input_shape=(768,)),
    Dense(1, activation='sigmoid'),
])
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

Train the model using the training data.

In [None]:
# Fit the classifier for 3 epochs in mini-batches of 32.
# NOTE(review): the classifier's first layer is declared with
# input_shape=(768,); confirm that train_df['text'] holds 768-wide numeric
# features rather than raw strings before relying on this call.
model.fit(
    x=train_df['text'],
    y=train_df['label'],
    epochs=3,
    batch_size=32,
)

Evaluate the model using Hamming loss and recall metrics.

In [None]:
from sklearn.metrics import hamming_loss, recall_score

# The classifier ends in a single sigmoid unit, so predict() yields one
# continuous score per row, not a class label.
# NOTE(review): the classifier's first layer is declared with
# input_shape=(768,); confirm that test_df['text'] holds 768-wide numeric
# features rather than raw strings.
probabilities = model.predict(test_df['text'])

# hamming_loss and recall_score compare discrete labels and reject
# continuous scores, so threshold at 0.5 and flatten the (n, 1) output to
# 1-D so it lines up with the label column.
# Assumes labels are encoded as 0/1 -- TODO confirm against the dataset.
predictions = (probabilities > 0.5).astype(int).ravel()

loss = hamming_loss(test_df['label'], predictions)
recall = recall_score(test_df['label'], predictions, average='binary')

Clean up by deleting the model and the extracted CSV file.

In [None]:
import os
# Unbind the classifier from the `model` name.
del model
# Delete the CSV that was extracted from the archive (the data/ directory
# itself is left in place).
os.unlink('data/data.csv')