In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv1D, MaxPooling1D

# Read the labelled corpus from disk; later cells read its 'text' and
# 'depression' columns
dataset_path = 'your_dataset.csv'
df = pd.read_csv(dataset_path)


Preprocessing the data by converting the text column to padded sequences of integers and splitting the dataset into training and testing sets:

In [None]:
# Fixed length every integer sequence is padded/truncated to
maxlen = 100

# Binary labels, in dataframe row order
y = np.array(df['depression'])

# Decide train/test membership of every row BEFORE fitting the tokenizer.
# Splitting row positions with the same test_size/random_state selects the
# same rows that splitting X and y directly would select.
row_positions = np.arange(len(df))
train_idx, test_idx = train_test_split(row_positions, test_size=0.3, random_state=42)

# Fit the vocabulary on training texts only, so neither the word index nor the
# frequency ranking behind num_words is influenced by the held-out test rows
tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=5000)
tokenizer.fit_on_texts(df['text'].iloc[train_idx])

# Convert the text column to sequences of integers
X = tokenizer.texts_to_sequences(df['text'])

# Pad the sequences to ensure they are all the same length
X = tf.keras.preprocessing.sequence.pad_sequences(X, padding='post', maxlen=maxlen)

# Materialise the training and testing sets from the pre-computed row positions
X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]


Defining the CNN model and training it on the training set:

In [None]:
# Build the CNN in a single expression: two Conv1D + MaxPooling1D stages,
# then a flattened dense head ending in one sigmoid unit
model = Sequential([
    Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(maxlen, 1)),
    MaxPooling1D(pool_size=2),
    Conv1D(filters=128, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid'),
])

# Binary cross-entropy pairs with the single sigmoid output
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Add a trailing channel axis so each sample matches input_shape=(maxlen, 1)
train_inputs = X_train.reshape(X_train.shape[0], maxlen, 1)

# Fit on the training set, holding out 10% of it for validation
history = model.fit(
    train_inputs,
    y_train,
    batch_size=32,
    epochs=10,
    validation_split=0.1,
)


Evaluating the model on the testing set and printing out the accuracy:

In [None]:
# Give the held-out samples the (maxlen, 1) per-sample shape the model was built with
test_inputs = X_test.reshape(X_test.shape[0], maxlen, 1)

# Score the trained model on the testing set
loss, accuracy = model.evaluate(test_inputs, y_test)

# Report the held-out accuracy
print('Test accuracy:', accuracy)


Finally, you can use the trained model to make predictions on new data:

In [None]:
# Raw example texts to score
sample_texts = ['this is a sample text', 'this is another sample text']

# Run them through the same tokenizer and padding used for the training data
sample_sequences = tokenizer.texts_to_sequences(sample_texts)
new_data = tf.keras.preprocessing.sequence.pad_sequences(
    sample_sequences, padding='post', maxlen=maxlen
)

# Add the channel axis and ask the model for its sigmoid outputs
predictions = model.predict(new_data.reshape(len(new_data), maxlen, 1))

# Show the predictions
print(predictions)


In [None]:
# Pull every weight array out of the trained model as a list of NumPy arrays
weights = model.get_weights()

# Show the shape of each array, one per line
for weight_array in weights:
    print(np.shape(weight_array))


To create a function that uses these weights to classify new text inputs, you can define a new function that takes in a text string, converts it to a sequence of integers, and applies the weights to make a prediction. Here's an example:

In [None]:
def classify_text(text, weights):
    """Classify a single text with a NumPy forward pass over the CNN weights.

    The text is tokenized with the notebook-level ``tokenizer`` and padded to
    the notebook-level ``maxlen``, then pushed through a re-implementation of
    the model defined in this notebook: every Conv1D layer (stride 1, no
    padding, ReLU) is followed by MaxPooling1D(pool_size=2), the result is
    flattened, hidden Dense layers use ReLU and the final Dense layer uses a
    sigmoid.

    Args:
        text: The input string to classify.
        weights: The list returned by ``model.get_weights()``: alternating
            kernel and bias arrays, one pair per Conv1D/Dense layer, in layer
            order. A 3-D kernel is treated as Conv1D, a 2-D kernel as Dense.

    Returns:
        The predicted class as an int: 1 if the sigmoid output is above 0.5,
        otherwise 0.

    Raises:
        ValueError: If ``weights`` is empty, is not made of (kernel, bias)
            pairs, contains a kernel that is neither 2-D nor 3-D, or does not
            end in a single output unit.
    """
    # Layers whose weights are stored as [kernel, bias, kernel, bias, ...]
    if len(weights) == 0 or len(weights) % 2 != 0:
        raise ValueError('weights must be a non-empty list of (kernel, bias) pairs')

    # Convert the text to a sequence of integers
    seq = tokenizer.texts_to_sequences([text])[0]

    # Pad the sequence to ensure it has the same length as the input to the CNN model
    seq = tf.keras.preprocessing.sequence.pad_sequences([seq], padding='post', maxlen=maxlen)

    # One sample shaped (steps, channels) = (maxlen, 1), as the model was trained on
    x = np.asarray(seq, dtype=np.float64).reshape(maxlen, 1)

    num_layers = len(weights) // 2
    for layer in range(num_layers):
        kernel = np.asarray(weights[2 * layer])
        bias = np.asarray(weights[2 * layer + 1])
        is_output_layer = layer == num_layers - 1

        if kernel.ndim == 3:
            # Conv1D: kernel is (width, in_channels, filters). Slide a window
            # over the steps axis without flipping the kernel.
            width = kernel.shape[0]
            windows = np.stack([x[t:t + width] for t in range(x.shape[0] - width + 1)])
            x = np.tensordot(windows, kernel, axes=([1, 2], [0, 1])) + bias
            x = np.maximum(x, 0)

            # MaxPooling1D(pool_size=2): non-overlapping pairs, odd tail dropped
            pooled_steps = x.shape[0] // 2
            x = x[:pooled_steps * 2].reshape(pooled_steps, 2, -1).max(axis=1)
        elif kernel.ndim == 2:
            # Dense: flatten row-major (steps first, channels last), then affine
            x = x.reshape(-1) @ kernel + bias
            if not is_output_layer:
                x = np.maximum(x, 0)
        else:
            raise ValueError('unsupported kernel rank: %d' % kernel.ndim)

    if x.size != 1:
        raise ValueError('the final layer must produce a single output unit')

    # sigmoid(z) > 0.5 exactly when z > 0, so threshold the pre-activation
    # directly and avoid overflow in exp() for large-magnitude values
    return int(x.reshape(-1)[0] > 0)


This function takes in a text input (a string), as well as the weights extracted from your trained CNN model. It first converts the text to a sequence of integers using the same tokenizer used to preprocess the original data. It then pads the sequence to ensure it has the same length as the input to the CNN model, and applies the weights to the sequence using the same architecture as the CNN model. Finally, it returns the predicted class as an integer (either 0 or 1).

Here's an example of how to use this function:

In [None]:
# Example input to classify
text = 'I am feeling really down today'

# Snapshot the trained model's weight arrays for the classifier function
weights = model.get_weights()

# Run the function on the example and report its result
class_prediction = classify_text(text, weights)
print('Predicted class:', class_prediction)
