In [2]:
import tensorflow_federated as tff


In [3]:
import csv

# Define the dataset columns and encoding
DATASET_COLUMNS = ["sentiment", "ids", "date", "flag", "user", "text"]
DATASET_ENCODING = "ISO-8859-1"

# Load the data
dataset = []
with open('C:/Users/IDEH/Desktop/Sentimental/Data/Sentiment140.csv', encoding=DATASET_ENCODING) as csvfile:
    csvreader = csv.reader(csvfile)
    for row in csvreader:
        dataset.append(row)

# Convert the data to a dictionary format
data_dict = {col: [] for col in DATASET_COLUMNS}
for row in dataset:
    for col, value in zip(DATASET_COLUMNS, row):
        data_dict[col].append(value)

# Example of accessing the data
print(data_dict['text'][:5])  # Print the first 5 text entries


["@switchfoot http://twitpic.com/2y1zl - Awww, that's a bummer.  You shoulda got David Carr of Third Day to do it. ;D", "is upset that he can't update his Facebook by texting it... and might cry as a result  School today also. Blah!", '@Kenichan I dived many times for the ball. Managed to save 50%  The rest go out of bounds', 'my whole body feels itchy and like its on fire ', "@nationwideclass no, it's not behaving at all. i'm mad. why am i here? because I can't see you all over there. "]


In [4]:
import csv

# Load the preprocessed data
preprocessed_data = []
with open('C:/Users/IDEH/Desktop/Sentimental/Data/preprocessed_data.csv', encoding='ISO-8859-1') as csvfile:
    csvreader = csv.reader(csvfile)
    for row in csvreader:
        preprocessed_data.append(row)

# Convert the data to a dictionary format
preprocessed_dict = {'text': [], 'sentiment': []}
for row in preprocessed_data[1:]:  # Skip the header row
    preprocessed_dict['text'].append(row[0])
    preprocessed_dict['sentiment'].append(row[1])

# Example of accessing the preprocessed data
print(preprocessed_dict['text'][:5])  # Print the first 5 text entries


['USER URL aww that s a bummer you shoulda got david carr of third day to do it d ', 'is upset that he can t update his facebook by texting it and might cry a a result school today also blah ', 'USER i dived many time for the ball managed to save 50 the rest go out of bound ', 'my whole body feel itchy and like it on fire ', 'USER no it s not behaving at all i m mad why am i here because i can t see you all over there ']


In [5]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Initialize the tokenizer
tokenizer = Tokenizer(num_words=10000, oov_token='<OOV>')

# Fit the tokenizer on the preprocessed text
tokenizer.fit_on_texts(preprocessed_dict['text'])

# Convert text to sequences
sequences = tokenizer.texts_to_sequences(preprocessed_dict['text'])

# Pad sequences to ensure uniform length
padded_sequences = pad_sequences(sequences, padding='post', maxlen=100)

# Example of accessing the tokenized data
print(padded_sequences[:5])


[[   3   43  185   18   14    6 1160   10 3373   55  839 7876   15 1870
    29    4   50    7  153    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0]
 [  11  776   18   87   33   16  403  201  550  133 2003    7    9  303
   395    6    6 1141  151   46  281 1128    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0

In [6]:
import numpy as np

# Convert sentiment labels to integers
sentiment_labels = list(map(int, preprocessed_dict['sentiment']))

# Split the data into 10 balanced subsets
num_clients = 10
data_per_client = len(padded_sequences) // num_clients

clients_data = []
for i in range(num_clients):
    start_idx = i * data_per_client
    end_idx = (i + 1) * data_per_client
    clients_data.append((padded_sequences[start_idx:end_idx], sentiment_labels[start_idx:end_idx]))

# Example of accessing the data for the first client
client_1_data, client_1_labels = clients_data[0]
print(client_1_data[:5], client_1_labels[:5])


[[   3   43  185   18   14    6 1160   10 3373   55  839 7876   15 1870
    29    4   50    7  153    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0]
 [  11  776   18   87   33   16  403  201  550  133 2003    7    9  303
   395    6    6 1141  151   46  281 1128    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0

In [7]:
import tensorflow as tf
import tensorflow_federated as tff
import nest_asyncio

# Apply nest_asyncio
nest_asyncio.apply()

# Convert client data to tf.data.Dataset
def create_tf_dataset_for_client(data, labels):
    dataset = tf.data.Dataset.from_tensor_slices((data, labels))
    return dataset.batch(2)



In [9]:
# Create a list of tf.data.Dataset for each client
clients_tf_data = [create_tf_dataset_for_client(data, labels) for data, labels in clients_data]



In [10]:
def create_keras_model():
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(input_dim=10000, output_dim=64, input_length=100),
        tf.keras.layers.GlobalAveragePooling1D(),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    return model



In [11]:
# Define a TFF model wrapper
def model_fn():
    keras_model = create_keras_model()
    return tff.learning.from_keras_model(
        keras_model,
        input_spec=(tf.TensorSpec(shape=[None, 100], dtype=tf.int32),
                    tf.TensorSpec(shape=[None], dtype=tf.int32)),
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=[tf.keras.metrics.BinaryAccuracy()]
    )



In [12]:
# Federated learning process
def federated_averaging_process():
    iterative_process = tff.learning.build_federated_averaging_process(
        model_fn=model_fn,
        client_optimizer_fn=lambda: tf.keras.optimizers.Adam(learning_rate=0.01),
        server_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=1.0)
    )
    return iterative_process



In [13]:

# Initialize the process
iterative_process = federated_averaging_process()



In [14]:
# Simulate federated learning
state = iterative_process.initialize()
num_rounds = 2  # Define the number of rounds
for round_num in range(1, num_rounds + 1):
    state, metrics = iterative_process.next(state, clients_tf_data)
    print(f'Round {round_num}, Metrics: {metrics}')