## Import the Necessary Libraries

In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

## Data Preprocessing

In [None]:
# Read the raw energy log. `data` keeps the full table; `df` is narrowed below.
data = pd.read_csv("Steel_industry_data.csv")

# Keep only the first 25 readings that were recorded on a Monday.
df = data.loc[data['Day_of_week'] == 'Monday'].head(25)

# Line chart of energy usage against the timestamp column.
plt.figure(figsize=(10, 6))
plt.plot(df['date'], df['Usage_kWh'], color='b', linestyle='-', marker='o')

# Title and axis captions.
plt.title('kWh vs Monday')
plt.xlabel('Date')
plt.ylabel('kWh')

# Slanted tick labels keep the timestamps legible.
plt.xticks(rotation=45)

# Fit everything inside the canvas, then render.
plt.tight_layout()
plt.show()

In [None]:
# Reload the full dataset and take the peak usage recorded for each weekday.
data = pd.read_csv("Steel_industry_data.csv")
df = data
df_max_kwh = df.groupby('Day_of_week', as_index=False)['Usage_kWh'].max()

# Impose calendar order (Monday first) on the weekday labels before sorting.
days_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
df_max_kwh['Day_of_week'] = df_max_kwh['Day_of_week'].astype(
    pd.CategoricalDtype(categories=days_order, ordered=True)
)
df_max_kwh = df_max_kwh.sort_values('Day_of_week')

# Line chart of the per-weekday maximum.
plt.figure(figsize=(10, 6))
plt.plot(df_max_kwh['Day_of_week'], df_max_kwh['Usage_kWh'], color='b', linestyle='-', marker='o')

# Title and axis captions.
plt.title('Max kWh vs Day of the Week')
plt.xlabel('Day of the Week')
plt.ylabel('Max kWh')

# Slanted tick labels for readability.
plt.xticks(rotation=45)

# Fit everything inside the canvas, then render.
plt.tight_layout()
plt.show()

In [None]:
# Start again from the raw file and report the number of missing values per column.
data = pd.read_csv("Steel_industry_data.csv")
print(data.isna().sum())

# Set the first column aside and show it before 'date' is removed from the table.
datecol = data.iloc[:, 0]
print(datecol)

# Everything except 'date' becomes the working matrix.
data = data.drop('date', axis=1)
X_train = data.to_numpy()

# Notebook display: first rows of the reduced table.
data.head()


In [None]:
# Select the energy-usage series by name. 'date' has already been dropped from
# `data`, so positional index 1 would pick the column *after* Usage_kWh.
kwh = data['Usage_kWh']
# plt.figure(figsize=(10, 6))
# plt.plot(datecol, kwh, marker='o', color='b', linestyle='-')
# plt.xlabel('Date')
# plt.ylabel('kWh')
# plt.title('Date vs kWh Consumption')
# plt.grid(True)
# plt.xticks(rotation=45)  # Rotate dates for better readability
# plt.tight_layout()       # Adjust layout for better fit
# plt.show()

In [None]:
# Show the raw value matrix taken from the DataFrame, before any scaling or encoding.
print(X_train)


In [None]:
# Notebook display: (rows, columns) of the raw matrix.
X_train.shape

Feature Scaling

In [None]:
# Feature scaling: standardize the first seven columns of X_train in place.
scaler = StandardScaler()

# NOTE(review): the scaler is fitted on every row, and the scaled range appears
# to include Usage_kWh (used later as the regression target), so downstream MSE
# values would be in standardized units — confirm this is intended.
X_train[:, :7] = scaler.fit_transform(X_train[:, :7]).astype('float64')


# Print the matrix; the trailing expression displays its shape in the notebook.
print(X_train)
X_train.shape


In [None]:
# Notebook display of the matrix after scaling.
X_train

Column transforming and label encoding

In [None]:
# Transforming categorical data into a numeric matrix
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
# One-hot encode columns 7 and 8; remainder='passthrough' keeps every other column.
# ColumnTransformer emits the encoded columns first and appends the passthrough
# columns after them, so column positions change after this step.
ct = ColumnTransformer(transformers=[('encoder',OneHotEncoder(),[7,8])],remainder='passthrough')
X_train=ct.fit_transform(X_train)
# Inspect the encoded matrix and its new shape.
print(X_train)
print(X_train.shape)



In [None]:
# Label encoding on the last column of the matrix
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()
# Replace the labels in the last column with integer codes, in place.
X_train[:,-1]=le.fit_transform(X_train[:,-1])
print(X_train)
# Bare expression in the middle of a cell: its value is discarded, nothing is shown.
type(X_train)
# Persist the fully preprocessed matrix (this rebinds the notebook-level `df`).
df=pd.DataFrame(X_train)
df.to_csv('scaled_Steel_industry_data.csv', index=False, header=True)

## Federated Learning

In [None]:
# Splitting the entire data into 8 nodes to perform FL.
# np.array_split returns a list of 8 row-wise chunks, one per simulated node.
node_data = np.array_split(X_train, 8)


In [None]:
# Global model shared by all nodes. It is never fitted directly: its parameters
# are assigned by hand, starting from zero.
global_model = LinearRegression()
# One coefficient per feature, i.e. every column of X_train except the target,
# derived from the matrix instead of a hard-coded count.
global_model.coef_ = np.zeros(X_train.shape[1] - 1)
global_model.intercept_ = 0.0

Functions for calculating and aggregating the local models

In [None]:

def federated_averaging(models, sample_weights=None):
    """Aggregate local model parameters into one global parameter set.

    Args:
        models: Non-empty sequence of objects exposing ``coef_`` and
            ``intercept_``. All ``coef_`` arrays must share one shape.
        sample_weights: Optional per-model weights (for example the number of
            rows each node trained on). ``None`` weighs every model equally.

    Returns:
        Tuple ``(coef_avg, intercept_avg)`` holding the averaged coefficients
        and the averaged intercept.

    Raises:
        ValueError: If ``models`` is empty, or if ``sample_weights`` is given
            with a length different from ``models``.
    """
    models = list(models)
    if not models:
        # Averaging nothing would silently produce NaN parameters.
        raise ValueError("federated_averaging requires at least one model")
    if sample_weights is not None and len(sample_weights) != len(models):
        raise ValueError("sample_weights must contain one weight per model")

    coefs = [model.coef_ for model in models]
    intercepts = [model.intercept_ for model in models]
    # With weights=None, np.average reduces to the plain arithmetic mean.
    coef_avg = np.average(coefs, axis=0, weights=sample_weights)
    intercept_avg = np.average(intercepts, weights=sample_weights)
    return coef_avg, intercept_avg

# Function to train the local model on each node's data
def train_local_model(data, global_coef, global_intercept, target_col=9):
    """Fit an ordinary-least-squares model on one node's rows.

    Args:
        data: 2-D array in which column ``target_col`` is the regression
            target (Usage_kWh in this notebook) and every other column is a
            feature.
        global_coef: Current global coefficients. Accepted for interface
            compatibility only; see the note below.
        global_intercept: Current global intercept. Accepted for interface
            compatibility only; see the note below.
        target_col: Index of the target column. Defaults to 9.

    Returns:
        The ``LinearRegression`` instance fitted on ``data``.

    Note:
        ``LinearRegression.fit`` solves the least-squares problem from scratch
        and has no warm start, so values assigned to ``coef_``/``intercept_``
        beforehand are overwritten. The global parameters therefore cannot
        influence the local fit. An incremental estimator (e.g. ``SGDRegressor``
        with ``partial_fit``) would be needed for true iterative rounds.
    """
    y = data[:, target_col]
    # Drop only the target column; this also works for negative indices.
    X = np.delete(data, target_col, axis=1)
    print(X.shape)  # progress/debug output: (rows, features) seen by this node
    model = LinearRegression()
    model.fit(X, y)
    return model

In [None]:
# CallServer
#A.CalServer()
#C.CalServer()

Federated Learning Process

In [None]:

for iteration in range(3):
    print(f"Iteration {iteration + 1}:")

    # Fresh collection of per-node models for this round.
    local_models = []

    for i, node in enumerate(node_data):
        # Each node contributes (at most) its first 4380 rows in every round.
        node_subset = node[:4380]

        # Fit this node's model, handing over the current global parameters.
        local_model = train_local_model(node_subset, global_model.coef_, global_model.intercept_)
        local_models.append(local_model)

        # Score the local model on the same rows it was fitted on
        # (column 9 is the target; all other columns are features).
        y_pred = local_model.predict(np.delete(node_subset, 9, axis=1))
        mse = mean_squared_error(node_subset[:, 9], y_pred)
        print(f"  Node {i + 1} - MSE: {mse:.4f}")

    # Federated averaging: combine the node parameters ...
    coef_avg, intercept_avg = federated_averaging(local_models)

    # ... and install them as the new global parameters.
    global_model.coef_, global_model.intercept_ = coef_avg, intercept_avg

    print(f"Global model updated after iteration {iteration + 1}.\n")


Final Evaluation of our Global model

In [None]:
# Final evaluation set: the first 4380 rows of node 1.
# NOTE(review): these rows were also used to fit node 1's local model, so the
# score below is not measured on held-out data.
test_data = node_data[0][:4380]

# Column 9 is the target (Usage_kWh); every other column is a feature.
X_test = np.delete(test_data, 9, axis=1)
y_test = test_data[:, 9]

# Keep a copy of the true target values on disk.
df = pd.DataFrame(y_test)
df.to_csv('test_data.csv', index=False, header=True)

# Score the aggregated global model.
y_pred = global_model.predict(X_test)
final_mse = mean_squared_error(y_test, y_pred)
print(f"Final Global Model MSE on Test Data: {final_mse:.4f}")

Comparing the predicted results vs. the actual results

In [None]:
# Two-column table: predictions in the first column, true values in the second.
output = np.column_stack((y_pred, y_test))

# Write the comparison table to disk.
df = pd.DataFrame(output)
df.to_csv("output.csv", index=False, header=True)

## R2 Score with FL

In [None]:
from sklearn.metrics import r2_score
# R^2 of the aggregated global model's predictions against y_test.
r2 = r2_score(y_test, y_pred)
print(r2) #With FL

## Visualizing the Results (FL)

In [None]:
# plt.scatter(y_pred,y_test,color='red')

# plt.title("FL using Linear reg")
# plt.xlabel('y_pred')
# plt.ylabel('y_test')
# plt.show()

In [None]:

# # Scatter plot of predictions vs actual values
# plt.scatter(y_pred, y_test, color='red')

# # Plot y=x line
# min_val = min(min(y_pred), min(y_test))
# max_val = max(max(y_pred), max(y_test))
# plt.plot([min_val, max_val], [min_val, max_val], color='blue', linestyle='--', label='y = x')

# # Adding title and labels
# plt.title("FL using Linear Regression")
# plt.xlabel('y_pred')
# plt.ylabel('y_test')
# plt.legend()  # Show the legend
# plt.show()


In [None]:
import matplotlib.pyplot as plt

# Predicted-vs-actual scatter for the federated model on a roomy canvas,
# drawn with small, semi-transparent markers.
plt.figure(figsize=(10, 8))
plt.scatter(y_pred, y_test, s=10, alpha=0.6, color='red')

# Dashed y = x reference line spanning the joint range of both series.
min_val = min(map(min, (y_pred, y_test)))
max_val = max(map(max, (y_pred, y_test)))
plt.plot([min_val, max_val], [min_val, max_val], label='y = x', color='blue', linestyle='--', linewidth=2)

# Captions, legend and grid.
plt.title("FL using Linear Regression")
plt.xlabel('y_pred')
plt.ylabel('y_test')
plt.grid(True)
plt.legend()

# Render the figure.
plt.show()


## Comparison of Global model if done Without FL

In [None]:
# Baseline: a single centralized linear regression on the complete encoded matrix.
regressor = LinearRegression()
without_FL = X_train

# Column 9 is the target (Usage_kWh); the remaining columns form the features.
y_withoutFL = without_FL[:, 9]
X_withoutFL = np.delete(without_FL, 9, axis=1)


In [None]:
from sklearn.model_selection import train_test_split

# Random 75/25 train/test split of the baseline data; random_state=0 fixes the shuffle.
X_withoutFL_train, X_withoutFL_test,y_withoutFL_train,y_withoutFL_test = train_test_split(X_withoutFL,y_withoutFL,test_size=0.25,random_state=0)

In [None]:
# Notebook display: shape of the baseline test features.
X_withoutFL_test.shape

In [None]:
# Notebook display: shape of the baseline training features.
X_withoutFL_train.shape

In [None]:
# Notebook display: shape of the baseline test targets.
y_withoutFL_test.shape

In [None]:
# Notebook display: shape of the baseline training targets.
y_withoutFL_train.shape

In [None]:
# Fit the centralized baseline on the training part of the split.
regressor.fit(X_withoutFL_train,y_withoutFL_train)

In [None]:
# Predict with the baseline on X_test, the evaluation rows from the federated section.
# NOTE(review): X_withoutFL_test / y_withoutFL_test from train_test_split are
# never scored, and X_test may overlap rows this regressor was trained on —
# confirm that reusing X_test (for a like-for-like comparison) is intended.
pred=regressor.predict(X_test)

print(pred)

In [None]:
# Notebook display: true target values used for the comparison.
y_test

## Visualising the results (Without FL)

In [None]:
# plt.scatter(pred,y_test,color='red')

# plt.title("Training the same dataset without FL (regular LR)")
# plt.xlabel('predicted')
# plt.ylabel('actual')
# plt.show()

In [None]:
import matplotlib.pyplot as plt

# Predicted-vs-actual scatter for the centralized baseline on a roomy canvas,
# drawn with small, semi-transparent markers.
plt.figure(figsize=(10, 8))
plt.scatter(pred, y_test, s=10, alpha=0.6, color='red')

# Dashed y = x reference line spanning the joint range of both series.
min_val = min(map(min, (pred, y_test)))
max_val = max(map(max, (pred, y_test)))
plt.plot([min_val, max_val], [min_val, max_val], label='y = x', color='blue', linestyle='--', linewidth=2)

# Captions, legend and grid.
plt.title("Linear Regression (Without FL)")
plt.xlabel('pred')
plt.ylabel('y_test')
plt.grid(True)
plt.legend()

# Render the figure.
plt.show()


In [None]:

# # Scatter plot of predictions vs actual values
# plt.scatter(pred, y_test, color='red')

# # Plot y=x line
# min_val = min(min(pred), min(y_test))
# max_val = max(max(pred), max(y_test))
# plt.plot([min_val, max_val], [min_val, max_val], color='blue', linestyle='--', label='y = x')

# # Adding title and labels
# plt.title("Training the same dataset without FL (regular LR)")
# plt.xlabel('predicted')
# plt.ylabel('actual')
# plt.legend()  # Show the legend
# plt.show()


In [None]:
# Notebook display: shape of the baseline predictions.
pred.shape

In [None]:
# Notebook display: true target values, for side-by-side inspection with pred.
y_test

In [None]:
# Notebook display: baseline predictions for X_test.
pred

## R2 score without FL

In [None]:
from sklearn.metrics import r2_score
# R^2 of the centralized baseline against the same y_test used for the FL model.
# This rebinds `r2`, replacing the federated score.
r2 = r2_score(y_test, pred)
print(r2) #Without FL