In [22]:
import pandas as pd

# Load data from CSV file
data = pd.read_csv('wireless_traffic_dataset.csv')

# Display the first few rows of the dataframe
data.head()

Unnamed: 0,Tower_ID,Signal_Strength(dBm),Network_Traffic(MB),Latency(ms),User_Count,Device_Type
0,Tower_8,-66,430.07,770,232,IoT Device
1,Tower_80,-79,495.16,485,337,Laptop
2,Tower_72,-63,269.3,20,326,IoT Device
3,Tower_55,-84,12.13,503,467,Tablet
4,Tower_69,-44,93.6,575,15,Smartphone


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from joblib import Parallel, delayed

# Load your dataset
# data = pd.read_csv('your_dataset.csv')

# Function to check if a tower is rural (assuming primary numbers indicate rural towers)
def is_rural_tower(tower_id):
    # For demonstration, let's assume rural if the ID is even
    int_tower_id = tower_id.strip().replace('Tower_', '')
    return str(int_tower_id).isdigit() and int(int_tower_id) % 2 == 0

# Encode categorical variables using one-hot encoding
data = pd.get_dummies(data, columns=['Device_Type'], drop_first=True)

# Split data into urban and rural subsets
urban_data = data[~data['Tower_ID'].apply(is_rural_tower)]
rural_data = data[data['Tower_ID'].apply(is_rural_tower)]

# Define target variable (Latency(ms)) and features
target_column = 'Latency(ms)'
feature_columns = [col for col in urban_data.columns if col not in ['Tower_ID', target_column]]

X_urban, y_urban = urban_data[feature_columns], urban_data[target_column]
X_rural, y_rural = rural_data[feature_columns], rural_data[target_column]

# Train-test split
X_urban_train, X_urban_test, y_urban_train, y_urban_test = train_test_split(X_urban, y_urban, test_size=0.2)
X_rural_train, X_rural_test, y_rural_train, y_rural_test = train_test_split(X_rural, y_rural, test_size=0.2)

# Combine datasets for the full model training
combined_data = pd.concat([urban_data, rural_data])
X_combined, y_combined = combined_data[feature_columns], combined_data[target_column]
X_full_train, X_full_test, y_full_train, y_full_test = train_test_split(X_combined, y_combined, test_size=0.2)

# Define a function to train and evaluate model
def train_and_evaluate(X_train, X_test, y_train, y_test):
    model = RandomForestRegressor()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    return mse

# Create a list of delayed function calls
tasks = [
    delayed(train_and_evaluate)(X_urban_train, X_urban_test, y_urban_train, y_urban_test),
    delayed(train_and_evaluate)(X_rural_train, X_rural_test, y_rural_train, y_rural_test),
    delayed(train_and_evaluate)(X_full_train, X_full_test, y_full_train, y_full_test)
]

# Execute the tasks in parallel
results = Parallel(n_jobs=3)(tasks)

mse_urban, mse_rural, mse_full = results

print(f"Urban Model MSE: {mse_urban}")
print(f"Rural Model MSE: {mse_rural}")
print(f"Full Dataset Model MSE: {mse_full}")

# Compare the performances

Urban Model MSE: 84729.77293489933
Rural Model MSE: 87921.52955197368
Full Dataset Model MSE: 80684.529272
