# Importing Libraries for Data Engineering

In [1]:
import numpy as np
import pandas as pd

# Loading Dataset

In [2]:
# Load the sensor path-loss workbook. header=None tells pandas not to take
# column names from the sheet, so the columns get default integer labels.
sensors_data = pd.read_excel(
    io='Smoothed path loss of 48 sensors for 2443 positions.xlsx',
    header=None,
)

FileNotFoundError: [Errno 2] No such file or directory: 'Smoothed path loss of 48 sensors for 2443 positions.xlsx'

In [None]:
# Show the loaded sensor table as this cell's output.
sensors_data

In [None]:
# Load the x-y-z position workbook. header=None tells pandas not to take
# column names from the sheet, so the columns get default integer labels.
position_data = pd.read_excel(
    io='real x-y-z positions of 2443 locations.xlsx',
    header=None,
)

In [None]:
# Show the loaded position table as this cell's output.
position_data

# Data Engineering

### Sensors Data -- Labeling Columns 

In [None]:
# Number of columns in the sensor table (one column per sensor label below).
Sensors_Number_of_Columns = len(sensors_data.columns)

In [None]:
# Label the columns "sensor1" ... "sensorN" (1-based), in their existing order.
Sensors_columns = [
    f"sensor{number}"
    for number in range(1, Sensors_Number_of_Columns + 1)
]
sensors_data.columns = Sensors_columns

In [None]:
# Show the sensor table again, now with the "sensorN" column labels.
sensors_data

### Converting to Pandas DataFrame

In [None]:
# Pass sensors_data through the DataFrame constructor and rebind the name.
# NOTE(review): pd.read_excel with the default sheet selection normally already
# returns a DataFrame, so this step is presumably redundant — confirm before removing.
sensors_data = pd.DataFrame(sensors_data)

### Position Data -- Labeling Columns

In [None]:
# Name the three coordinate columns of the position table.
position_data.columns = pd.Index(['Pos X', 'Pos Y', 'Pos Z'])

In [None]:
# Show the position table again, now with the 'Pos X' / 'Pos Y' / 'Pos Z' labels.
position_data

### Converting to Pandas DataFrame

In [None]:
# Pass position_data through the DataFrame constructor and rebind the name.
# NOTE(review): pd.read_excel with the default sheet selection normally already
# returns a DataFrame, so this step is presumably redundant — confirm before removing.
position_data = pd.DataFrame(position_data)

# Taking Sensor 01 - Sensor 16

In [None]:
# Place the first 16 sensor columns and the position columns side by side,
# then show the combined table as the cell output.
raw_data = pd.concat(
    (sensors_data.iloc[:, 0:16], position_data),
    axis="columns",
)
raw_data

# Split the data into training and testing sets

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; random_state fixes the shuffle so the
# split is the same on every run.
# NOTE(review): this splits every column of sensors_data, not the 16-sensor
# raw_data subset built in the previous section — confirm which is intended.
(X_train, X_test,
 y_train, y_test) = train_test_split(
    sensors_data,
    position_data,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Show the training features produced by the split.
X_train

In [None]:
# Show the training targets (position columns) produced by the split.
y_train

# Machine Learning

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
from sklearn.ensemble import GradientBoostingRegressor

from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, median_absolute_error
from sklearn.metrics import explained_variance_score, max_error, mean_squared_log_error

In [None]:
# Initialize the models
# Initialize the models
# The tree-based / boosted estimators get a fixed random_state (the same value
# used for the train/test split) so that re-running the notebook reproduces
# the reported metrics instead of changing them on every run.
models = [
    LinearRegression(),
    DecisionTreeRegressor(random_state=42),
    RandomForestRegressor(random_state=42),
    KNeighborsRegressor(),
    XGBRegressor(random_state=42),
]

In [None]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, median_absolute_error, explained_variance_score
import time

# Train each model on the hold-out split and print its test-set metrics.
for model in models:
    # Time fit + predict together as one end-to-end cost per model.
    # perf_counter is monotonic and higher-resolution than time.time().
    start_time = time.perf_counter()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    end_time = time.perf_counter()

    # Compute the per-target MSE once and derive both aggregates from it.
    # mean_squared_error(..., squared=False) is deprecated/removed in recent
    # scikit-learn releases; it took the square root per target and then
    # averaged, which is reproduced here so the reported RMSE is unchanged.
    per_target_mse = mean_squared_error(y_test, y_pred, multioutput='raw_values')
    mse = per_target_mse.mean()
    rmse = np.sqrt(per_target_mse).mean()

    print(type(model).__name__)
    # For scikit-learn style regressors, score() is the R^2 coefficient rather
    # than a classification accuracy; the label is kept for output compatibility.
    print("Accuracy:", model.score(X_test, y_test))
    print("Mean Squared Error:", mse)
    print("Root Mean Squared Error:", rmse)
    print("R2 Score:", r2_score(y_test, y_pred))
    print("Mean Absolute Error:", mean_absolute_error(y_test, y_pred))
    print("Median Absolute Error:", median_absolute_error(y_test, y_pred))
    print("Explained Variance Score:", explained_variance_score(y_test, y_pred))
    print("Time taken:", end_time - start_time)
    print("="*100)


In [None]:
import warnings
warnings.filterwarnings("ignore")

from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from xgboost import XGBRegressor
from sklearn.model_selection import KFold
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np
import pandas as pd
import time

# Features: sensor signals as a NumPy array.
X = sensors_data.values

# Targets: the corresponding X, Y and Z position values.
y = position_data.values

# Number of splits for KFold cross-validation. KFold is used with its default
# (no shuffling), so each fold is a contiguous range of rows.
num_splits = 5
kf = KFold(n_splits=num_splits)

# One (name, factory) pair per model. A fresh estimator is built for every fold
# so no fitted state carries over. Stochastic estimators are seeded so the
# table below is reproducible across runs.
model_factories = [
    ('LinearRegression', LinearRegression),
    ('DecisionTreeRegressor', lambda: DecisionTreeRegressor(random_state=42)),
    ('RandomForestRegressor', lambda: RandomForestRegressor(random_state=42)),
    ('KNeighborsRegressor', KNeighborsRegressor),
    ('XGBRegressor', lambda: XGBRegressor(random_state=42)),
]

# Per-model lists of fold scores, keyed by model name.
mae_scores = {name: [] for name, _ in model_factories}
mse_scores = {name: [] for name, _ in model_factories}
rmse_scores = {name: [] for name, _ in model_factories}

# Rows for the results table. They are collected in a list and turned into a
# DataFrame once at the end: DataFrame.append was removed in pandas 2.0, and
# growing a frame row by row is quadratic.
result_rows = []

for fold_num, (train_index, test_index) in enumerate(kf.split(X), start=1):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Train and evaluate the models
    for name, make_model in model_factories:
        model = make_model()

        # Train the model and measure training time.
        start_train_time = time.perf_counter()
        model.fit(X_train, y_train)
        end_train_time = time.perf_counter()

        # Predict on the held-out fold and measure prediction time.
        start_test_time = time.perf_counter()
        y_pred = model.predict(X_test)
        end_test_time = time.perf_counter()

        # Compute each metric once. RMSE is the square root of each target's
        # MSE, averaged over targets — the value the deprecated
        # mean_squared_error(..., squared=False) call produced.
        mae = mean_absolute_error(y_test, y_pred)
        per_target_mse = mean_squared_error(y_test, y_pred, multioutput='raw_values')
        mse = per_target_mse.mean()
        rmse = np.sqrt(per_target_mse).mean()

        mae_scores[name].append(mae)
        mse_scores[name].append(mse)
        rmse_scores[name].append(rmse)

        result_rows.append({'Fold': fold_num, 'Model': name, 'MAE': mae,
                            'MSE': mse,
                            'RMSE': rmse,
                            'Training_Time': end_train_time - start_train_time,
                            'Testing_Time': end_test_time - start_test_time})

# Build the results table in one step, with a fixed column order.
results_df = pd.DataFrame(result_rows,
                          columns=['Fold', 'Model', 'MAE', 'MSE', 'RMSE', 'Training_Time', 'Testing_Time'])

# Print the overall evaluation metrics for all models and splits
print(results_df.groupby(['Model']).mean())

# Display the full results table
print(results_df)


In [None]:
import warnings
warnings.filterwarnings("ignore")

from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np
import pandas as pd
import time

# Features: sensor signals as a NumPy array.
X = sensors_data.values

# Targets: the corresponding X, Y and Z position values.
y = position_data.values

# Number of contiguous blocks for blocked cross-validation. block_size is the
# floor of len(X) / num_blocks; the last block absorbs any leftover rows.
num_blocks = 5
block_size = len(X) // num_blocks

# One (name, factory) pair per model. A fresh estimator is built for every
# block so no fitted state carries over. Stochastic estimators are seeded so
# the reported numbers are reproducible across runs.
model_factories = [
    ('LinearRegression', LinearRegression),
    ('DecisionTreeRegressor', lambda: DecisionTreeRegressor(random_state=42)),
    ('RandomForestRegressor', lambda: RandomForestRegressor(random_state=42)),
    ('KNeighborsRegressor', KNeighborsRegressor),
    ('XGBRegressor', lambda: XGBRegressor(random_state=42)),
]

# Per-model lists of block scores, keyed by model name.
mae_scores = {name: [] for name, _ in model_factories}
mse_scores = {name: [] for name, _ in model_factories}
rmse_scores = {name: [] for name, _ in model_factories}

# Rows for the results table. They are collected in a list and turned into a
# DataFrame once at the end: DataFrame.append was removed in pandas 2.0, and
# growing a frame row by row is quadratic.
result_rows = []

for i in range(num_blocks):
    # 1-based block label, set inside the loop so each block's rows carry
    # their own number (it was previously incremented only after the loop).
    block_num = i + 1

    # Test rows are [start, stop); the last block runs to the end of the data
    # because it may have a different size.
    start = i * block_size
    stop = len(X) if i == num_blocks - 1 else (i + 1) * block_size

    X_test, y_test = X[start:stop], y[start:stop]
    # Train on everything outside the test block (rows before and after it).
    X_train = np.concatenate((X[:start], X[stop:]))
    y_train = np.concatenate((y[:start], y[stop:]))

    # Train and evaluate the models
    for name, make_model in model_factories:
        model = make_model()

        # Train the model and measure training time.
        start_train_time = time.perf_counter()
        model.fit(X_train, y_train)
        end_train_time = time.perf_counter()

        # Predict on the held-out block and measure prediction time.
        start_test_time = time.perf_counter()
        y_pred = model.predict(X_test)
        end_test_time = time.perf_counter()

        # Compute each metric once. RMSE is the square root of each target's
        # MSE, averaged over targets — the value the deprecated
        # mean_squared_error(..., squared=False) call produced.
        mae = mean_absolute_error(y_test, y_pred)
        per_target_mse = mean_squared_error(y_test, y_pred, multioutput='raw_values')
        mse = per_target_mse.mean()
        rmse = np.sqrt(per_target_mse).mean()

        mae_scores[name].append(mae)
        mse_scores[name].append(mse)
        rmse_scores[name].append(rmse)

        result_rows.append({'Block': block_num, 'Model': name, 'MAE': mae,
                            'MSE': mse,
                            'RMSE': rmse,
                            'Training_Time': end_train_time - start_train_time,
                            'Testing_Time': end_test_time - start_test_time})

# Build the results table in one step, with a fixed column order.
results_df = pd.DataFrame(result_rows,
                          columns=['Block', 'Model', 'MAE', 'MSE', 'RMSE', 'Training_Time', 'Testing_Time'])

# Per-model averages over all blocks.
print('Mean Absolute Error:')
for name, scores in mae_scores.items():
    print(f'{name}: {np.mean(scores)}')

print('\nMean Squared Error:')
for name, scores in mse_scores.items():
    print(f'{name}: {np.mean(scores)}')

print('\nRoot Mean Squared Error:')
for name, scores in rmse_scores.items():
    print(f'{name}: {np.mean(scores)}')
