# Data Analytics Project Work

In [None]:
# imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Smoke test: read one semicolon-separated dataset file into a pandas DataFrame
# and preview its first rows as the cell output.
test_df = pd.read_csv("../dataset/Scenario 0/RPI/RSS_BLT_Dataset/Distanza10.csv", sep=';')
test_df.head()

In [None]:
# Print a summary (columns, dtypes, non-null counts) of the test DataFrame.
test_df.info()

In [None]:
def read_data(anchors, directory_path):
    """
    Loads the per-anchor csv-files of one measurement case into a dict of dataframes.

    When the directory path contains "Scenario B" every entry of ``anchors`` is read.
    For any other path the last entry of ``anchors`` is left out.

    Parameters:
    anchors (list): List of anchor names, for example ["Anchor 1", "Anchor 2", "Anchor 3"...]
    directory_path (string): The relative directory path of the folder which contains the
        anchor csv-files. It is concatenated directly with the file name, so it has to end
        with a path separator.

    Returns:
    dict: a dict which keys are the anchor names (any ".csv" removed) and values are the
        respective dataframes
    """
    # Only the "Scenario B" folders are read with the full anchor list.
    wanted = anchors if "Scenario B" in directory_path else anchors[:-1]

    return {
        name.replace(".csv", ""): pd.read_csv(directory_path + name + ".csv", sep=';')
        for name in wanted
    }


In [None]:
def calculate_mean_values_with_bursts(scenario_dataframe, bursts):
    """
    Calculates the mean value over a given amount of bursts.

    Parameters:
    scenario_dataframe (DataFrame): A DataFrame-object containing the dataset of one anchor.
    bursts (int): The amount of bursts.

    Returns:
    int: Returns the mean value over the given amount of bursts.
    """

    mean_values = []

    power_values = scenario_dataframe["Rx Power [dBm]"].to_list()
    list_size = len(power_values)
    burst_size = list_size // bursts

    start = 0
    for i in range(bursts):
        end = start + burst_size if i < bursts - 1 else list_size - 1
        mean_values.append(power_values[end])
        start = end
        
    mean_value = round(np.mean(mean_values), 4)
    
    return mean_value

In [None]:
def calculate_mean(scenario_dataframe):
    """
    Calculates the mean of the "Rx Power [dBm]" column of one anchor.

    Parameters:
    scenario_dataframe (DataFrame): A DataFrame-object containing the dataset of one anchor.

    Returns:
    float: The mean of the column, rounded to four decimals.
    """
    rx_power = scenario_dataframe["Rx Power [dBm]"].to_list()

    return round(np.mean(rx_power), 4)

In [None]:
def calculate_median(scenario_dataframe):
    """
    Calculates the median of the "Rx Power [dBm]" column of one anchor.

    Parameters:
    scenario_dataframe (DataFrame): A DataFrame-object containing the dataset of one anchor.

    Returns:
    float: The median of the column, rounded to four decimals.
    """
    rx_power = scenario_dataframe["Rx Power [dBm]"].to_list()

    return round(np.median(rx_power), 4)


In [None]:
def calculate_variance(scenario_dataframe):
    """
    Calculates the variance of the "Rx Power [dBm]" column of one anchor.

    The variance is computed with numpy.var using its default settings, i.e. the
    population variance (ddof=0).

    Parameters:
    scenario_dataframe (DataFrame): A DataFrame-object containing the dataset of one anchor.

    Returns:
    float: The variance of the column, rounded to four decimals.
    """
    power_values = scenario_dataframe["Rx Power [dBm]"].to_list()

    # Round to four decimals like the mean and the median; rounding without a
    # digit count would cut the variance down to a whole number.
    variance = round(np.var(power_values), 4)

    return variance

# https://numpy.org/doc/stable/reference/generated/numpy.var.html

In [None]:
def calculate_estimates(p_initial, n_value, mean_values):
    """
    Calculates the distance estimate for each anchor.

    Every estimate is computed as 10 ** ((p_initial - mean) / (10 * n_value)),
    where mean is the value stored for the anchor in ``mean_values``.

    Parameters:
    p_initial (int): The initial RSS value for the distance estimation equation.
    n_value (int): The n_value for the distance estimation formula. Zero raises
        ZeroDivisionError.
    mean_values (dict): A dict which keys are the anchor names and values are their
        respective mean values.

    Returns:
    dict: Returns a dict which keys are the anchor names and values are their
        respective distance estimations.
    """
    return {
        name: 10**((p_initial-rss)/(10*n_value))
        for name, rss in mean_values.items()
    }

In [None]:
def draw_boxplot(dataframes):
    """
    Draws one boxplot per anchor of the "Rx Power [dBm]" values and shows the figure.

    The boxes are drawn in the iteration order of ``dataframes`` and every box is
    labelled with its dict key, so any number of anchors is supported.

    Parameters:
    dataframes (dict): A dict which keys are the anchor names and values are the
        respective dataframes.

    Returns:
    None
    """
    anchor_names = list(dataframes)
    power_values = [
        dataframes[anchor]["Rx Power [dBm]"].to_list() for anchor in anchor_names
    ]

    plt.boxplot(power_values)

    plt.xlabel('Anchors')
    plt.ylabel('Power Values')
    plt.title('Boxplot for Power Values for Each Anchor')

    # boxplot puts the boxes at x = 1..N by default; label each one with its anchor
    # name instead of relying on a hard-coded list for exactly 6 or 7 anchors.
    plt.xticks(list(range(1, len(anchor_names) + 1)), anchor_names)

    plt.show()
    

In [None]:
# Names of the anchors; each one has a csv-file of the same name in a dataset folder.
anchors = [f"Anchor {number}" for number in range(1, 8)]

# Maps a readable case name to the relative folder which holds its anchor csv-files.
# Order: scenarios A, B, C and for each of them first the BLT, then the WiFi dataset.
directory_paths = {
    f"Scenario {scenario}, {radio} Dataset": f"../dataset/Scenario {scenario}/RPI/RSS_{radio}_Dataset/"
    for scenario in ("A", "B", "C")
    for radio in ("BLT", "WiFi")
}


In [None]:
# Runs the same analysis for every measurement case: prints the mean, median and
# variance of each anchor, computes the distance estimates and draws the boxplot.
for key, scenario_path in directory_paths.items():

    dataframes = read_data(anchors, scenario_path)
    print(key)
    print()

    bursts = 10

    mean_values, median_values, variance_values = {}, {}, {}

    # (printed label, function which computes the statistic, dict which stores it)
    statistics = (
        ("mean value:", calculate_mean, mean_values),
        ("median value:", calculate_median, median_values),
        ("variance value:", calculate_variance, variance_values),
    )

    for label, compute, results in statistics:
        for anchor in dataframes:
            results[anchor] = compute(dataframes[anchor])
            print(anchor, label, results[anchor])

        # blank line after every group of values
        print()

    p_initial = -40
    n_value = 2.8
    # The estimates are only stored here, they are not printed.
    estimated_distances = calculate_estimates(p_initial, n_value, mean_values)

    draw_boxplot(dataframes)

    print()

#for anchor in anchors:
#    print(anchor, "Estimated distance:", estimated_distances[anchor])