In [None]:
import gzip
import pandas as pd
import os
import re

def min_max_frequencies(monitor_foldername):
    """Return the smallest and largest ``frequency`` value recorded for a monitor.

    Every ``*.gz`` entry of ``./rfbaseline/<monitor_foldername>/`` is read as a
    gzip-compressed CSV, tagged with the epoch-seconds timestamp parsed from its
    file name, and stacked into a single frame.

    Args:
        monitor_foldername: Name of the monitor's sub-folder under ``./rfbaseline/``.

    Returns:
        ``(min_frequency, max_frequency)``, or ``(None, None)`` when the stacked
        frame is empty (for example, no ``.gz`` files in the folder).
    """
    timestamp_pattern = re.compile(r'-(\d+)\.csv\.gz$')

    def load_capture(capture_path, capture_name):
        # Read one capture and stamp each row with the file's own timestamp.
        with gzip.open(capture_path, 'rt') as handle:
            frame = pd.read_csv(handle)
        found = timestamp_pattern.search(capture_name)
        epoch_seconds = None if found is None else int(found.group(1))
        frame['timestamp'] = pd.to_datetime(epoch_seconds, unit='s')
        return frame

    folder_path = './rfbaseline/' + monitor_foldername + "/"
    frames = [
        load_capture(os.path.join(folder_path, name), name)
        for name in os.listdir(folder_path)
        if name.endswith('.gz')
    ]
    combined_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    if combined_df.empty:
        return None, None

    frequency = combined_df['frequency']
    return frequency.min(), frequency.max()

# Example usage: report the frequency span recorded by one monitor.
monitor_foldername = 'Guesthouse'  # Replace with your folder name
min_freq, max_freq = min_max_frequencies(monitor_foldername)
if min_freq is None or max_freq is None:
    # min_max_frequencies returns (None, None) when there is nothing to read.
    print("No data available")
else:
    print(f"Minimum Frequency: {min_freq} MHz")
    print(f"Maximum Frequency: {max_freq} MHz")


In [None]:
import gzip
import pandas as pd
import os
import re

def min_max_frequencies(monitor_foldername):
    """Print the stacked ``frequency`` column of a monitor and return its min and max.

    All ``*.gz`` files under ``./rfbaseline/<monitor_foldername>/`` are read as
    gzip-compressed CSVs; each frame gets a ``timestamp`` column built from the
    epoch seconds embedded in its file name before the frames are concatenated.

    Args:
        monitor_foldername: Name of the monitor's sub-folder under ``./rfbaseline/``.

    Returns:
        ``(min_frequency, max_frequency)``, or ``(None, None)`` when the
        concatenated frame is empty.
    """
    folder_path = './rfbaseline/' + monitor_foldername + "/"

    per_file_frames = []
    for entry in os.listdir(folder_path):
        if not entry.endswith('.gz'):
            continue
        with gzip.open(os.path.join(folder_path, entry), 'rt') as stream:
            capture = pd.read_csv(stream)
        stamp = re.search(r'-(\d+)\.csv\.gz$', entry)
        capture['timestamp'] = pd.to_datetime(int(stamp.group(1)) if stamp else None, unit='s')
        per_file_frames.append(capture)

    if not per_file_frames:
        combined_df = pd.DataFrame()
    else:
        combined_df = pd.concat(per_file_frames, ignore_index=True)

    if combined_df.empty:
        return None, None

    min_frequency = combined_df['frequency'].min()
    max_frequency = combined_df['frequency'].max()
    # The whole frequency column is echoed for inspection before returning.
    print(combined_df['frequency'])
    return min_frequency, max_frequency

# Example usage: report the frequency span recorded by one monitor.
monitor_foldername = 'EBC'  # Replace with your folder name
min_freq, max_freq = min_max_frequencies(monitor_foldername)
if min_freq is None or max_freq is None:
    # min_max_frequencies returns (None, None) when there is nothing to read.
    print("No data available")
else:
    print(f"Minimum Frequency: {min_freq} MHz")
    print(f"Maximum Frequency: {max_freq} MHz")


In [None]:
import os
import re
import numpy as np

def median_time_diff_between_folders(monitor_foldername):
    """Median gap, in seconds, between consecutive capture files of a monitor.

    Only file names are inspected: every ``*.gz`` entry of
    ``./rfbaseline/<monitor_foldername>/`` whose name ends in
    ``-<digits>.csv.gz`` contributes its digits as a Unix timestamp.

    Args:
        monitor_foldername: Name of the monitor's sub-folder under ``./rfbaseline/``.

    Returns:
        The median of the differences between the sorted timestamps, or ``None``
        when fewer than two timestamps were found.
    """
    folder_path = './rfbaseline/' + monitor_foldername + "/"
    stamp_regex = re.compile(r'-(\d+)\.csv\.gz$')

    hits = (
        stamp_regex.search(name)
        for name in os.listdir(folder_path)
        if name.endswith('.gz')
    )
    timestamps = sorted(int(hit.group(1)) for hit in hits if hit)

    if not timestamps:
        return None

    gaps = np.diff(timestamps)
    if gaps.size > 0:
        return np.median(gaps)
    # A single file yields no gap to measure.
    return None

# Example usage of the function
# The helper returns None when the folder holds fewer than two timestamped files,
# so the seconds -> hours conversion must happen only after the None check
# (None / 3600 raises TypeError).
median_diff_seconds = median_time_diff_between_folders("Garage")
median_diff = median_diff_seconds / 3600 if median_diff_seconds is not None else None
if median_diff is not None:
    print(f"Median Time Difference Between Consecutive Files: {median_diff} hours")
else:
    print("No files to calculate median time difference.")


In [None]:
import gzip
import pandas as pd
import os
import re
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Time-of-day windows as (start, end) clock strings; 'night' runs past midnight.
intervals = dict(
    morning=('04:00:00', '12:00:00'),
    afternoon=('12:00:00', '20:00:00'),
    night=('20:00:00', '04:00:00'),
)
# Seasons as (start_month, end_month) bounds.
# NOTE(review): winter starts at 0, which only matches December if the consumer
# compares against month % 12 -- confirm how the bounds are applied.
seasons = dict(
    spring=(3, 6),
    summer=(6, 9),
    autumn=(9, 12),
    winter=(0, 3),
)

# Function to read gzipped CSV files
def read_gzipped_csv(file_path):
    """Read a gzip-compressed CSV file and return its contents as a DataFrame.

    Args:
        file_path: Path of the ``.gz`` file; it is opened in text mode.
    """
    text_stream = gzip.open(file_path, 'rt')
    try:
        return pd.read_csv(text_stream)
    finally:
        text_stream.close()

# Function to extract timestamp from filename
def extract_timestamp_from_filename(filename):
    """Return the integer that precedes ``.csv.gz`` in ``filename``.

    The name must end in ``-<digits>.csv.gz``; ``None`` is returned otherwise.
    """
    found = re.search(r'-(\d+)\.csv\.gz$', filename)
    if not found:
        return None
    return int(found.group(1))

# Function to concatenate CSVs in a folder
def concatenate_csvs_in_folder(folder_path):
    """Stack every gzip-compressed CSV of a folder into one DataFrame.

    Each ``*.gz`` file is read, given a ``timestamp`` column derived from the
    epoch seconds in its file name, stripped of rows containing missing values,
    and restricted to timestamps on or after 2023-01-01.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        The concatenated frame with a fresh index, or an empty ``DataFrame``
        when the folder contains no ``.gz`` files.
    """
    cutoff_date = pd.Timestamp('2023-01-01')

    def load_one(filename):
        timestamp = extract_timestamp_from_filename(filename)
        frame = read_gzipped_csv(os.path.join(folder_path, filename))
        frame['timestamp'] = pd.to_datetime(timestamp, unit='s')
        frame = frame.dropna()
        # The timestamp is constant per file, so this keeps or drops the whole file.
        return frame[frame['timestamp'] >= cutoff_date]

    dataframes = [
        load_one(filename)
        for filename in os.listdir(folder_path)
        if filename.endswith('.gz')
    ]
    if not dataframes:
        return pd.DataFrame()
    return pd.concat(dataframes, ignore_index=True)

# Function to calculate linearly varying threshold
def linear_threshold(freq, start, end, threshold_start, threshold_end):
    """Occupancy threshold that varies linearly with frequency across a band.

    The threshold equals ``threshold_start`` at ``start`` and ``threshold_end``
    at ``end``; values in between (or outside) follow the same straight line.

    Args:
        freq: Frequency or array/Series of frequencies to evaluate.
        start: Lower band edge.
        end: Upper band edge.
        threshold_start: Threshold at ``start``.
        threshold_end: Threshold at ``end``.

    Returns:
        The threshold for ``freq``, with the same shape as ``freq``.
    """
    if end == start:
        # Zero-width band: the slope is undefined and the division below would
        # yield NaN/inf (or raise), silently marking every sample unoccupied.
        # Use the threshold defined at the band's single frequency instead.
        return freq * 0 + threshold_start
    return threshold_start + ((threshold_end - threshold_start) * (freq - start) / (end - start))

# Function to process data for a single monitor
def _band_occupancy_metrics(samples, threshold_start, threshold_end, band_start, band_end):
    """Compute duty cycle and occupied-power statistics for one subset of samples.

    ``samples`` needs ``frequency`` and ``power`` columns. Rows outside
    ``[band_start, band_end]`` are ignored; a row is "occupied" when its power
    exceeds the linear threshold evaluated at its frequency.
    """
    in_band = samples[(samples['frequency'] >= band_start) & (samples['frequency'] <= band_end)]
    # Evaluate the threshold on the whole column at once and keep it as its own
    # Series: assigning a new column to a filtered frame triggers pandas'
    # SettingWithCopyWarning.
    threshold = linear_threshold(in_band['frequency'], band_start, band_end, threshold_start, threshold_end)
    occupied_power = in_band.loc[in_band['power'] > threshold, 'power']

    return {
        'Duty_Cycle': len(occupied_power) / len(in_band) * 100 if len(in_band) > 0 else 0,
        'Avg_Power_Occupied': np.mean(occupied_power) if not occupied_power.empty else 0,
        'Variance_Power_Occupied': np.var(occupied_power) if not occupied_power.empty else 0,
    }


def process_monitor_data(monitor_foldername, threshold_start, threshold_end, band_start, band_end):
    """Summarise band occupancy for one monitor by time of day and by season.

    Args:
        monitor_foldername: Sub-folder of ``./rfbaseline/`` holding the captures.
        threshold_start: Occupancy threshold at ``band_start``.
        threshold_end: Occupancy threshold at ``band_end``.
        band_start: Lower edge of the analysed band (inclusive).
        band_end: Upper edge of the analysed band (inclusive).

    Returns:
        ``{'time_of_day': {period: metrics}, 'season': {season: metrics}}`` with
        one entry per key of the module-level ``intervals`` and ``seasons``.
        Each ``metrics`` dict holds ``Duty_Cycle`` (percent),
        ``Avg_Power_Occupied`` and ``Variance_Power_Occupied``; all three are 0
        when no samples qualify.
    """
    folder_path = './rfbaseline/' + monitor_foldername + "/"
    combined_df = concatenate_csvs_in_folder(folder_path)

    results = {'time_of_day': {}, 'season': {}}

    if combined_df.empty:
        # No usable rows (an empty frame may not even have a 'timestamp' column):
        # report the same zeros an empty subset would produce instead of crashing.
        for group_key, group_names in (('time_of_day', intervals), ('season', seasons)):
            for name in group_names:
                results[group_key][name] = {
                    'Duty_Cycle': 0,
                    'Avg_Power_Occupied': 0,
                    'Variance_Power_Occupied': 0,
                }
        return results

    timestamps = combined_df['timestamp']

    # Time-of-Day Analysis
    # NOTE(review): timestamps are built from epoch seconds without a timezone, so
    # the windows are presumably evaluated in UTC rather than local time -- confirm.
    clock_time = timestamps.dt.time
    for period, (start_time, end_time) in intervals.items():
        window_start = pd.to_datetime(start_time).time()
        window_end = pd.to_datetime(end_time).time()
        if window_start > window_end:
            # Window wraps past midnight (the 'night' interval).
            mask = (clock_time >= window_start) | (clock_time < window_end)
        else:
            mask = (clock_time >= window_start) & (clock_time < window_end)
        results['time_of_day'][period] = _band_occupancy_metrics(
            combined_df.loc[mask], threshold_start, threshold_end, band_start, band_end)

    # Season Analysis
    # Months are reduced modulo 12 so December becomes 0 and falls inside winter's
    # (0, 3) range; with the raw 1-12 month number December matched no season.
    month = timestamps.dt.month % 12
    for season, (start_month, end_month) in seasons.items():
        mask = (month >= start_month) & (month < end_month)
        results['season'][season] = _band_occupancy_metrics(
            combined_df.loc[mask], threshold_start, threshold_end, band_start, band_end)

    return results

# Main function to process all monitors and plot results
def main():
    """Process every monitor for the 3610-3650 band and plot six metric panels.

    Three panels show the metrics per time of day and three per season, one
    line per monitor in each.

    Returns:
        Dict mapping monitor name to the result of ``process_monitor_data``.
    """
    sns.set_theme()
    monitors = ['Bookstore', 'EBC', 'Guesthouse', 'Moran', 'WEB', 'Sagepoint', 'Law73', 'Humanities', 'Madsen', 'Garage']  # Add your monitor folder names here
    threshold_start = -105  # Set your threshold start
    threshold_end = -105  # Set your threshold end
    band_start = 3610  # Set your band start
    band_end = 3650  # Set your band end

    # Process data for each monitor
    all_results = {
        monitor: process_monitor_data(monitor, threshold_start, threshold_end, band_start, band_end)
        for monitor in monitors
    }

    # Six stacked panels: rows 0-2 are time of day, rows 3-5 are season.
    fig, axs = plt.subplots(6, 1, figsize=(15, 30))
    fig.suptitle('RF Monitor Metrics at Different Times of Day and Seasons')

    metrics = ['Duty_Cycle', 'Avg_Power_Occupied', 'Variance_Power_Occupied']
    panel_groups = (
        ('time_of_day', 'Time of Day', 0),
        ('season', 'Season', 3),
    )
    for group_key, group_label, first_axis in panel_groups:
        for offset, metric in enumerate(metrics):
            ax = axs[first_axis + offset]
            for monitor, results in all_results.items():
                categories = list(results[group_key].keys())
                values = [results[group_key][category][metric] for category in categories]
                ax.plot(categories, values, label=monitor)

            ax.set_title(metric + ' (' + group_label + ')')
            ax.set_xlabel(group_label)
            ax.set_ylabel(metric)
            ax.legend()

    plt.tight_layout(rect=[0, 0.03, 1, 0.95])  # Adjust layout to make room for the main title
    plt.show()

    return all_results

# Run the analysis and keep the per-monitor results in the global `all_results`,
# which later cells read.
if __name__ == "__main__":
    all_results = main()


In [None]:
# Persist `all_results` to disk. When it is the nested dict returned by main(),
# np.save stores it as a pickled object array.
np.save("monitoring_metrics_day_season_36103650.npy", all_results)

In [None]:
# Load a previously saved results file; allow_pickle=True is needed to read
# pickled object data.
# NOTE(review): this file name has no band suffix, unlike the file written by the
# np.save cell above -- confirm that a different file is intended here.
# NOTE(review): if the file holds a dict saved via np.save, this presumably yields
# a 0-d object array and `.item()` is needed to get the dict back -- verify.
all_results_dummy = np.load("monitoring_metrics_day_season.npy", allow_pickle=True)

In [None]:
# Display the freshly computed results as the cell output.
all_results

In [None]:
# Display the results loaded from disk, for comparison with `all_results`.
all_results_dummy

In [None]:
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import seaborn as sns

# Publication-style figure: a 2x3 grid with one column per metric. The top row
# plots each monitor against time of day, the bottom row against season, and each
# panel also shows the across-monitor average.
# Reads the global `all_results`, which must have 'time_of_day' and 'season'
# entries for every monitor.
custom = {"axes.edgecolor": "black"}
sns.set_style("whitegrid", rc=custom)
matplotlib.rcParams.update({'font.size': 18})

fig, axs = plt.subplots(2, 3, figsize=(20, 15))  # Create 6 subplots
fig.suptitle('RF Monitoring Metrics at Different Times of Day and Seasons, 3610-3650 MHz', fontsize=30)

# metrics, metric_units and metric_labels are parallel lists indexed by column.
metrics = ['Avg_Power_Occupied', 'Duty_Cycle', 'Variance_Power_Occupied']
metric_units = ['dBX', 'Percentage', 'dBX$^2$']
metric_labels = ['Average Occupancy Power', 'Duty Cycle', 'Signal Variance']
# One colour per monitor, looked up by the monitor's position in all_results.
colors = ['tab:blue', 'tab:orange', 'tab:green', 'tab:red', 'tab:purple', 'tab:brown', 'tab:pink', 'tab:gray', 'tab:olive', 'tab:cyan']  # Add more colors if needed
avg_color = 'black'  # Color for the average line

# Legend handles and labels are collected once per row (from the first column only).
legend_labels = []
legend_lines = []

# Plot individual lines and calculate averages for the top row (Time of Day)
for idx, metric in enumerate(metrics):
    all_values = []  # To store values for calculating average
    for monitor_idx, (monitor, results) in enumerate(all_results.items()):
        times_of_day = list(results['time_of_day'].keys())
        values = [results['time_of_day'][time][metric] for time in times_of_day]
        all_values.append(values)
        line, = axs[0, idx].plot(times_of_day, values, color=colors[monitor_idx], linewidth=2, linestyle='solid')
        if idx == 0:
            legend_labels.append(monitor)
            legend_lines.append(line)

    # Calculate and plot average line
    # The mean runs over monitors (axis 0), giving one value per time of day.
    average_values = np.mean(all_values, axis=0)
    axs[0, idx].plot(times_of_day, average_values, color=avg_color, linewidth=3.5, linestyle='--', label='Average')

    axs[0, idx].set_title(metric_labels[idx] + ' (Time of Day)', fontsize=20)
    axs[0, idx].set_xlabel('Time of Day', fontsize=18)
    axs[0, idx].set_ylabel(metric_units[idx], fontsize=18)
    axs[0, idx].grid(True)
    axs[0, idx].tick_params(axis='both', labelsize=16)
    # Panel tag: chr(97 + idx) gives '(a)', '(b)', '(c)'.
    axs[0, idx].text(-0.1, 1.1, f'({chr(97 + idx)})', transform=axs[0, idx].transAxes, fontsize=20, fontweight='bold', va='top', ha='right')

# Add legend for the top row
# A proxy Line2D stands in for the dashed average line.
fig.legend(legend_lines + [plt.Line2D([0], [0], color=avg_color, linestyle='--', linewidth=3)], legend_labels + ['Average'], loc='lower center', ncol=len(legend_labels) + 1, fontsize=16, bbox_to_anchor=(0.5, 0.905))

# Clear legend lines and labels for the bottom row
legend_lines.clear()
legend_labels.clear()

# Plot individual lines and calculate averages for the bottom row (Season)
for idx, metric in enumerate(metrics):
    all_values = []  # To store values for calculating average
    for monitor_idx, (monitor, results) in enumerate(all_results.items()):
        season_names = list(results['season'].keys())
        values = [results['season'][season][metric] for season in season_names]
        all_values.append(values)
        line, = axs[1, idx].plot(season_names, values, color=colors[monitor_idx], linewidth=2, linestyle='dashed')
        if idx == 0:
            legend_labels.append(monitor)
            legend_lines.append(line)

    # Calculate and plot average line
    # The mean runs over monitors (axis 0), giving one value per season.
    average_values = np.mean(all_values, axis=0)
    axs[1, idx].plot(season_names, average_values, color=avg_color, linewidth=3.5, linestyle='-', label='Average')

    axs[1, idx].set_title(metric_labels[idx] + ' (Season)', fontsize=20)
    axs[1, idx].set_xlabel('Season', fontsize=18)
    axs[1, idx].set_ylabel(metric_units[idx], fontsize=18)
    axs[1, idx].grid(True)

    # Adjust tick parameters
    axs[1, idx].tick_params(axis='both', labelsize=16)
    # Panel tag: chr(100 + idx) gives '(d)', '(e)', '(f)'.
    axs[1, idx].text(-0.1, 1.1, f'({chr(100 + idx)})', transform=axs[1, idx].transAxes, fontsize=20, fontweight='bold', va='bottom', ha='right')

    

# Add legend for the bottom row
# In this row the monitors are dashed and the average is a solid line.
fig.legend(legend_lines+ [plt.Line2D([0], [0], color=avg_color, linestyle='-', linewidth=3)], legend_labels+ ['Average'], loc='lower center', ncol=len(legend_labels)+1, fontsize=16, bbox_to_anchor=(0.5, 0.04))

# Adjust layout to make room for the main title, legends and improve spacing
plt.tight_layout(rect=[0, 0.08, 1, 0.95])

plt.show()

In [None]:
import numpy as np
# Hard-coded summary metrics, ten values per array.
# NOTE(review): presumably one value per monitor, in the order of the monitors
# list used by main() -- confirm the source and ordering of these numbers.

# Variance of occupied power per band.
var_3470_3510 = np.array([20.95, 18.4, 25.86, 9.8, 22.53, 13.13, 15.03, 16.24, 21.51, 23])
var_3610_3650 = np.array([12.64, 16.45, 5.27, 8.38, 28.14, 10.46, 12.16, 18.02, 4.31, 33.2])
var_2160_2170 = np.array([49.69, 4.03, 3.16, 24.47, 7.04, 2.72, 16.28, 7.33, 23.37, 3.74])

# RSSI per band.
rssi_3470_3510 = np.array([-85.53, -95.92, -96.29, -101.19, -93.44, -99.88, -88.62, -99.78, -96.45, -88.8])
rssi_3610_3650 = np.array([-84.93, -98.39, -101.84, -101.58, -92.01, -101.67, -86.77, -98.45, -102.38, -89.61])
rssi_2160_2170 = np.array([-85.43, -76.88, -76.22, -86.68, -78.85, -80.78, -79.53, -74.71, -80.88, -72.33])

# Duty cycle (percent) per band.
# NOTE(review): several entries repeat across bands (45.25, 45.42, 44.84) --
# verify they were not copied between arrays by mistake.
dc_3470_3510 = np.array([45.25, 98.68, 96.1, 37.4, 45.42, 85.82, 44.84, 74.81, 67.82, 100])
dc_3610_3650 = np.array([45.25, 92.52, 77.09, 23.34, 45.41, 55.23, 44.84, 85.13, 38.63, 100])
dc_2160_2170 = np.array([99.89, 100, 100, 97.74, 45.42, 100, 100, 100, 100, 100])

from scipy.stats import pearsonr

# Pairwise Pearson correlations between variance, RSSI and duty cycle, per band.
# Labels are generated from the band key so they cannot drift from the data being
# correlated (the 2160-2170 results used to be printed under "3610-3650" labels).
band_series = {
    "3470-3510": (var_3470_3510, rssi_3470_3510, dc_3470_3510),
    "3610-3650": (var_3610_3650, rssi_3610_3650, dc_3610_3650),
    "2160-2170": (var_2160_2170, rssi_2160_2170, dc_2160_2170),
}
for band_label, (var_values, rssi_values, dc_values) in band_series.items():
    print("Var-RSSI-" + band_label)
    print(pearsonr(var_values, rssi_values))
    print("Var-DC-" + band_label)
    print(pearsonr(var_values, dc_values))
    print("DC-RSSI-" + band_label)
    print(pearsonr(dc_values, rssi_values))

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Ridge, Lasso  # Importing Ridge
from sklearn.pipeline import make_pipeline

# Non-linear regression of variance on RSSI, one model per band: polynomial
# features followed by Lasso (L1-regularized) linear regression.
degree = 3  # Degree of polynomial features
alpha = 0.05  # Regularization strength

model_3470_3510 = make_pipeline(PolynomialFeatures(degree), Lasso(alpha=alpha))
model_3610_3650 = make_pipeline(PolynomialFeatures(degree), Lasso(alpha=alpha))
model_2160_2170 = make_pipeline(PolynomialFeatures(degree), Lasso(alpha=alpha))

# Reshaping the data for the model (scikit-learn expects a 2-D feature matrix)
X_3470_3510 = rssi_3470_3510.reshape(-1, 1)
X_3610_3650 = rssi_3610_3650.reshape(-1, 1)
X_2160_2170 = rssi_2160_2170.reshape(-1, 1)

# Fitting the models
model_3470_3510.fit(X_3470_3510, var_3470_3510)
model_3610_3650.fit(X_3610_3650, var_3610_3650)
model_2160_2170.fit(X_2160_2170, var_2160_2170)

# Generating predictions for plotting
# NOTE(review): the fixed -95..-70 range does not cover all measured RSSI values
# (some are below -95), so parts of the curves are extrapolated and some points
# lie outside the drawn curve -- confirm this range is intended.
X_plot = np.linspace(-95, -70, 100).reshape(-1, 1)
y_pred_3470_3510 = model_3470_3510.predict(X_plot)
y_pred_3610_3650 = model_3610_3650.predict(X_plot)
y_pred_2160_2170 = model_2160_2170.predict(X_plot)

# Plotting
plt.figure(figsize=(18, 6))

# Adjusting font sizes
plt.rcParams.update({'font.size': 18})  # Adjust this value as needed

# One panel per band. The data label and title are both built from the band name,
# so they always agree (the 2160-2170 points used to be labelled "3610-3650").
band_panels = [
    ('3470-3510', rssi_3470_3510, var_3470_3510, y_pred_3470_3510, 'blue', 'deepskyblue', 'lower right'),
    ('3610-3650', rssi_3610_3650, var_3610_3650, y_pred_3610_3650, 'green', 'limegreen', 'lower right'),
    ('2160-2170', rssi_2160_2170, var_2160_2170, y_pred_2160_2170, 'red', 'darkred', 'upper right'),
]
for panel_number, (band_label, rssi_values, var_values, y_pred, point_color, curve_color, legend_loc) in enumerate(band_panels, start=1):
    plt.subplot(1, 3, panel_number)
    plt.scatter(rssi_values, var_values, color=point_color, label='Data ' + band_label + ' MHz', s=45)
    plt.plot(X_plot, y_pred, color=curve_color, label='Model Prediction', linewidth=3.5)
    plt.xlabel('RSSI [dBX]', fontsize=20)
    plt.ylabel('Variance [dBX$^2$]', fontsize=20)
    plt.title('Regression for ' + band_label + ' MHz', fontsize=22)
    plt.legend(fontsize=18, loc=legend_loc)
    plt.tick_params(axis='both', which='major', labelsize=18)  # Adjust the size of ticks

plt.tight_layout()
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Lasso
from sklearn.pipeline import make_pipeline
import seaborn as sns
# Seaborn theme with a white grid and black axes edges.
sns.set_theme()
custom = {"axes.edgecolor": "black"}
sns.set_style("whitegrid", rc=custom)

# Single variance-vs-RSSI model fitted on all three bands at once:
# polynomial features followed by Lasso regression.
degree = 3  # Degree of polynomial features
alpha = 0.05  # Regularization strength

# Pool the per-band samples into one feature column and one target vector.
X_combined = np.concatenate([rssi_3470_3510, rssi_3610_3650, rssi_2160_2170]).reshape(-1, 1)
var_combined = np.concatenate([var_3470_3510, var_3610_3650, var_2160_2170])

combined_model = make_pipeline(PolynomialFeatures(degree), Lasso(alpha=alpha))
combined_model.fit(X_combined, var_combined)

# Evaluate the fitted curve over the observed RSSI range.
X_plot = np.linspace(X_combined.min(), X_combined.max(), 100).reshape(-1, 1)
y_pred_combined = combined_model.predict(X_plot)

plt.figure(figsize=(9, 6))
plt.rcParams.update({'font.size': 14})

# Data points: one scatter series per band, each with its own colour and marker.
scatter_specs = [
    (rssi_3470_3510, var_3470_3510, dict(color='blue', label='Data 3470-3510 MHz', s=70, marker='x')),
    (rssi_3610_3650, var_3610_3650, dict(color='green', label='Data 3610-3650 MHz', s=70, marker='s')),
    (rssi_2160_2170, var_2160_2170, dict(color='red', label='Data 2160.5-2169.5 MHz', s=70)),
]
for rssi_values, var_values, scatter_kwargs in scatter_specs:
    plt.scatter(rssi_values, var_values, **scatter_kwargs)

# Fitted curve on top of the points.
plt.plot(X_plot, y_pred_combined, color='black', label='Combined Model Prediction', linewidth=3)

plt.xlabel('RSSI [dBX]', fontsize=16)
plt.ylabel('Variance [dBX$^2$]', fontsize=16)
plt.title('Combined Regression Analysis', fontsize=18)
plt.legend(fontsize=14, loc='upper left')
plt.tick_params(axis='both', which='major', labelsize=14)
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
import numpy as np
import scipy.io as sio
import os
import warnings

def calculate_confidence(map_, data_points, d_max, alpha):
    """Calculate a confidence value for every cell of a 2-D map.

    For a cell at column ``i`` and row ``j``, ``d_p`` is the Euclidean distance
    (in cells) to the nearest data point, and the confidence is
    ``(1 - min(d_p / d_max, 1)) * exp(-alpha * d_p)``.

    Args:
        map_: 2-D array; only its shape and dtype are used.
        data_points: Array of shape ``(n, 2)`` holding ``(x, y)`` =
            ``(column, row)`` positions in map cells.
        d_max: Distance at and beyond which the confidence is 0.
        alpha: Exponential decay rate applied to the distance.

    Returns:
        Array of shape ``map_.shape[:2]`` indexed ``[row, column]``. It keeps
        ``map_``'s dtype when that is floating point and is float64 otherwise.

    Raises:
        ValueError: If ``data_points`` is empty.
    """
    points = np.asarray(data_points, dtype=float)
    if points.size == 0:
        raise ValueError("data_points must contain at least one (x, y) point")

    n_rows, n_cols = map_.shape[0], map_.shape[1]
    row_idx, col_idx = np.indices((n_rows, n_cols))

    # Running minimum over the data points keeps memory at one map-sized array
    # while replacing the per-pixel Python loop with whole-array operations.
    nearest = np.full((n_rows, n_cols), np.inf)
    for point_x, point_y in zip(points[:, 0], points[:, 1]):
        nearest = np.minimum(nearest, np.sqrt((point_x - col_idx) ** 2 + (point_y - row_idx) ** 2))

    confidence_map = (1 - np.minimum(nearest / d_max, 1)) * np.exp(-alpha * nearest)

    # An integer map must not dictate the output dtype: confidences lie in [0, 1]
    # and would all be truncated to 0.
    out_dtype = map_.dtype if np.issubdtype(map_.dtype, np.floating) else np.float64
    return confidence_map.astype(out_dtype)

# Locate the map file: any entry of the folder whose name contains "SLCmap"
# (if several match, the last one listed is used).
map_folderdir = "./"
directory = os.listdir(map_folderdir)
flag = 0
for fname in directory:
    if "SLCmap" not in fname:
        continue
    map_file = os.path.join(map_folderdir, fname)
    flag = 1

if not flag:
    errorMessage = 'Error: The file does not exist in the folder:\n ' + map_folderdir
    warnings.warn(errorMessage)
    raise FileNotFoundError(errorMessage)

x = sio.loadmat(map_file)
map_struct = x['SLC']

# Unwrap the MATLAB struct and index its fields by position.
SLC = map_struct[0][0]
column_map = {name: position for position, name in enumerate(SLC.dtype.names)}

# Surface height: terrain plus scaled building height, then keep every 10th cell.
# NOTE(review): 0.3048 is presumably a feet-to-metres conversion -- confirm.
map_ = SLC[column_map["dem"]] + 0.3048 * SLC[column_map["hybrid_bldg"]]
map_ = map_[::10, ::10]

# Data points as (x, y) map coordinates, rescaled to the downsampled grid.
data_points = np.array([
    [966, 2992],
    [2569, 3767],
    [2873, 3447],
    [2621, 4286],
    [1312, 3830],
    [3828, 2667],
    [242, 2442],
    [1711, 3145],
    [2852, 1584],
    [1903, 2393]
])
data_points = np.floor(data_points.astype(float) / 10.0).astype("int")

# Confidence parameters
d_max = 1000  # distance threshold
alpha = 0.01  # adjustable parameter

confidence_map = calculate_confidence(map_, data_points, d_max, alpha)

# You can add additional code here to visualize or save the confidence map

# You can add additional code here to visualize or save the confidence map


In [None]:
# Cell-index coordinates of the downsampled map: x along columns, y along rows.
n_rows, n_cols = map_.shape[0], map_.shape[1]
grid_x, grid_y = np.meshgrid(
    np.linspace(0, n_cols, n_cols, endpoint=False),
    np.linspace(0, n_rows, n_rows, endpoint=False),
)

# Filled contour plot of the confidence, shown as a percentage.
# NOTE(review): the axes are labelled in metres but the coordinates are grid
# indices -- confirm the intended units.
confidence_percent = confidence_map*100
plt.figure()
plt.contourf(grid_x, grid_y, confidence_percent, 100, cmap='viridis')
plt.colorbar(label='Confidence [Percentage]')
plt.xlabel('UTM_E [m]')
plt.ylabel('UTM_N [m]')
plt.title('2D Predictions of Confidence Level')
plt.show()

In [None]:
# Persist the confidence map computed above.
np.save("confidence_level.npy", confidence_map)

In [None]:
import os
import scipy.io as sio
import matplotlib.pyplot as plt
import numpy as np  # used below; imported here so the cell does not rely on an earlier cell

# Locate the map file: any entry of the folder whose name contains "SLCmap"
# (if several match, the last one listed is used).
map_folderdir = "./"
directory = os.listdir(map_folderdir)
flag = 0
for fname in directory:
    if "SLCmap" in fname:
        map_file = os.path.join(map_folderdir, fname)
        flag = 1

if flag == 0:
    # Stop here: continuing would either hit a NameError on `map_file` or quietly
    # reuse a stale path left behind by an earlier cell.
    errorMessage = 'Error: The file does not exist in the folder:\n ' + map_folderdir
    raise FileNotFoundError(errorMessage)

print('Now reading ' + map_file + "\n")
x = sio.loadmat(map_file)
map_struct = x['SLC']

# Define a new struct named SLC
SLC = map_struct[0][0]
column_map = dict(zip([name for name in SLC.dtype.names], [i for i in range(len(SLC.dtype.names))]))

# Surface height: terrain plus scaled building height, then keep every 10th cell.
# NOTE(review): 0.3048 is presumably a feet-to-metres conversion -- confirm.
map_ = SLC[column_map["dem"]] + 0.3048 * SLC[column_map["hybrid_bldg"]]
map_ = map_[::10, ::10]

# Data points as (x, y) map coordinates, rescaled to the downsampled grid.
data_points = np.array([
    [966, 2992],
    [2569, 3767],
    [2873, 3447],
    [2621, 4286],
    [1312, 3830],
    [3828, 2667],
    [242, 2442],
    [1711, 3145],
    [2852, 1584],
    [1903, 2393]
])
data_points = data_points.astype(float)/10.0
data_points = np.floor(data_points).astype("int")

def lin_to_dB(pow_lin):
    """Convert linear power to decibels: ``10 * log10(pow_lin)``, element-wise.

    Args:
        pow_lin: Scalar or array-like of linear power values.
    """
    linear = np.array(pow_lin)
    return 10 * np.log10(linear)

# Predict variance over the map from the saved signal estimates of one band,
# using the combined regression model fitted earlier in the notebook.
band = "2160_2170"
signal_estimates_file = "signal_estimates_" + band + "_low_res_cov2.npy"
rssi = lin_to_dB(np.load(signal_estimates_file))
rssi_column = rssi.reshape(-1, 1)
y_pred_2160_2170 = combined_model.predict(rssi_column).reshape(map_.shape)

# Cell-index coordinates of the downsampled map: x along columns, y along rows.
n_rows, n_cols = map_.shape[0], map_.shape[1]
grid_x, grid_y = np.meshgrid(
    np.linspace(0, n_cols, n_cols, endpoint=False),
    np.linspace(0, n_rows, n_rows, endpoint=False),
)

# Filled contours of the prediction with the measurement locations on top.
# NOTE(review): the scatter uses its own 'plasma' colour scale, so the colorbar
# (drawn for the contours) does not describe the point colours -- confirm.
plt.figure()
plt.contourf(grid_x, grid_y, y_pred_2160_2170, 100, cmap='viridis')
plt.colorbar(label='Variance [dBX$^2$]')
plt.scatter(data_points[:, 0], data_points[:, 1], c=var_2160_2170, s=50, cmap='plasma', label='Data Collection Points')
plt.xlabel('UTM_E [m]')
plt.ylabel('UTM_N [m]')
plt.title('2D Predictions of Variance')
plt.legend()
plt.show()


In [None]:
import os
import scipy.io as sio
import matplotlib.pyplot as plt
import numpy as np
from scipy.interpolate import griddata
from scipy.spatial import cKDTree

# IDW interpolation function with distance threshold
def idw_interpolation(x, y, z, xi, yi, max_distance):
    """Inverse-distance-weighted interpolation restricted to nearby samples.

    Each target point is estimated from the known samples lying within
    ``max_distance`` of it, weighted by ``1 / distance**2``.

    Args:
        x, y: 1-D coordinates of the known samples.
        z: 1-D values of the known samples.
        xi, yi: 1-D coordinates of the points to estimate.
        max_distance: Search radius, in the same units as the coordinates.

    Returns:
        Float array with the shape of ``xi``; entries with no sample inside the
        search radius are NaN.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    z = np.asarray(z, dtype=float)
    xi = np.asarray(xi, dtype=float)
    yi = np.asarray(yi, dtype=float)

    # Always a float output: an integer `xi` must not make the result integer,
    # which would truncate estimates and reject the NaN marker.
    zi = np.full(xi.shape, np.nan, dtype=float)
    if xi.size == 0:
        return zi

    # Create a KDTree for efficient neighbor search and query all targets at once.
    tree = cKDTree(np.column_stack((x, y)))
    neighbours = tree.query_ball_point(np.column_stack((xi, yi)), r=max_distance)

    for i, indices in enumerate(neighbours):
        if not indices:
            continue  # no sample in range: leave NaN

        # Distances are scaled by 5 before weighting; the search radius above is
        # applied to the unscaled coordinates.
        # NOTE(review): the factor is presumably a grid-cell size -- confirm.
        distances = np.sqrt((x[indices] - xi[i])**2 + (y[indices] - yi[i])**2)*5

        # The small constant keeps the weight finite when a target coincides
        # with a sample.
        weights = 1.0 / (distances**2 + 1e-12)
        zi[i] = np.sum(weights * z[indices]) / np.sum(weights)

    return zi
# Mask the cells of y_pred_2160_2170 whose predicted variance is below 3; these
# are the cells that get replaced by interpolated values further down.
mask = y_pred_2160_2170 < 3

# Grid coordinates (cell indices: x along columns, y along rows)
grid_x, grid_y = np.meshgrid(
    np.linspace(0, map_.shape[1], map_.shape[1], endpoint=False),
    np.linspace(0, map_.shape[0], map_.shape[0], endpoint=False)
)

# Flatten grid coordinates
flat_grid_x = grid_x.ravel()
flat_grid_y = grid_y.ravel()

# Values that are kept (>= 3) and their coordinates; these are the known samples
# for the interpolation.
pos_values = y_pred_2160_2170[~mask]
pos_x = flat_grid_x[~mask.ravel()]
pos_y = flat_grid_y[~mask.ravel()]

# Interpolate the masked cells using IDW from kept cells within 200 grid units
interpolated_values = idw_interpolation(pos_x, pos_y, pos_values, flat_grid_x[mask.ravel()], flat_grid_y[mask.ravel()], max_distance=200.0)

# Replace the masked values in y_pred_2160_2170 with the interpolated values.
# This modifies the array in place, so re-running the cell starts from the
# already-filled map rather than from the original prediction.
y_pred_2160_2170[mask] = interpolated_values

# Plotting: filled contours of the patched prediction plus measurement locations
plt.figure()
plt.contourf(grid_x, grid_y, y_pred_2160_2170, 100, cmap='viridis')
plt.colorbar(label='Variance [dBX$^2$]')
plt.scatter(data_points[:, 0], data_points[:, 1], c=var_2160_2170, s=50, cmap='plasma', label='Data Collection Points')
plt.xlabel('UTM_E [m]')
plt.ylabel('UTM_N [m]')
plt.title('2D Predictions of Variance')
plt.legend()
plt.show()



In [None]:
# Persist the variance prediction map currently held in y_pred_2160_2170.
np.save("var_pred_2160_2170_3.npy",y_pred_2160_2170)

In [None]:
import gzip
import pandas as pd
import os
import re
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Time-of-day windows as (start, end) clock strings; 'night' runs past midnight.
intervals = dict(
    morning=('04:00:00', '12:00:00'),
    afternoon=('12:00:00', '20:00:00'),
    night=('20:00:00', '04:00:00'),
)

# Function to read gzipped CSV files
def read_gzipped_csv(file_path):
    """Read a gzip-compressed CSV file and return its contents as a DataFrame.

    Args:
        file_path: Path of the ``.gz`` file; it is opened in text mode.
    """
    text_stream = gzip.open(file_path, 'rt')
    try:
        return pd.read_csv(text_stream)
    finally:
        text_stream.close()

# Function to extract timestamp from filename
def extract_timestamp_from_filename(filename):
    """Return the integer that precedes ``.csv.gz`` in ``filename``.

    The name must end in ``-<digits>.csv.gz``; ``None`` is returned otherwise.
    """
    found = re.search(r'-(\d+)\.csv\.gz$', filename)
    if not found:
        return None
    return int(found.group(1))

# Function to concatenate CSVs in a folder
def concatenate_csvs_in_folder(folder_path):
    """Stack every gzip-compressed CSV of a folder into one DataFrame.

    Each ``*.gz`` file is read, given a ``timestamp`` column derived from the
    epoch seconds in its file name, stripped of rows containing missing values,
    and restricted to timestamps on or after 2023-01-01.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        The concatenated frame with a fresh index, or an empty ``DataFrame``
        when the folder contains no ``.gz`` files.
    """
    cutoff_date = pd.Timestamp('2023-01-01')

    def load_one(filename):
        timestamp = extract_timestamp_from_filename(filename)
        frame = read_gzipped_csv(os.path.join(folder_path, filename))
        frame['timestamp'] = pd.to_datetime(timestamp, unit='s')
        frame = frame.dropna()
        # The timestamp is constant per file, so this keeps or drops the whole file.
        return frame[frame['timestamp'] >= cutoff_date]

    dataframes = [
        load_one(filename)
        for filename in os.listdir(folder_path)
        if filename.endswith('.gz')
    ]
    if not dataframes:
        return pd.DataFrame()
    return pd.concat(dataframes, ignore_index=True)

# Function to calculate linearly varying threshold
def linear_threshold(freq, start, end, threshold_start, threshold_end):
    """Occupancy threshold that varies linearly with frequency across a band.

    The threshold equals ``threshold_start`` at ``start`` and ``threshold_end``
    at ``end``; values in between (or outside) follow the same straight line.

    Args:
        freq: Frequency or array/Series of frequencies to evaluate.
        start: Lower band edge.
        end: Upper band edge.
        threshold_start: Threshold at ``start``.
        threshold_end: Threshold at ``end``.

    Returns:
        The threshold for ``freq``, with the same shape as ``freq``.
    """
    if end == start:
        # Zero-width band: the slope is undefined and the division below would
        # yield NaN/inf (or raise), silently marking every sample unoccupied.
        # Use the threshold defined at the band's single frequency instead.
        return freq * 0 + threshold_start
    return threshold_start + ((threshold_end - threshold_start) * (freq - start) / (end - start))

# Function to process data for a single monitor
def _period_occupancy_metrics(samples, threshold_start, threshold_end, band_start, band_end):
    """Compute duty cycle and occupied-power statistics for one subset of samples.

    ``samples`` needs ``frequency`` and ``power`` columns. Rows outside
    ``[band_start, band_end]`` are ignored; a row is "occupied" when its power
    exceeds the linear threshold evaluated at its frequency.
    """
    in_band = samples[(samples['frequency'] >= band_start) & (samples['frequency'] <= band_end)]
    # Evaluate the threshold on the whole column at once and keep it as its own
    # Series: assigning a new column to a filtered frame triggers pandas'
    # SettingWithCopyWarning.
    threshold = linear_threshold(in_band['frequency'], band_start, band_end, threshold_start, threshold_end)
    occupied_power = in_band.loc[in_band['power'] > threshold, 'power']

    return {
        'Duty_Cycle': len(occupied_power) / len(in_band) * 100 if len(in_band) > 0 else 0,
        'Avg_Power_Occupied': np.mean(occupied_power) if not occupied_power.empty else 0,
        'Variance_Power_Occupied': np.var(occupied_power) if not occupied_power.empty else 0,
    }


def process_monitor_data(monitor_foldername, threshold_start, threshold_end, band_start, band_end):
    """Summarise band occupancy for one monitor by time of day.

    Args:
        monitor_foldername: Sub-folder of ``./rfbaseline/`` holding the captures.
        threshold_start: Occupancy threshold at ``band_start``.
        threshold_end: Occupancy threshold at ``band_end``.
        band_start: Lower edge of the analysed band (inclusive).
        band_end: Upper edge of the analysed band (inclusive).

    Returns:
        ``{period: metrics}`` with one entry per key of the module-level
        ``intervals``. Each ``metrics`` dict holds ``Duty_Cycle`` (percent),
        ``Avg_Power_Occupied`` and ``Variance_Power_Occupied``; all three are 0
        when no samples qualify.
    """
    folder_path = './rfbaseline/' + monitor_foldername + "/"
    combined_df = concatenate_csvs_in_folder(folder_path)

    if combined_df.empty:
        # No usable rows (an empty frame may not even have a 'timestamp' column):
        # report the same zeros an empty subset would produce instead of crashing.
        return {
            period: {'Duty_Cycle': 0, 'Avg_Power_Occupied': 0, 'Variance_Power_Occupied': 0}
            for period in intervals
        }

    # NOTE(review): timestamps are built from epoch seconds without a timezone, so
    # the windows are presumably evaluated in UTC rather than local time -- confirm.
    clock_time = combined_df['timestamp'].dt.time

    results = {}
    for period, (start_time, end_time) in intervals.items():
        window_start = pd.to_datetime(start_time).time()
        window_end = pd.to_datetime(end_time).time()
        if window_start > window_end:
            # Window wraps past midnight (the 'night' interval).
            mask = (clock_time >= window_start) | (clock_time < window_end)
        else:
            mask = (clock_time >= window_start) & (clock_time < window_end)
        results[period] = _period_occupancy_metrics(
            combined_df.loc[mask], threshold_start, threshold_end, band_start, band_end)

    return results


# Main function to process all monitors and plot results
def main():
    """Compute per-period occupancy metrics for every monitor and plot them.

    Runs ``process_monitor_data`` for each monitor folder with a fixed
    threshold and band, then draws one subplot per metric with one line per
    monitor across the time-of-day periods.

    Returns:
        dict mapping monitor name to the per-period results returned by
        ``process_monitor_data``.
    """
    sns.set_theme()

    # Analysis configuration: edit these to change monitors, threshold or band.
    monitors = ['Bookstore', 'EBC', 'Guesthouse', 'Moran', 'WEB', 'Sagepoint', 'Law73', 'Humanities', 'Madsen', 'Garage']  # Add your monitor folder names here
    threshold_start = -105  # Set your threshold start
    threshold_end = -105  # Set your threshold end
    band_start = 3470  # Set your band start
    band_end = 3510  # Set your band end

    # One entry per monitor, in the order listed above.
    all_results = {
        monitor: process_monitor_data(monitor, threshold_start, threshold_end, band_start, band_end)
        for monitor in monitors
    }

    # Three vertically stacked axes, one per metric.
    fig, axs = plt.subplots(3, 1, figsize=(12, 18))
    fig.suptitle('RF Monitor Metrics at Different Times of Day')

    metrics = ['Duty_Cycle', 'Avg_Power_Occupied', 'Variance_Power_Occupied']
    for ax, metric in zip(axs, metrics):
        for monitor, period_results in all_results.items():
            times_of_day = list(period_results)
            values = [period_results[period][metric] for period in times_of_day]
            ax.plot(times_of_day, values, label=monitor)

        ax.set_title(metric)
        ax.set_xlabel('Time of Day')
        ax.set_ylabel(metric)
        ax.legend()

    # Leave headroom at the top for the figure-level title.
    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.show()

    return all_results

if __name__ == "__main__":
    all_results = main()


In [None]:
# `metrics` inside main() is a local variable, so it is not visible in this
# cell on a fresh kernel; define the metric names here. They are the keys of
# the per-period dicts produced by process_monitor_data.
metrics = ['Duty_Cycle', 'Avg_Power_Occupied', 'Variance_Power_Occupied']

# One standalone figure per metric, one line per monitor.
for metric in metrics:
    plt.figure(figsize=(10, 6))
    for monitor, results in all_results.items():
        times_of_day = list(results.keys())
        values = [results[time][metric] for time in times_of_day]
        plt.plot(times_of_day, values, label=monitor)

    plt.title(f'{metric} for Each Monitor at Different Times of Day')
    plt.xlabel('Time of Day')
    plt.ylabel(metric)
    plt.legend()
    plt.show()

In [None]:
import gzip
import pandas as pd
import os
import re
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib

def occupancy(monitor_foldername, threshold_start, threshold_end, band_start, band_end):
    """Summarise and plot band occupancy for one monitor using a sloped threshold.

    Reads every ``*.gz`` CSV in ``./rfbaseline/<monitor_foldername>/`` whose
    filename timestamp is on or after 2023-01-01, then:

    * prints a table with one row per 1 MHz chunk of the band (mean/std of
      ``power``, the threshold at the chunk centre, duty cycle, and mean and
      variance of the power above threshold) followed by the column means;
    * draws a frequency/power scatter plot with the threshold line;
    * prints the duty cycle over the whole band, comparing every row with the
      threshold at its own frequency;
    * draws a histogram of ``power`` over the band.

    NOTE(review): another cell in this notebook defines ``occupancy`` with a
    single ``threshold`` parameter; whichever definition ran last is the one
    that calls resolve to.

    Args:
        monitor_foldername: Name of the monitor's folder under ``./rfbaseline/``.
        threshold_start: Threshold at ``band_start``.
        threshold_end: Threshold at ``band_end``.
        band_start: Lower band edge in MHz (inclusive).
        band_end: Upper band edge in MHz (inclusive).

    Returns:
        None. Results are printed and plotted.

    Raises:
        ValueError: If ``band_end`` is not greater than ``band_start``.
    """
    sns.set_theme()

    if band_end <= band_start:
        raise ValueError("band_end must be greater than band_start")

    def read_gzipped_csv(file_path):
        """Read one gzip-compressed CSV file into a DataFrame."""
        with gzip.open(file_path, 'rt') as file:
            return pd.read_csv(file)

    def extract_timestamp_from_filename(filename):
        """Return the Unix timestamp from a '...-<digits>.csv.gz' name, or None."""
        match = re.search(r'-(\d+)\.csv\.gz$', filename)
        return int(match.group(1)) if match else None

    def concatenate_csvs_in_folder(folder_path):
        """Concatenate all captures in the folder taken on or after the cutoff date."""
        dataframes = []
        cutoff_date = pd.Timestamp('2023-01-01')  # Captures before Jan 1, 2023 are ignored

        for filename in os.listdir(folder_path):
            if not filename.endswith('.gz'):
                continue
            timestamp = extract_timestamp_from_filename(filename)
            if timestamp is None:
                # Without a timestamp every row would be dropped below anyway.
                continue
            file_time = pd.to_datetime(timestamp, unit='s')
            if file_time < cutoff_date:
                # The timestamp is per file, so skip old files before
                # paying for the decompression.
                continue

            df = read_gzipped_csv(os.path.join(folder_path, filename))
            df['timestamp'] = file_time
            # Drop rows with NaN values
            dataframes.append(df.dropna())

        if dataframes:
            return pd.concat(dataframes, ignore_index=True)
        return pd.DataFrame()

    def linear_threshold(freq, start, end, threshold_start, threshold_end):
        """Calculate linearly varying threshold."""
        return threshold_start + ((threshold_end - threshold_start) * (freq - start) / (end - start))

    folder_path = './rfbaseline/'+monitor_foldername+"/"
    combined_df = concatenate_csvs_in_folder(folder_path)
    if combined_df.empty:
        # Nothing to analyse; an empty frame has no 'frequency'/'power' columns.
        print("No data available")
        return
    combined_df = combined_df.drop(columns=['center_freq'], errors='ignore')

    chunk_size = 1  # in MHz

    # Per-chunk statistics, one dict per chunk.
    aggregate_results = []

    # NOTE(review): both chunk edges are inclusive, so a sample exactly on an
    # interior chunk boundary is counted in two adjacent chunks.
    for start in np.arange(band_start, band_end, chunk_size):
        end = start + chunk_size
        chunk_df = combined_df[(combined_df['frequency'] >= start) & (combined_df['frequency'] <= end)]

        # Mean and standard deviation of the power values as stored
        # (no conversion to linear units is applied).
        mean_power_db = np.mean(chunk_df['power'])
        std_power_db = np.std(chunk_df['power'])

        # Threshold evaluated at the centre of the chunk.
        chunk_threshold = linear_threshold((start+end)/2, band_start, band_end, threshold_start, threshold_end)

        occupied_df = chunk_df[chunk_df['power'] > chunk_threshold]
        # A chunk with no samples has no defined duty cycle.
        duty_cycle = len(occupied_df) / len(chunk_df) * 100 if len(chunk_df) > 0 else np.nan
        avg_power_occupied = np.mean(occupied_df['power']) if not occupied_df.empty else np.nan
        variance_power_occupied = np.var(occupied_df['power']) if not occupied_df.empty else np.nan

        aggregate_results.append({
            'Chunk_Start': start,
            'Chunk_End': end,
            'Mean': mean_power_db,
            'Std': std_power_db,
            'Threshold': chunk_threshold,
            'Duty_Cycle': duty_cycle,
            'Avg_Power_Occupied': avg_power_occupied,
            'Variance_Power_Occupied': variance_power_occupied
        })

    aggregate_results_df = pd.DataFrame(aggregate_results)

    # Simple mean over chunks (NaN entries are skipped by pandas).
    final_mean_threshold = aggregate_results_df['Threshold'].mean()
    final_duty_cycle = aggregate_results_df['Duty_Cycle'].mean()
    final_avg_power_occupied = aggregate_results_df['Avg_Power_Occupied'].mean()
    final_variance_power_occupied = aggregate_results_df['Variance_Power_Occupied'].mean()

    print(aggregate_results_df)
    print(f"Final Mean Threshold: {final_mean_threshold}")
    print(f"Final Duty Cycle: {final_duty_cycle}")
    print(f"Final Average Power Occupied: {final_avg_power_occupied}")
    print(f"Final Variance of Power Occupied: {final_variance_power_occupied}")

    # Rows inside the band; copied so the threshold column can be added safely.
    filtered_df = combined_df[(combined_df['frequency'] >= band_start) & (combined_df['frequency'] <= band_end)].copy()

    # Threshold at each row's own frequency (vectorised over the column).
    filtered_df['threshold'] = linear_threshold(filtered_df['frequency'], band_start, band_end, threshold_start, threshold_end)

    plt.figure(figsize=(20, 15))
    plt.scatter(filtered_df["frequency"], filtered_df["power"], marker="*", s=0.3)
    frequencies = np.linspace(band_start, band_end, 500)  # Generate frequency points
    thresholds = [linear_threshold(freq, band_start, band_end, threshold_start, threshold_end) for freq in frequencies]
    plt.plot(frequencies, thresholds, 'r--', label='Threshold')  # Plot threshold line

    plt.xlabel("Frequency (MHz)")
    plt.ylabel("Power (dB)")
    plt.title("Monitor @"+monitor_foldername)
    plt.legend()
    plt.show()

    # Share of in-band rows whose power exceeds their own threshold.
    occupied_count = int((filtered_df['power'] > filtered_df['threshold']).sum())
    total_count = filtered_df.shape[0]
    duty_cycle = (occupied_count / total_count) * 100 if total_count > 0 else np.nan

    print(duty_cycle)
    # This changes matplotlib's global font setting and therefore also affects
    # figures drawn after this call.
    matplotlib.rcParams['font.family'] = 'Times New Roman'
    fig, ax = plt.subplots(figsize=(8, 6))
    plt.hist(filtered_df["power"], bins=30, color='green')
    plt.ylabel("Count", fontsize=20)
    plt.xlabel("Power [dBX]", fontsize=20)
    plt.title(str(band_start) + "-" + str(band_end) + " MHz Power Histogram @ "+monitor_foldername, fontsize=24)
    ax.tick_params(axis='x', labelsize=16)
    ax.tick_params(axis='y', labelsize=14)
    plt.show()

### Functional Form

In [None]:
import gzip
import pandas as pd
import os
import re
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib


def occupancy(monitor_foldername, threshold, band_start, band_end):
    """Summarise and plot band occupancy for one monitor using a flat threshold.

    Reads every ``*.gz`` CSV in ``./rfbaseline/<monitor_foldername>/`` whose
    filename timestamp is on or after 2023-01-01, then:

    * prints a one-row table for the whole band (mean/std of ``power``, the
      threshold, duty cycle, and mean and variance of the power above
      threshold) followed by the column means;
    * draws a frequency/power scatter plot;
    * prints the duty cycle over the band;
    * draws a histogram of ``power`` with the threshold marked.

    NOTE(review): another cell in this notebook defines ``occupancy`` with
    ``threshold_start``/``threshold_end`` parameters; whichever definition ran
    last is the one that calls resolve to.

    Args:
        monitor_foldername: Name of the monitor's folder under ``./rfbaseline/``.
        threshold: Power level above which a sample counts as occupied.
        band_start: Lower band edge in MHz (inclusive).
        band_end: Upper band edge in MHz (inclusive).

    Returns:
        None. Results are printed and plotted.

    Raises:
        ValueError: If ``band_end`` is not greater than ``band_start``.
    """
    sns.set_theme()

    if band_end <= band_start:
        raise ValueError("band_end must be greater than band_start")

    def read_gzipped_csv(file_path):
        """Read one gzip-compressed CSV file into a DataFrame."""
        with gzip.open(file_path, 'rt') as file:
            return pd.read_csv(file)

    def extract_timestamp_from_filename(filename):
        """Return the Unix timestamp from a '...-<digits>.csv.gz' name, or None."""
        match = re.search(r'-(\d+)\.csv\.gz$', filename)
        return int(match.group(1)) if match else None

    def concatenate_csvs_in_folder(folder_path):
        """Concatenate all captures in the folder taken on or after the cutoff date."""
        dataframes = []
        cutoff_date = pd.Timestamp('2023-01-01')  # Captures before Jan 1, 2023 are ignored

        for filename in os.listdir(folder_path):
            if not filename.endswith('.gz'):
                continue
            timestamp = extract_timestamp_from_filename(filename)
            if timestamp is None:
                # Without a timestamp every row would be dropped below anyway.
                continue
            file_time = pd.to_datetime(timestamp, unit='s')
            if file_time < cutoff_date:
                # The timestamp is per file, so skip old files before
                # paying for the decompression.
                continue

            df = read_gzipped_csv(os.path.join(folder_path, filename))
            df['timestamp'] = file_time
            # Drop rows with NaN values
            dataframes.append(df.dropna())

        if dataframes:
            return pd.concat(dataframes, ignore_index=True)
        return pd.DataFrame()

    folder_path = './rfbaseline/'+monitor_foldername+"/"
    combined_df = concatenate_csvs_in_folder(folder_path)
    if combined_df.empty:
        # Nothing to analyse; an empty frame has no 'frequency'/'power' columns.
        print("No data available")
        return
    combined_df = combined_df.drop(columns=['center_freq'], errors='ignore')

    # The chunk spans the whole band, so the loop below runs exactly once.
    chunk_size = band_end-band_start  # in MHz

    # Per-chunk statistics, one dict per chunk.
    aggregate_results = []

    for start in np.arange(band_start, band_end, chunk_size):
        end = start + chunk_size
        chunk_df = combined_df[(combined_df['frequency'] >= start) & (combined_df['frequency'] <= end)]

        # Mean and standard deviation of the power values as stored
        # (no conversion to linear units is applied).
        mean_power_db = np.mean(chunk_df['power'])
        std_power_db = np.std(chunk_df['power'])

        occupied_df = chunk_df[chunk_df['power'] > threshold]
        # A chunk with no samples has no defined duty cycle.
        duty_cycle = len(occupied_df) / len(chunk_df) * 100 if len(chunk_df) > 0 else np.nan
        avg_power_occupied = np.mean(occupied_df['power']) if not occupied_df.empty else np.nan
        variance_power_occupied = np.var(occupied_df['power']) if not occupied_df.empty else np.nan

        aggregate_results.append({
            'Chunk_Start': start,
            'Chunk_End': end,
            'Mean': mean_power_db,
            'Std': std_power_db,
            'Threshold': threshold,
            'Duty_Cycle': duty_cycle,
            'Avg_Power_Occupied': avg_power_occupied,
            'Variance_Power_Occupied': variance_power_occupied
        })

    aggregate_results_df = pd.DataFrame(aggregate_results)

    # Simple mean over chunks (NaN entries are skipped by pandas).
    final_mean_threshold = aggregate_results_df['Threshold'].mean()
    final_duty_cycle = aggregate_results_df['Duty_Cycle'].mean()
    final_avg_power_occupied = aggregate_results_df['Avg_Power_Occupied'].mean()
    final_variance_power_occupied = aggregate_results_df['Variance_Power_Occupied'].mean()

    print(aggregate_results_df)
    print(f"Final Mean Threshold: {final_mean_threshold}")
    print(f"Final Duty Cycle: {final_duty_cycle}")
    print(f"Final Average Power Occupied: {final_avg_power_occupied}")
    print(f"Final Variance of Power Occupied: {final_variance_power_occupied}")

    # Rows inside the band (both edges inclusive).
    filtered_df = combined_df[(combined_df['frequency'] >= band_start) & (combined_df['frequency'] <= band_end)]

    plt.figure(figsize=(20, 15))
    plt.scatter(filtered_df["frequency"], filtered_df["power"], marker="*", s=0.3)
    plt.xlabel("Frequency (MHz)")
    plt.ylabel("Power (dB)")
    plt.title("Monitor @"+monitor_foldername)
    plt.show()

    # Share of in-band rows whose power exceeds the threshold.
    occupied_count = filtered_df[filtered_df['power'] > threshold].shape[0]
    total_count = filtered_df.shape[0]
    duty_cycle = (occupied_count / total_count) * 100 if total_count > 0 else np.nan

    print(duty_cycle)
    # This changes matplotlib's global font setting and therefore also affects
    # figures drawn after this call.
    matplotlib.rcParams['font.family'] = 'Times New Roman'
    fig, ax = plt.subplots(figsize=(8, 6))
    plt.hist(filtered_df["power"], bins=30, color='green')
    plt.ylabel("Count", fontsize=20)
    plt.xlabel("Power [dBX]", fontsize=20)
    plt.title(str(band_start) + "-" + str(band_end) + " MHz Power Histogram @ "+monitor_foldername, fontsize=24)
    ax.tick_params(axis='x', labelsize=16)
    ax.tick_params(axis='y', labelsize=14)
    # Mark the threshold on the histogram, with the label placed at 75% of the y-range.
    plt.axvline(x=threshold, color='red', linestyle='dashed')
    plt.text(threshold + 0.5, plt.ylim()[1] * 0.75, 'Threshold', color='red', fontsize=18)
    plt.show()

In [None]:
# Flat-threshold runs: threshold -105 over the 3470-3510 MHz band, one cell per monitor.
occupancy("Bookstore", -105, 3470, 3510)

In [None]:
occupancy("EBC", -105, 3470, 3510)

In [None]:
occupancy("Guesthouse", -105, 3470, 3510)

In [None]:
occupancy("Moran", -105, 3470, 3510)

In [None]:
occupancy("WEB", -105, 3470, 3510)

In [None]:
occupancy("Sagepoint", -105, 3470, 3510)

In [None]:
occupancy("Law73", -105, 3470, 3510)

In [None]:
occupancy("Humanities", -105, 3470, 3510)

In [None]:
occupancy("Madsen", -105, 3470, 3510)

In [None]:
occupancy("Garage", -105, 3470, 3510)

In [None]:
# Flat-threshold runs: threshold -105 over the 3610-3650 MHz band, one cell per monitor.
occupancy("Bookstore", -105, 3610, 3650)

In [None]:
occupancy("EBC", -105, 3610, 3650)

In [None]:
occupancy("Guesthouse", -105, 3610, 3650)

In [None]:
occupancy("Moran", -105, 3610, 3650)

In [None]:
occupancy("WEB", -105, 3610, 3650)

In [None]:
occupancy("Sagepoint", -105, 3610, 3650)

In [None]:
occupancy("Law73", -105, 3610, 3650)

In [None]:
occupancy("Humanities", -105, 3610, 3650)

In [None]:
occupancy("Madsen", -105, 3610, 3650)

In [None]:
occupancy("Garage", -105, 3610, 3650)

In [None]:
# Sloped-threshold signature: (monitor, threshold_start, threshold_end, band_start, band_end).
# Threshold -100 at both band edges over 2160.5-2169.5 MHz, one cell per monitor.
# NOTE(review): these calls pass five arguments, so they need the
# threshold_start/threshold_end definition of occupancy to be the active one;
# the four-parameter definition raises TypeError for them.
occupancy("Bookstore", -100, -100, 2160.5, 2169.5)

In [None]:
occupancy("EBC",  -100, -100, 2160.5, 2169.5)

In [None]:
occupancy("Guesthouse",  -100, -100, 2160.5, 2169.5)

In [None]:
occupancy("Moran",  -100, -100, 2160.5, 2169.5)

In [None]:
occupancy("WEB",  -100, -100, 2160.5, 2169.5)

In [None]:
occupancy("Sagepoint",  -100, -100, 2160.5, 2169.5)

In [None]:
occupancy("Law73", -100, -100, 2160.5, 2169.5)

In [None]:
occupancy("Humanities",  -100, -100, 2160.5, 2169.5)

In [None]:
occupancy("Madsen",  -100, -100, 2160.5, 2169.5)

In [None]:
occupancy("Garage", -100, -100, 2160.5, 2169.5)

In [None]:
# Sloped-threshold runs: threshold rising from -100 at 2504 MHz to -90 at 2544 MHz,
# one cell per monitor.
# NOTE(review): five-argument calls; they need the threshold_start/threshold_end
# definition of occupancy to be the active one.
occupancy("Bookstore", -100, -90, 2504, 2544)

In [None]:
occupancy("EBC", -100, -90, 2504, 2544)

In [None]:
occupancy("Guesthouse", -100, -90, 2504, 2544)

In [None]:
occupancy("Moran", -100, -90, 2504, 2544)

In [None]:
occupancy("WEB", -100, -90, 2504, 2544)

In [None]:
occupancy("Sagepoint", -100, -90, 2504, 2544)

In [None]:
occupancy("Law73", -100, -90, 2504, 2544)

In [None]:
occupancy("Humanities", -100, -90, 2504, 2544)

In [None]:
occupancy("Madsen", -100, -90, 2504, 2544)

In [None]:
occupancy("Garage", -100, -90, 2504, 2544)

In [None]:
# Threshold -100 at both band edges over 1955-1970 MHz, one cell per monitor.
# NOTE(review): five-argument calls; they need the threshold_start/threshold_end
# definition of occupancy to be the active one.
occupancy("Bookstore", -100, -100, 1955, 1970)

In [None]:
occupancy("EBC", -100, -100, 1955, 1970)

In [None]:
occupancy("Guesthouse", -100, -100, 1955, 1970)

In [None]:
occupancy("Moran", -100, -100, 1955, 1970)

In [None]:
occupancy("WEB", -100, -100, 1955, 1970)

In [None]:
occupancy("Sagepoint", -100, -100, 1955, 1970)

In [None]:
occupancy("Law73", -100, -100, 1955, 1970)

In [None]:
occupancy("Humanities", -100, -100, 1955, 1970)

In [None]:
occupancy("Madsen", -100, -100, 1955, 1970)

In [None]:
occupancy("Garage", -100, -100, 1955, 1970)

In [None]:
# Hand-entered per-monitor values, keyed by monitor folder name.
# NOTE(review): presumably [duty cycle (%), average occupied power] copied from
# occupancy() output; the band and threshold they belong to are not recorded
# here. TODO confirm.
occupancy_dict = {"Bookstore": [45.00, -89.90], "EBC": [89.79, -94.60], "Guesthouse":[76.90, -99.32], "Moran":[21.23, -101.14], "WEB": [42.88, -91.95], "Sagepoint": [56.00, -98.58], "Law73":[44.92, -86.17], "Humanities": [71.66, -97.28], "Madsen":[44.29, -100.86], "Garage": [99.00, -87.82]}

In [None]:
# Re-runs for two monitors with a threshold of -106 over 3470-3510 MHz
# (four-argument, flat-threshold signature).
occupancy("Madsen", -106, 3470, 3510)

In [None]:
occupancy("Garage", -106, 3470, 3510)

In [None]:
import gzip
import pandas as pd
import os
import re
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
def occupancy2(monitor_foldername):
    """Load a monitor folder, group captures by filename prefix, and analyse each group.

    Every ``*.gz`` CSV in ``./rfbaseline/<monitor_foldername>/`` is read and
    tagged with the Unix timestamp parsed from its filename (``None`` when the
    name does not match). Files are grouped by the part of the filename before
    the first ``_``; each group is concatenated, the resulting mapping is
    printed, and every group is passed to ``occupancy3`` together with its name.

    Args:
        monitor_foldername: Name of the folder under ``./rfbaseline/``.

    Returns:
        None.
    """
    sns.set_theme()

    def read_gzipped_csv(file_path):
        """Read one gzip-compressed CSV file into a DataFrame."""
        with gzip.open(file_path, 'rt') as handle:
            return pd.read_csv(handle)

    def extract_timestamp_from_filename(filename):
        """Return the integer in a trailing '-<digits>.csv.gz', or None."""
        found = re.search(r'-(\d+)\.csv\.gz$', filename)
        return int(found.group(1)) if found else None

    def concatenate_csvs_in_folder(folder_path):
        """Return {filename prefix: concatenated DataFrame} for the folder."""
        frames_by_prefix = {}

        for filename in os.listdir(folder_path):
            if not filename.endswith('.gz'):
                continue
            # Group key: everything before the first "_" in the filename.
            prefix = filename.split('_')[0]
            stamp = extract_timestamp_from_filename(filename)
            frame = read_gzipped_csv(os.path.join(folder_path, filename))
            frame['timestamp'] = stamp
            frames_by_prefix.setdefault(prefix, []).append(frame)

        return {
            prefix: pd.concat(frames, ignore_index=True)
            for prefix, frames in frames_by_prefix.items()
            if frames
        }

    folder_path = './rfbaseline/'+monitor_foldername+"/"
    combined_df = concatenate_csvs_in_folder(folder_path)
    print(combined_df)

    for key in combined_df.keys():
        occupancy3(combined_df[key], key)

# NOTE(review): occupancy2 calls occupancy3, which is only defined in a later
# cell of this notebook; on a fresh kernel this call fails until that cell has run.
occupancy2("Emulab")

In [None]:
import gzip
import pandas as pd
import os
import re
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

def occupancy2(monitor_foldername):
    """Load the 3550-3600 MHz rows of a monitor folder and analyse each node.

    Every ``*.gz`` CSV in ``./rfbaseline/<monitor_foldername>/`` is read,
    restricted to rows whose ``frequency`` lies in the band, and tagged with
    the Unix timestamp parsed from its filename (``None`` when the name does
    not match). Files are grouped by the part of the filename before the first
    ``_``; for each group the row count is printed and the concatenated frame
    is passed to ``occupancy3`` together with the group name.

    Args:
        monitor_foldername: Name of the folder under ``./rfbaseline/``.

    Returns:
        None.
    """
    sns.set_theme()

    def read_gzipped_csv(file_path, band_start, band_end):
        """Read one gzipped CSV and return its in-band rows as an independent frame."""
        with gzip.open(file_path, 'rt') as file:
            df = pd.read_csv(file)
        in_band = (df['frequency'] >= band_start) & (df['frequency'] <= band_end)
        # Copy so the caller can add columns without writing into a slice of
        # `df` (which triggers pandas' SettingWithCopyWarning).
        return df[in_band].copy()

    def extract_timestamp_from_filename(filename):
        """Return the integer in a trailing '-<digits>.csv.gz', or None."""
        match = re.search(r'-(\d+)\.csv\.gz$', filename)
        return int(match.group(1)) if match else None

    def concatenate_csvs_in_folder(folder_path, band_start, band_end):
        """Return {filename prefix: concatenated in-band DataFrame} for the folder."""
        dataframes_dict = {}

        for filename in os.listdir(folder_path):
            if filename.endswith('.gz'):
                # Group key: everything before the first "_" in the filename.
                base_name = filename.split('_')[0]
                file_path = os.path.join(folder_path, filename)
                timestamp = extract_timestamp_from_filename(filename)
                df = read_gzipped_csv(file_path, band_start, band_end)
                df['timestamp'] = timestamp
                dataframes_dict.setdefault(base_name, []).append(df)

        return {
            base_name: pd.concat(dfs, ignore_index=True)
            for base_name, dfs in dataframes_dict.items()
            if dfs
        }

    # Define the start and end of the band (in MHz)
    band_start = 3550
    band_end = 3600

    folder_path = './rfbaseline/' + monitor_foldername + "/"
    combined_df = concatenate_csvs_in_folder(folder_path, band_start, band_end)
    for key in combined_df.keys():
        print(len(combined_df[key]))
        occupancy3(combined_df[key], key)

def occupancy3(df_, node_name, band_start=3550, band_end=3600, chunk_size=50, threshold=-138):
    """Report occupancy of the per-frequency mean power for one node.

    ``power`` is first averaged per ``frequency`` value; all statistics below
    are computed on those averages, not on the raw rows. The function prints a
    per-chunk table and its column means, draws a frequency/power scatter
    plot, prints the duty cycle over the whole band, and draws a histogram.

    Args:
        df_: DataFrame with ``frequency``, ``power`` and ``center_freq`` columns.
        node_name: Label used in the scatter-plot title.
        band_start: Lower band edge in MHz (default 3550).
        band_end: Upper band edge in MHz (default 3600).
        chunk_size: Chunk width in MHz for the per-chunk table (default 50).
        threshold: Power level above which a frequency counts as occupied
            (default -138).

    Returns:
        None. Results are printed and plotted.
    """
    df_ = df_.drop(columns=['center_freq'])

    # Mean power per distinct frequency value.
    df = df_.groupby('frequency')['power'].mean().reset_index()

    aggregate_results = []

    # NOTE(review): chunks are half-open [start, end), while the band-wide
    # filter further down includes band_end itself.
    for start in np.arange(band_start, band_end, chunk_size):
        end = start + chunk_size
        chunk_df = df[(df['frequency'] >= start) & (df['frequency'] < end)]

        mean_power_db = np.mean(chunk_df['power'])
        std_power_db = np.std(chunk_df['power'])

        occupied_df = chunk_df[chunk_df['power'] > threshold]
        # A chunk with no frequencies has no defined duty cycle.
        duty_cycle = len(occupied_df) / len(chunk_df) * 100 if len(chunk_df) > 0 else np.nan
        avg_power_occupied = np.mean(occupied_df['power']) if not occupied_df.empty else np.nan

        aggregate_results.append({
            'Chunk_Start': start,
            'Chunk_End': end,
            'Mean': mean_power_db,
            'Std': std_power_db,
            'Threshold': threshold,
            'Duty_Cycle': duty_cycle,
            'Avg_Power_Occupied': avg_power_occupied
        })

    aggregate_results_df = pd.DataFrame(aggregate_results)

    # Simple mean over chunks (NaN entries are skipped by pandas).
    final_mean_threshold = aggregate_results_df['Threshold'].mean()
    final_duty_cycle = aggregate_results_df['Duty_Cycle'].mean()
    final_avg_power_occupied = aggregate_results_df['Avg_Power_Occupied'].mean()

    print(aggregate_results_df)
    print(f"Final Mean Threshold: {final_mean_threshold}")
    print(f"Final Duty Cycle: {final_duty_cycle}")
    print(f"Final Average Power Occupied: {final_avg_power_occupied}")

    filtered_df = df[(df['frequency'] >= band_start) & (df['frequency'] <= band_end)]

    plt.figure(figsize=(20, 15))
    plt.scatter(filtered_df["frequency"], filtered_df["power"], marker="*", s=0.3)
    plt.xlabel("Frequency (MHz)")
    plt.ylabel("Power (dB)")
    plt.title("Monitor @" + node_name)
    plt.show()

    # Share of in-band frequencies whose mean power exceeds the threshold.
    occupied_count = filtered_df[filtered_df['power'] > threshold].shape[0]
    total_count = filtered_df.shape[0]
    duty_cycle = (occupied_count / total_count) * 100 if total_count > 0 else np.nan

    print(duty_cycle)

    plt.figure()
    plt.hist(filtered_df["power"], bins=20)
    plt.show()
    
occupancy2("Emulab")


### Scrape the Emulab Website

In [None]:
import os

import requests
from bs4 import BeautifulSoup

# Download the cbrssdr1-ustar captures listed on the Emulab index page into
# ./emulab_data, replacing ":" in the filename with "_".
url = 'https://ops.emulab.net/rfbaseline/Emulab/'
OUTPUT_DIR = './emulab_data'
NODE_PREFIX = "./cbrssdr1-ustar"
MIN_TIMESTAMP = 1611516022  # only files with a later Unix timestamp are fetched
REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled request blocks forever

# open() below does not create missing directories.
os.makedirs(OUTPUT_DIR, exist_ok=True)

response = requests.get(url, timeout=REQUEST_TIMEOUT)

if response.status_code == 200:
    soup = BeautifulSoup(response.content, 'html.parser')

    links = soup.find_all('a', href=True)

    file_urls = [link['href'] for link in links if link['href'].endswith('.gz')]

    for file_url in file_urls:
        if not file_url.startswith(NODE_PREFIX):
            continue

        # The timestamp is the text between the last "-" and the first "." after it.
        try:
            file_timestamp = int(file_url.split("-")[-1].split(".")[0])
        except ValueError:
            print(f'Skipping {file_url}: no numeric timestamp in name')
            continue
        if file_timestamp <= MIN_TIMESTAMP:
            continue

        # Links are relative ("./name"); drop the "./" before appending to the base URL.
        download_response = requests.get(url+file_url[2:], timeout=REQUEST_TIMEOUT)
        if download_response.status_code == 200:
            # Replace every ":" so names with zero or several colons are
            # neither rejected nor truncated.
            filename = file_url.split('/')[-1].replace(":", "_")
            filepath = f'{OUTPUT_DIR}/{filename}'
            with open(filepath, 'wb') as file:
                file.write(download_response.content)
        else:
            print(f'Failed to download {file_url}')
else:
    print('Failed to retrieve the webpage')
