In [1]:
import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

In [2]:
# Build the RF/DC sweep table and write it to rf_dc_dataset.csv.
# Each row is [RF1, RF2, DC Value] followed by 101 frequency columns
# 193.1e12 + n * rf_freq for n = -50 .. 50.
data = []

# Which RF value is held fixed. Validate immediately so a typo is reported
# before any further prompts are shown.
fixed_type = input("Enter 'RF1' to fix RF1 or 'RF2' to fix RF2: ").strip().upper()
if fixed_type not in ("RF1", "RF2"):
    # Raise instead of calling exit(): in a notebook exit() shuts the kernel
    # down, whereas an exception stops the cell and keeps the session alive.
    raise ValueError("Invalid input. Please enter 'RF1' or 'RF2'.")

rf_freq = float(input("enter the frequency value"))

# The frequency columns do not depend on the swept values, so build them once.
frequencies = [193.1E+12 + n * rf_freq for n in range(-50, 51)]

if fixed_type == "RF2":
    fixed_RF2 = float(input("Enter the fixed RF2 value: "))
    rf1_values = np.arange(1, 4.25, 0.25)
    dc_values = np.arange(3, -0.25, -0.25)
    # zip pairs the i-th RF1 value with the i-th DC value (not a full grid).
    for rf1, dc in zip(rf1_values, dc_values):
        data.append([rf1, fixed_RF2, dc] + frequencies)
else:
    fixed_RF1 = float(input("Enter the fixed RF1 value: "))
    rf2_values = np.arange(0, 5.125, 0.125)
    dc_values = np.arange(4, -0.125, -0.125)
    # NOTE(review): rf2_values has 41 entries but dc_values only 33; zip stops
    # at the shorter one, so RF2 values above 4.0 never reach the dataset.
    # Confirm this truncation is intended.
    for rf2, dc in zip(rf2_values, dc_values):
        data.append([fixed_RF1, rf2, dc] + frequencies)

# Create DataFrame with frequency column names
df_columns = ["RF1", "RF2", "DC Value"] + [f"Freq_{n}" for n in range(-50, 51)]
df = pd.DataFrame(data, columns=df_columns)

# Save to CSV
df.to_csv("rf_dc_dataset.csv", index=False)

print("Dataset saved as rf_dc_dataset.csv")

Dataset saved as rf_dc_dataset.csv


In [None]:
# Load the main dataset
dataset = pd.read_csv("rf_dc_dataset.csv")

# Path of the Excel workbook with multiple RF2 sheets
excel_file = './data/pm.xlsx' # Change to your actual file name

# Only the sheet names are needed from this handle, so open the workbook in a
# context manager: the file is closed as soon as the names have been read
# instead of staying open for the rest of the kernel session.
with pd.ExcelFile(excel_file) as excel_data:
    sheets = excel_data.sheet_names  # Get all sheet names

# Worksheets already parsed, keyed by sheet name, so that each sheet is read
# from disk at most once no matter how many dataset rows refer to it.
_SHEET_CACHE = {}


def _load_rf2_sheet(sheet_name):
    """Return worksheet ``sheet_name`` of ``excel_file`` as a DataFrame (cached).

    Raises:
        KeyError: if the sheet has no 'Frequency' or no 'Power' column. The
            message names the sheet and lists the columns that were found.
    """
    if sheet_name not in _SHEET_CACHE:
        sheet = pd.read_excel(excel_file, sheet_name=sheet_name)
        missing = [name for name in ("Frequency", "Power") if name not in sheet.columns]
        if missing:
            raise KeyError(
                f"Sheet {sheet_name!r} in {excel_file!r} is missing column(s) "
                f"{missing}; columns found: {list(sheet.columns)}"
            )
        _SHEET_CACHE[sheet_name] = sheet
    return _SHEET_CACHE[sheet_name]


def map_power_values(row):
    """Look up the power for each of the row's 101 frequency columns.

    The row's RF2 value, converted with ``str``, selects the worksheet. For
    every ``Freq_n`` (n = -50 .. 50) the first 'Power' entry whose 'Frequency'
    equals the row's value exactly is taken.

    Args:
        row: a dataset row providing "RF2" and "Freq_-50" .. "Freq_50".

    Returns:
        pd.Series indexed "Power_-50" .. "Power_50". Entries are NaN where no
        frequency matched; all entries are NaN when no sheet is named after
        the RF2 value.
    """
    # NOTE(review): str() of a float RF2 such as 1.0 yields "1.0"; a sheet
    # named "1" would not be found. Confirm the workbook's naming scheme.
    rf2_value = str(row["RF2"])

    if rf2_value not in sheets:
        # Assign NaN to every power column if the RF2 sheet is not found
        return pd.Series({f"Power_{n}": np.nan for n in range(-50, 51)})

    rf2_data = _load_rf2_sheet(rf2_value)

    power_values = {}
    for n in range(-50, 51):
        freq_value = row[f"Freq_{n}"]

        # Exact floating-point comparison: values that differ by rounding
        # (e.g. after a CSV round trip) will not match and produce NaN.
        power_match = rf2_data.loc[rf2_data['Frequency'] == freq_value, 'Power']

        power_values[f"Power_{n}"] = power_match.iloc[0] if not power_match.empty else np.nan

    return pd.Series(power_values)

# Apply function to map power values: one Power_n column per Freq_n column
power_columns = dataset.apply(map_power_values, axis=1)
dataset = pd.concat([dataset, power_columns], axis=1)

# Save the updated dataset (written once; a second identical save and print
# added nothing but extra I/O and duplicate output)
dataset.to_csv("rf_dc_mapped_dataset.csv", index=False)
print("Mapped dataset saved as rf_dc_mapped_dataset.csv")

KeyError: 'Frequency'

In [None]:
# Reload the mapped dataset and remove unusable power/frequency columns and rows.
df=pd.read_csv('rf_dc_mapped_dataset.csv')

# Define thresholds (both are row counts derived from the number of rows)
nan_threshold = 0.2 * len(df)  # Drop columns with more than 20% NaN values
negative_threshold = 0.01 * len(df)  # Drop columns where more than 1% of rows have power < -20

# Identify power columns
power_cols = [col for col in df.columns if col.startswith("Power_")]

# Find power columns to drop based on NaN and power value < -20 threshold
power_cols_to_drop = [
    col for col in power_cols
    if df[col].isna().sum() > nan_threshold or (df[col] < -20).sum() > negative_threshold
]

# Find corresponding frequency columns to drop
freq_cols_to_drop = [
    col.replace("Power_", "Freq_") for col in power_cols_to_drop
    if col.replace("Power_", "Freq_") in df.columns
]

# Drop selected power and frequency columns, then every row that still has a NaN
df_cleaned = df.drop(columns=power_cols_to_drop + freq_cols_to_drop)
df_dropped_rows = df_cleaned.dropna()

# Hand out independent copies: columns added later to df_cleaned must not
# silently appear in df_final or df_dropped_rows through a shared object.
df_cleaned = df_dropped_rows.copy()
df_final = df_dropped_rows.copy()

In [None]:
# Power columns that are still present after cleaning
remaining_power_cols = list(
    filter(lambda name: name.startswith("Power_"), df_cleaned.columns)
)

# Row-wise maximum over those columns only
df_cleaned["Max_Power"] = df_cleaned.loc[:, remaining_power_cols].max(axis="columns")
# Flatness of the power values lying within 2 of the row's maximum
def compute_flatness_2db(row):
    """Return max/min of the row's power values that are within 2 of Max_Power.

    Reads the columns listed in the global ``remaining_power_cols`` and the
    row's "Max_Power" entry. Returns None when no value qualifies or when the
    smallest qualifying value is 0 (the ratio would divide by zero).
    """
    peak = row["Max_Power"]
    powers = row[remaining_power_cols]

    # Keep only the values at most 2 below the peak
    near_peak = powers[(peak - powers) <= 2]
    if near_peak.empty:
        return None

    lowest = near_peak.min()
    if lowest == 0:
        return None

    return near_peak.max() / lowest

# Per-row flatness ratio (see compute_flatness_2db)
df_cleaned["Flatness_2dB"] = df_cleaned.apply(compute_flatness_2db, axis=1)

# Count the power columns whose value is within 2 of the row's Max_Power.
# rsub computes Max_Power - Power_n for every column in one vectorised step.
df_cleaned["Frequency_Lines_2dB"] = (
    df_cleaned[remaining_power_cols]
    .rsub(df_cleaned["Max_Power"], axis=0)
    .le(2)
    .sum(axis=1)
)

# Row with the largest Flatness_2dB value (i.e. the largest max/min ratio)
max_flatness_row = df_cleaned.loc[df_cleaned["Flatness_2dB"].idxmax()]

# Filter power values where (row-wise Max_Power - Power_n) == 2.
# The selections are collected in a dict and turned into a DataFrame in one
# go: assigning them one by one to an empty DataFrame would fix the index to
# the rows of the first column and silently drop rows that only match in
# later columns. Building from a dict keeps the union of all matching rows.
# NOTE(review): "== 2" is an exact floating-point comparison and will rarely
# be true for measured data; confirm whether a tolerance was intended.
filtered_columns = {}
for col in remaining_power_cols:
    freq_col = col.replace("Power_", "Freq_")

    # Condition check for each row separately
    mask = (df_cleaned["Max_Power"] - df_cleaned[col]) == 2
    filtered_columns[f"Filtered_{col}"] = df_cleaned.loc[mask, col]
    if freq_col in df_cleaned.columns:
        filtered_columns[f"Filtered_{freq_col}"] = df_cleaned.loc[mask, freq_col]

filtered_data = pd.DataFrame(filtered_columns)

In [None]:
# Bind df_new to the row-cleaned frame; this is a plain assignment, so no copy is made.
df_new=df_dropped_rows

In [None]:
# Dump both frames in full; df_new and df_final both derive from df_dropped_rows.
print(df_new)
print(df_final)

In [None]:
# Names of the input columns and of the power columns that survived cleaning
feature_cols = ['RF1', 'RF2', 'DC Value']
remaining_power_cols = [name for name in df_new.columns if name.startswith("Power_")]

# Extract both matrices from df_final as NumPy arrays
features = df_final[feature_cols].values
targets = df_final[remaining_power_cols].values

In [None]:
# Hold out 10% of the rows as a test set (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    features, targets, test_size=0.1, random_state=42
)

# Fit the scalers on the training split only, then apply them to both splits
scaler_features = StandardScaler().fit(X_train)
scaler_targets = StandardScaler().fit(y_train)

X_train, X_test = scaler_features.transform(X_train), scaler_features.transform(X_test)
y_train, y_test = scaler_targets.transform(y_train), scaler_targets.transform(y_test)

In [None]:
# Show the shape of each split, in the order train X, train y, test X, test y
for split in (X_train, y_train, X_test, y_test):
    print(split.shape)

In [None]:
# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation and batch shuffling are repeatable from run to run; the data
# split is already seeded (random_state=42), training was not.
tf.keras.utils.set_random_seed(42)

# Define the model: six hidden Dense layers (4 x tanh, 2 x relu) mapping the
# input features to one linear output per target column
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='tanh', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(32, activation='tanh'),
    tf.keras.layers.Dense(64, activation='tanh'),
    tf.keras.layers.Dense(32, activation='tanh'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(y_train.shape[1])  # Output layer with the number of targets
])

# Compile the model
model.compile(optimizer='adam', loss='mse')

# Train the model
model.fit(X_train, y_train, epochs=600, batch_size=8, verbose=1)

# Predict and evaluate. y_test is still standardised at this point, so both
# metrics are expressed in scaled target units, not in the original power units.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("MSE:", mse)
print("R-squared:", r2)


In [None]:
# Undo the target standardisation for predictions and ground truth alike.
# The names are rebound in place, so running this cell a second time applies
# the inverse transform again.
y_pred, y_test = (
    scaler_targets.inverse_transform(scaled) for scaled in (y_pred, y_test)
)
for unscaled in (y_pred, y_test):
    print(unscaled)

In [None]:
# y_pred is a NumPy array of shape (num_test_rows, num_power_columns).
# Get the remaining power columns after cleaning
remaining_power_cols = [col for col in df_new.columns if col.startswith("Power_")]

# Recover which rows of df_new make up the test split. train_test_split
# shuffles before splitting, so the test rows are NOT the last len(y_pred)
# rows of the frame. Repeating the split on the row labels with the same
# test_size and random_state reproduces the same selection, because the
# shuffle depends only on the number of rows and the seed.
# Keep these two arguments in sync with the cell that creates X_test / y_test.
_, test_index = train_test_split(df_new.index.to_numpy(), test_size=0.1, random_state=42)
if len(test_index) != len(y_pred):
    raise ValueError(
        f"Recovered {len(test_index)} test rows but y_pred has {len(y_pred)}; "
        "the split parameters here no longer match the training split."
    )

# Predictions labelled with the rows they were actually computed for
predicted_powers = pd.DataFrame(y_pred, columns=remaining_power_cols, index=test_index)

# Copy the dataset and overwrite the power columns of the test rows with the
# predicted values; all other rows keep their original power values.
reconstructed_dataset = df_new.copy()
for col_name in remaining_power_cols:
    # Series assignment through .loc aligns on the row labels
    reconstructed_dataset.loc[predicted_powers.index, col_name] = predicted_powers[col_name]


In [None]:
# Dump the reconstructed frame in full as plain text.
print(reconstructed_dataset)

In [None]:
# Row-wise maximum over the remaining power columns of the reconstructed frame
reconstructed_dataset["Max_Power"] = (
    reconstructed_dataset.loc[:, remaining_power_cols].max(axis="columns")
)
# Flatness of the power values lying within 2 of the row's maximum
def compute_flatness_2db(row):
    """Return max/min of the row's power values that are within 2 of Max_Power.

    Reads the columns listed in the global ``remaining_power_cols`` and the
    row's "Max_Power" entry. Returns None when no value qualifies or when the
    smallest qualifying value is 0 (the ratio would divide by zero).
    """
    peak = row["Max_Power"]
    powers = row[remaining_power_cols]

    # Keep only the values at most 2 below the peak
    near_peak = powers[(peak - powers) <= 2]
    if near_peak.empty:
        return None

    lowest = near_peak.min()
    if lowest == 0:
        return None

    return near_peak.max() / lowest

# Per-row flatness ratio (see compute_flatness_2db)
reconstructed_dataset["Flatness_2dB"] = reconstructed_dataset.apply(compute_flatness_2db, axis=1)

# Count the power columns whose value is within 2 of the row's Max_Power.
# rsub computes Max_Power - Power_n for every column in one vectorised step.
reconstructed_dataset["Frequency_Lines_2dB"] = (
    reconstructed_dataset[remaining_power_cols]
    .rsub(reconstructed_dataset["Max_Power"], axis=0)
    .le(2)
    .sum(axis=1)
)

# Row with the largest Flatness_2dB value (i.e. the largest max/min ratio)
max_flatness_row = reconstructed_dataset.loc[reconstructed_dataset["Flatness_2dB"].idxmax()]

# Filter power values where (row-wise Max_Power - Power_n) == 2.
# The selections are collected in a dict and turned into a DataFrame in one
# go: assigning them one by one to an empty DataFrame would fix the index to
# the rows of the first column and silently drop rows that only match in
# later columns. Building from a dict keeps the union of all matching rows.
# NOTE(review): "== 2" is an exact floating-point comparison and will rarely
# be true for predicted values; confirm whether a tolerance was intended.
filtered_columns = {}
for col in remaining_power_cols:
    freq_col = col.replace("Power_", "Freq_")

    # Condition check for each row separately
    mask = (reconstructed_dataset["Max_Power"] - reconstructed_dataset[col]) == 2
    filtered_columns[f"Filtered_{col}"] = reconstructed_dataset.loc[mask, col]
    if freq_col in reconstructed_dataset.columns:
        filtered_columns[f"Filtered_{freq_col}"] = reconstructed_dataset.loc[mask, freq_col]

filtered_data = pd.DataFrame(filtered_columns)


In [None]:
# Show the row selected by idxmax on Flatness_2dB.
print(max_flatness_row)

In [None]:
# Dump the reconstructed frame again, now including the derived metric columns.
print(reconstructed_dataset)

In [None]:
def _min_max_normalize(values):
    """Rescale a Series linearly so its minimum maps to 0 and its maximum to 1.

    When every value is identical the range is 0; the result is then all
    zeros instead of the NaN that a division by zero would produce.
    """
    lowest = values.min()
    span = values.max() - lowest
    if span == 0:
        return values - lowest
    return (values - lowest) / span


# Normalize both columns (min-max normalization)
reconstructed_dataset["Flatness_2dB_norm"] = _min_max_normalize(
    reconstructed_dataset["Flatness_2dB"]
)
reconstructed_dataset["Frequency_Lines_2dB_norm"] = _min_max_normalize(
    reconstructed_dataset["Frequency_Lines_2dB"]
)

# Combine both into a single score: the plain average of the two normalized columns
reconstructed_dataset["Combined_Score"] = (
    reconstructed_dataset["Flatness_2dB_norm"] + reconstructed_dataset["Frequency_Lines_2dB_norm"]
) / 2

# Find the row with the highest combined score
best_row = reconstructed_dataset.loc[reconstructed_dataset["Combined_Score"].idxmax()]

# Show the result
print("Row with both maximum Flatness_2dB and Frequency_Lines_2dB (combined):")
print(best_row)
