In [9]:
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tabulate import tabulate
# Seed NumPy's global generator so the synthetic data set is identical on every run
np.random.seed(42)

# Size of the synthetic data set
num_samples = 1000

# Draw the structural features (randint's upper bound is exclusive).
# The draws stay in the order Fan_In -> Fan_Out -> Gate_Count because they
# all consume the same seeded stream.
fan_in = np.random.randint(1, 15, num_samples)
fan_out = np.random.randint(1, 10, num_samples)
gate_count = np.random.randint(5, 100, num_samples)

data = {
    'Signal': [f'Signal_{i}' for i in range(num_samples)],
    'Fan_In': fan_in,
    'Fan_Out': fan_out,
    'Gate_Count': gate_count,
}

# Depth is a fixed linear combination of the three features
data['Combinational_Depth'] = (gate_count / 8 + fan_in / 3 + fan_out / 2).astype(float)

# Assemble the table, persist it next to the notebook, and preview it
df = pd.DataFrame(data)
df.to_csv("synthetic_rtl_data.csv", index=False)
print(df.head())


     Signal  Fan_In  Fan_Out  Gate_Count  Combinational_Depth
0  Signal_0       7        3          21             6.458333
1  Signal_1       4        1          30             5.583333
2  Signal_2      13        5          12             8.333333
3  Signal_3      11        4          51            12.041667
4  Signal_4       8        8          80            16.666667


In [10]:
# Load the data set produced by the generation cell. That cell writes
# "synthetic_rtl_data.csv" relative to the working directory, so it is read
# back from the same relative location; the previous absolute "/content/..."
# path only resolved when the working directory happened to be /content.
df = pd.read_csv("synthetic_rtl_data.csv")

In [11]:
# Preview the first rows of the frame loaded from the CSV
df.head()

Unnamed: 0,Signal,Fan_In,Fan_Out,Gate_Count,Combinational_Depth
0,Signal_0,7,3,21,6.458333
1,Signal_1,4,1,30,5.583333
2,Signal_2,13,5,12,8.333333
3,Signal_3,11,4,51,12.041667
4,Signal_4,8,8,80,16.666667


In [12]:
# (rows, columns) of the loaded frame
df.shape

(1000, 5)

In [13]:
# Split the depth values at the median: two equal-frequency bins named Low / High.
# retbins=True also hands back the bin edges that qcut chose.
depth_category, bin_edges = pd.qcut(
    df["Combinational_Depth"],
    q=2,
    labels=["Low", "High"],
    retbins=True,
)
df["Depth_Category"] = depth_category

# Keep the edges at two-decimal precision for display
bins = [round(edge, 2) for edge in bin_edges]

# (lower edge, upper edge) of each category
low_range = tuple(bins[:2])
high_range = tuple(bins[1:3])

# Report the ranges that were derived from the data
print("**Automatically Determined Category Ranges**")
print(f"Low Depth Range: {low_range}")
print(f"High Depth Range: {high_range}")

# Map the category names to integer class ids for the classifiers
label_encoder = LabelEncoder()
df["Depth_Label"] = label_encoder.fit_transform(df["Depth_Category"])

**Automatically Determined Category Ranges**
Low Depth Range: (1.58, 11.08)
High Depth Range: (11.08, 20.5)


In [8]:
# The target is already encoded at the end of the median-split cell, and
# LabelEncoder is imported in the notebook's first cell, so importing and
# re-fitting it here only repeated that work. Validate the existing encoding
# instead of recomputing it.
# LabelEncoder orders its classes alphabetically, hence High -> 0 and Low -> 1.
assert list(label_encoder.classes_) == ["High", "Low"], "unexpected depth categories"
assert df["Depth_Label"].isin([0, 1]).all(), "Depth_Label must be a binary class id"

In [22]:
# Model inputs are the three structural features; the target is the encoded depth class
feature_columns = ['Fan_In', 'Fan_Out', 'Gate_Count']
X = df[feature_columns]
y = df["Depth_Label"]

# Hold out 20% for testing; stratify so both parts keep the same Low/High ratio
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Standardise with statistics learned from the training part only,
# then apply that same transform to both parts
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Candidate classifiers, keyed by the display name used in the results table.
# Gradient Boosting and XGBoost now get a fixed random_state like Random Forest,
# so the comparison and the "best model" choice do not depend on unseeded defaults.
models = {
    "Logistic Regression": LogisticRegression(),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "Gradient Boosting": GradientBoostingClassifier(random_state=42),
    # The target is binary (Low/High), so use the binary 'logloss' metric;
    # 'mlogloss' is XGBoost's multi-class log loss.
    "XGBoost": XGBClassifier(eval_metric='logloss', random_state=42),
}

# One row per model: [name, accuracy, precision, recall, f1]
results = []

# Metrics that are averaged over the classes, weighted by class support
weighted_metrics = (precision_score, recall_score, f1_score)

for name, model in models.items():
    # Train on the scaled training part, predict the held-out part
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)

    scores = [accuracy_score(y_test, y_pred)]
    scores.extend(metric(y_test, y_pred, average='weighted') for metric in weighted_metrics)
    results.append([name, *scores])

# Tabulate the scores
results_df = pd.DataFrame(
    results,
    columns=["Model", "Accuracy", "Precision", "Recall", "F1-Score"],
)
print(tabulate(results_df, headers="keys", tablefmt="grid"))

# The winner is the model with the highest weighted F1-score
ranked_by_f1 = results_df.sort_values(by="F1-Score", ascending=False)
best_model_name = ranked_by_f1.iloc[0]["Model"]
print(f"\n  Best Model: {best_model_name}")

# Persist the winning estimator together with the preprocessing objects
# needed to use it later (same file names, written in the same order)
best_model = models[best_model_name]
artifacts = {
    "best_logic_depth_model.pkl": best_model,
    "scaler.pkl": scaler,
    "label_encoder.pkl": label_encoder,
}
for filename, artifact in artifacts.items():
    with open(filename, "wb") as f:
        pickle.dump(artifact, f)

print("\n Best Model, Scaler, and Label Encoder saved successfully!")


+----+---------------------+------------+-------------+----------+------------+
|    | Model               |   Accuracy |   Precision |   Recall |   F1-Score |
|  0 | Logistic Regression |      0.995 |    0.99505  |    0.995 |   0.995    |
+----+---------------------+------------+-------------+----------+------------+
|  1 | Random Forest       |      0.965 |    0.965047 |    0.965 |   0.964999 |
+----+---------------------+------------+-------------+----------+------------+
|  2 | Gradient Boosting   |      0.96  |    0.960184 |    0.96  |   0.959996 |
+----+---------------------+------------+-------------+----------+------------+
|  3 | XGBoost             |      0.96  |    0.960184 |    0.96  |   0.959996 |
+----+---------------------+------------+-------------+----------+------------+

  Best Model: Logistic Regression

 Best Model, Scaler, and Label Encoder saved successfully!


In [24]:
# Load the saved model, scaler, and label encoder.
# The training cell writes these files relative to the working directory, so
# they are read back from the same relative paths; the previous absolute
# "/content/..." paths only resolved when the working directory was /content.
# NOTE: pickle.load can execute arbitrary code - only load files that this
# notebook produced itself.
with open("best_logic_depth_model.pkl", "rb") as f:
    model = pickle.load(f)

with open("scaler.pkl", "rb") as f:
    scaler = pickle.load(f)

with open("label_encoder.pkl", "rb") as f:
    label_encoder = pickle.load(f)

depth_categories = list(label_encoder.classes_)  # Category names known to the encoder

def predict_combinational_depth(fan_in=None, fan_out=None, gate_count=None):
    """Classify one RTL signal's combinational depth and report the result.

    Each feature left as ``None`` is read interactively with ``input()``, so a
    call without arguments prompts for all three values. Passing the values
    makes the function usable without a console.

    Uses the module-level ``scaler``, ``model`` and ``label_encoder`` objects.

    Args:
        fan_in: Fan-in of the signal, or ``None`` to prompt for it.
        fan_out: Fan-out of the signal, or ``None`` to prompt for it.
        gate_count: Gate count of the signal, or ``None`` to prompt for it.

    Returns:
        The predicted category label as decoded by ``label_encoder``.

    Raises:
        ValueError: If a value is not a finite, non-negative number.
    """
    requested = [("Fan-In", fan_in), ("Fan-Out", fan_out), ("Gate Count", gate_count)]

    # Only show the prompt header when something actually has to be typed in
    if any(value is None for _, value in requested):
        print("\n Enter RTL Signal Features ")

    features = []
    for label, value in requested:
        if value is None:
            value = input(f"Enter {label}: ")
        try:
            number = float(value)
        except (TypeError, ValueError):
            raise ValueError(f"{label} must be a number, got {value!r}") from None
        # float() happily parses "nan" and "inf"; reject those and negative counts
        if not np.isfinite(number) or number < 0:
            raise ValueError(f"{label} must be a finite, non-negative number, got {value!r}")
        features.append(number)

    # Use the same column names as in training, then apply the fitted scaler
    input_data_df = pd.DataFrame([features], columns=["Fan_In", "Fan_Out", "Gate_Count"])
    input_data_scaled = scaler.transform(input_data_df)

    # Predict the class id and decode it back to its category name
    prediction = model.predict(input_data_scaled)[0]
    predicted_category = label_encoder.inverse_transform([prediction])[0]

    print("\n Predicted Depth Category:", predicted_category)

    # A "High" depth is reported as a timing violation (case-insensitive match)
    if predicted_category.lower() == "high":
        print("Timing Violation Detected! Consider optimization.")
    else:
        print("No Timing Violation Detected.")

    return predicted_category

# Run the prediction function; it prompts on stdin for the three feature values
predict_combinational_depth()



 Enter RTL Signal Features 
Enter Fan-In: 4
Enter Fan-Out: 2
Enter Gate Count: 13

 Predicted Depth Category: Low
No Timing Violation Detected.
