In [79]:
import json
import pandas as pd
import os
from sklearn.metrics import confusion_matrix
import numpy as np

In [80]:


def json_lines_to_pandas(file_path):
  """
  Reads a JSON Lines file with nested lists of dictionaries and converts it into a pandas DataFrame.

  Args:
    file_path: Path to the JSON Lines file.

  Returns:
    pandas.DataFrame: A DataFrame containing the data from the JSON Lines file.
  """

  data = []
  with open(file_path, 'r') as f:
    for line in f:
      try:
        line_data = json.loads(line)
        data.extend(line_data)  # Extend the list with the current line's data
      except json.JSONDecodeError:
        print(f"Error decoding line: {line}")
        continue

  df = pd.DataFrame(data)
  return df



In [81]:
base_state="west_bengal"
target_state="uttar_pradesh"
data=f"train_{base_state}_val_{target_state}"
print(data)
json_file_path = f"/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/runs/obb/{data}_obb_v1_best.pt_640_128_300_0.25_0.50/predictions.json"
df = json_lines_to_pandas(json_file_path)
display(df.head(3))
print(df.shape)
len(df)

train_west_bengal_val_uttar_pradesh


Unnamed: 0,image_id,category_id,score,rbox,poly
0,9081332_2940682,1,0.90348,"[400.007, 190.172, 24.796, 12.021, 0.502]","[407.983, 201.406, 413.768, 190.869, 392.032, ..."
1,9081332_2940682,1,0.6533,"[125.128, 330.742, 25.883, 12.84, 0.447]","[134.018, 342.129, 139.573, 330.553, 116.238, ..."
2,9078581_3229614,2,0.63813,"[37.436, 316.726, 10.523, 21.567, 0.195]","[40.509, 328.324, 44.686, 307.166, 34.362, 305..."


(4620, 5)


4620

In [82]:
#count the number of unique values in the category_id column
print(df['category_id'].nunique())
print(df['category_id'].unique())
print(df.columns)
print(df['category_id'].value_counts().to_markdown())



2
[1 2]
Index(['image_id', 'category_id', 'score', 'rbox', 'poly'], dtype='object')
|   category_id |   count |
|--------------:|--------:|
|             2 |    3613 |
|             1 |    1007 |


In [83]:
df_val=df[['image_id','category_id']]
display(df_val.head(3))

Unnamed: 0,image_id,category_id
0,9081332_2940682,1
1,9081332_2940682,1
2,9078581_3229614,2


In [84]:
def create_dataframe_from_labels(labels_dir):
  """
  Creates a pandas DataFrame from label files in the given directory.

  Args:
    labels_dir: Path to the directory containing the label files.

  Returns:
    pandas.DataFrame: A DataFrame with 'image_id' (filename) and 'category_id' columns.
  """

  data = []
  for filename in os.listdir(labels_dir):
    if filename.endswith(".txt"):
      filepath = os.path.join(labels_dir, filename)
      with open(filepath, "r") as f:
        first_line = f.readline().strip()
        category_id = first_line.split(" ")[0]
        image_id = os.path.splitext(filename)[0] # Remove the file extension
        data.append({"image_id": image_id, "category_id": category_id})

  df = pd.DataFrame(data)
  return df

In [85]:
ground_truth_file_path =f"/home/patel_zeel/kiln_compass_24/data/{target_state}/labels"
print(ground_truth_file_path)
df_gt = create_dataframe_from_labels(ground_truth_file_path)
display(df_gt.head(3))
print(df_gt['category_id'].unique())

/home/patel_zeel/kiln_compass_24/data/uttar_pradesh/labels


Unnamed: 0,image_id,category_id
0,9051063_3086524,1
1,9180395_3006724,2
2,8806159_3342435,1


['1' '2' '0']


In [86]:
merged_df = pd.merge(df_val, df_gt, on='image_id', suffixes=('_val', '_gt'))
display(merged_df.head(3))
#make the category_id_gt column as integer
merged_df['category_id_gt'] = merged_df['category_id_gt'].astype(int)
merged_df['correct'] = merged_df['category_id_val'] == merged_df['category_id_gt']
display(merged_df.head(3))
#display the rows whre the correct column is False
display(merged_df[merged_df['correct'] == False].head(2))
print(merged_df['correct'].value_counts())
#

Unnamed: 0,image_id,category_id_val,category_id_gt
0,9081332_2940682,1,2
1,9081332_2940682,1,2
2,9078581_3229614,2,2


Unnamed: 0,image_id,category_id_val,category_id_gt,correct
0,9081332_2940682,1,2,False
1,9081332_2940682,1,2,False
2,9078581_3229614,2,2,True


Unnamed: 0,image_id,category_id_val,category_id_gt,correct
0,9081332_2940682,1,2,False
1,9081332_2940682,1,2,False


correct
True     1953
False    1895
Name: count, dtype: int64


In [88]:
#make a confusion matrix where rows are pred_0,pred_1,pred_2 and column are gt_0,gt_1,gt_2
cm=confusion_matrix(merged_df['category_id_gt'], merged_df['category_id_val'])
# display(cm)
tp = np.diag(cm)  # True Positives for each class
fp = cm.sum(axis=0) - tp  # False Positives for each class
fn = cm.sum(axis=1) - tp  # False Negatives for each class
tn = cm.sum() - (tp + fp + fn)  # True Negatives for each class


# Metrics
with np.errstate(divide='ignore', invalid='ignore'):
    precision = np.nan_to_num(tp / (tp + fp), nan=0.0)  # Handle division by zero
    recall = np.nan_to_num(tp / (tp + fn), nan=0.0)  
    f1_score = np.nan_to_num(2 * (precision * recall) / (precision + recall), nan=0.0)  

confusion_matrix_df = pd.DataFrame(cm, columns=["GT_0", "GT_1", "GT_2"], index=["Pred_0", "Pred_1", "Pred_2"])

# Create DataFrame for metrics
metrics_df = pd.DataFrame({
    "Precision": precision,
    "Recall": recall,
    "F1 Score": f1_score,
    "TP": tp,
    "FP": fp,
    "FN": fn,
    "TN": tn
}, index=["CFCBK", "FCBK", "Zigzag"])

# Display DataFrames
print(f"Confusion Matrix: train_{base_state}_val_{target_state}")
display(confusion_matrix_df.style.background_gradient(cmap="Blues"))

# print(f"\nClass-wise Metrics: base={base_state}, target={target_state}")
# display(metrics_df.style.format("{:.2f}").background_gradient(cmap="Greens", subset=["Precision", "Recall", "F1 Score"]))
 # Calculate the mean of each metric
mean_precision = precision.mean()
mean_recall = recall.mean()
mean_f1_score = f1_score.mean()
mean_tp = tp.mean() if not np.isnan(tp.mean()) else '-'
mean_fp = fp.mean() if not np.isnan(fp.mean()) else '-'
mean_fn = fn.mean() if not np.isnan(fn.mean()) else '-'
mean_tn = tn.mean() if not np.isnan(tn.mean()) else '-'

# Add mean values to the DataFrame
metrics_df.loc["Mean"] = [mean_precision, mean_recall, mean_f1_score, mean_tp, mean_fp, mean_fn, mean_tn]

# Display the DataFrame
print(f"Class-wise Metrics with Mean: base={base_state}, target={target_state}")
display(metrics_df.style.format("{:.2f}").background_gradient(cmap="Greens"))


Confusion Matrix: train_west_bengal_val_uttar_pradesh


Unnamed: 0,GT_0,GT_1,GT_2
Pred_0,0,15,79
Pred_1,0,565,1529
Pred_2,0,272,1388


Class-wise Metrics with Mean: base=west_bengal, target=uttar_pradesh


Unnamed: 0,Precision,Recall,F1 Score,TP,FP,FN,TN
CFCBK,0.0,0.0,0.0,0.0,0.0,94.0,3754.0
FCBK,0.66,0.27,0.38,565.0,287.0,1529.0,1467.0
Zigzag,0.46,0.84,0.6,1388.0,1608.0,272.0,580.0
Mean,0.38,0.37,0.33,651.0,631.67,631.67,1933.67
