In [2]:
import os
import yaml
import pandas as pd

def count_labels_and_update_yaml(txt_folder, yaml_file, output_yaml_file):
    with open(yaml_file, 'r') as file:
        yaml_content = yaml.safe_load(file)
        labels_dict = yaml_content.get('names', {})
        labels_dict = {str(key): value for key, value in labels_dict.items()} 

    label_counts = {label: 0 for label in labels_dict.keys()}

    for filename in os.listdir(txt_folder):
        if filename.endswith(".txt"):
            txt_path = os.path.join(txt_folder, filename)
            with open(txt_path, 'r') as txt_file:
                for line in txt_file:
                    parts = line.split()
                    if len(parts) > 0:
                        label = parts[0]
                        if label in label_counts:
                            label_counts[label] += 1

    results_df = pd.DataFrame.from_dict(label_counts, orient='index', columns=['Count'])
    results_df.index.name = 'Label'
    results_df['Description'] = results_df.index.map(labels_dict)
    results_df = results_df.reset_index()

    overall_count = results_df['Count'].sum()
    print(f"Overall Count: {overall_count}") 

    results_df = results_df.sort_values(by='Count', ascending=False)

    for idx, row in enumerate(results_df.itertuples(), start=1):
        print(f"{idx}. {row.Description} {row.Label} {row.Count}")

    top_30 = results_df.head(30)
    print("\nTop 30 Labels:")
    print(top_30)

    with open(yaml_file, 'r') as file:
        yaml_content = yaml.safe_load(file)

    new_names = {int(row.Label): yaml_content['names'][int(row.Label)] for row in top_30.itertuples()}
    yaml_content['names'] = new_names  

    with open(output_yaml_file, 'w') as file:
        yaml.dump(yaml_content, file, default_flow_style=False)

    print(f"The updated YAML file with top 30 labels is saved as '{output_yaml_file}'.")

txt_folder = "./txt" 
yaml_file = "./desired.yaml" 
output_yaml_file = "./desired_top_30.yaml"  

count_labels_and_update_yaml(txt_folder, yaml_file, output_yaml_file)

Overall Count: 5473
1. BSB 0 567
2. RSB 11 521
3. BLB 1 509
4. BSR 3 282
5. RMB 10 279
6. HSR 44 230
7. BLR 2 219
8. BSY 6 198
9. RSA 14 189
10. GLL 27 177
11. FSR 31 169
12. BSW 4 166
13. BSL 5 156
14. FSB 33 155
15. BST 7 149
16. GLG 25 142
17. RSR 16 117
18. RSY 15 113
19. JLL 39 109
20. FLR 32 108
21. HLR 43 104
22. FLB 34 90
23. BSP 8 82
24. BRLR 70 80
25. PSS 52 62
26. BRSR 71 60
27. R4B 17 45
28. GLP 28 44
29. GRLL 69 43
30. PLS 51 38
31. RLB 9 38
32. GLB 29 37
33. BRSB 73 34
34. HSFF 49 33
35. HSS 46 26
36. GLR 26 21
37. HSP 48 19
38. GBLG 83 16
39. FLP 38 8
40. FSP 37 7
41. MLB 78 5
42. GRLB 68 4
43. PSB 54 4
44. RGB 24 4
45. FSO 35 3
46. MLG 77 2
47. MLY 79 2
48. GLY 30 2
49. DSS 82 2
50. GBLO 85 2
51. GBLR 84 1
52. PBSS 67 0
53. PBLS 66 0
54. PBSO 65 0
55. FSSY 76 0
56. BRLB 72 0
57. FSLY 74 0
58. FSMY 75 0
59. PBSR 63 0
60. MLL 80 0
61. DSR 81 0
62. PBLO 64 0
63. RSO 21 0
64. PBLR 62 0
65. JLR 42 0
66. RSP 23 0
67. RLO 20 0
68. RSG 19 0
69. RLG 18 0
70. RMA 13 0
71. FLO 36 