In [9]:
import pandas as pd

def read_technique_ids_from_xlsx(xlsx_file, sheet_name='techniques', id_column='ID'):
    """
    Reads the technique IDs from the provided Excel (.xlsx) file.

    Parameters
    ----------
    xlsx_file : str
        Path to the Excel file containing the technique IDs.
    sheet_name : str or int, optional
        Worksheet to read the IDs from. Defaults to 'techniques'.
    id_column : str, optional
        Name of the column that holds the technique IDs. Defaults to 'ID'.

    Returns
    -------
    list
        List of technique IDs from the Excel file, as whitespace-stripped
        strings. Rows whose ID cell is empty (or blank after stripping) are
        skipped.

    Raises
    ------
    KeyError
        If `id_column` is not a column of the selected sheet.
    """
    df = pd.read_excel(xlsx_file, sheet_name=sheet_name)

    # Drop empty cells *before* casting: astype(str) would otherwise turn a
    # missing value into a literal string such as 'nan', i.e. a bogus ID.
    raw_ids = df[id_column].dropna().astype(str).tolist()

    # Strip surrounding whitespace so IDs compare equal to the (already
    # stripped) entries read from the text file; skip cells that were blank.
    technique_ids = [stripped for stripped in (raw.strip() for raw in raw_ids) if stripped]

    print(len(technique_ids))
    return technique_ids


def read_techniques_from_txt(txt_file):
    """
    Reads techniques from the provided text file.

    Parameters
    ----------
    txt_file : str
        Path to the text file containing techniques, one per line.
        The file is expected to be UTF-8, with or without a byte-order mark.

    Returns
    -------
    list
        List of techniques from the text file, in file order, with
        surrounding whitespace removed and blank lines skipped.
    """
    # 'utf-8-sig' decodes plain UTF-8 unchanged but also discards a leading
    # BOM. With plain 'utf-8' the BOM would stay attached to the first
    # technique (str.strip() does not remove U+FEFF), so that entry could
    # never match an ID from the Excel file.
    with open(txt_file, 'r', encoding='utf-8-sig') as file:
        techniques = [stripped for stripped in (line.strip() for line in file) if stripped]

    print(len(techniques))
    return techniques


def find_missing_techniques(xlsx_file, txt_file):
    """
    Finds techniques from the text file that are not present in the Excel file.

    Parameters
    ----------
    xlsx_file : str
        Path to the Excel file containing the technique IDs.
    txt_file : str
        Path to the text file containing the techniques.

    Returns
    -------
    list
        List of techniques that are in the text file but not found in the
        Excel file. Entries keep the order (and any duplicates) they have in
        the text file. Matching is exact string comparison.
    """
    # A set gives constant-time membership checks; testing against the list
    # directly would rescan every Excel ID for each text-file entry.
    known_ids = set(read_technique_ids_from_xlsx(xlsx_file))
    techniques = read_techniques_from_txt(txt_file)

    # Find techniques not in the Excel file
    missing_techniques = [tech for tech in techniques if tech not in known_ids]

    return missing_techniques

In [10]:
# Example usage: report which entries of the text list are absent from the workbook.
# NOTE: `csv_file` actually holds the path of an .xlsx workbook, and the label
# printed below says "CSV" even though the IDs are read from that Excel file.
txt_file = r"C:\Users\Aakanksha Saha\Documents\CTITTP\MITRETTPlist.txt"
csv_file = r"C:\Users\Aakanksha Saha\Documents\CTITTP\enterprise-attack-v15.1-techniques.xlsx"

missing_techniques = find_missing_techniques(csv_file, txt_file)

# Single print call, newline-separated: the count, the label, then the IDs.
print(
    len(missing_techniques),
    "Techniques not found in the CSV file:",
    missing_techniques,
    sep="\n",
)

637
461
107
Techniques not found in the CSV file:
['T1194', 'T1088', 'T1346', 'T1065', 'T1500', 'T1311', 'T1432', 'T1002', 'T1043', 'T1433', 'T1329', 'T1312', 'T1179', 'T1517', 'T1093', 'T0872', 'T1079', 'T1009', 'T0840', 'T1099', 'T1053.004', 'T1045', 'T1426', 'T1100', 'T1512', 'T1022', 'T1438', 'T1420', 'T1532', 'T1508', 'T0871', 'T1533', 'T1476', 'T1077', 'T1191', 'T1402', 'T1341', 'T1024', 'T1117', 'T1047.001', 'T1345', 'T1004', 'T1497.004', 'T1064', 'T1247', 'T1089', 'T1249', 'T1086', 'T1260', 'T1061', 'T0853', 'T1143', 'T0847', 'T1122', 'T1084', 'T1351', 'T1268', 'T0807', 'T1023', 'T1107', 'T1444', 'T1660', 'T0831', 'T0809', 'T1081', 'T1101', 'T1437', 'T1076', 'T1503.004', 'T1035', 'T1412', 'T1308', 'T1328', 'T1013', 'T1031', 'T1060', 'T1193', 'T1085', 'T0855', 'T1513', 'T1116', 'T1168', 'T1094', 'T1447', 'T1138', 'T1063', 'T1183', 'T1032', 'T1158', 'T1418', 'T1502', 'T1337', 'T1145', 'T1575', 'T1347', 'T1503.003', 'T1503', 'T1429', 'T1544', 'T1050', 'T1108', 'T1073', 'T1015', 'T