In [9]:
import os
from pydicom import dcmread

def get_dicom_stats(folder_path):
  """
  This function reads DICOM images from a folder and calculates various statistics.

  Args:
      folder_path: Path to the folder containing DICOM images.

  Returns:
      A dictionary containing statistics about the DICOM images.
  """
  stats = {
      "total_files": 0,
      "modalities": {},
      "image_shapes": [],
      "pixel_types": {}
  }

  for root, _, filenames in os.walk(folder_path):
    for filename in filenames:
      if filename.lower().endswith(".dcm"):
        filepath = os.path.join(root, filename)
        try:
          dcm = dcmread(filepath)
          stats["total_files"] += 1

          # Update modality statistics
          modality = dcm.Modality.upper()
          stats["modalities"][modality] = stats["modalities"].get(modality, 0) + 1

          # Update image shape statistics
          stats["image_shapes"].append(dcm.pixel_array.shape)

          # Update pixel type statistics
          pixel_type = dcm.PixelRepresentation
          stats["pixel_types"][pixel_type] = stats["pixel_types"].get(pixel_type, 0) + 1

        except IOError as e:
          print(f"Error reading DICOM file: {filepath}")

  return stats

if __name__ == "__main__":
  # Replace 'path/to/your/folder' with the actual path to your DICOM folder
  folder_path = "/Users/likhithravula/Documents/NEU/Northeastern/Capstone/NBIA Source Data/Raw/Pseudo-PHI-DICOM-Data"
  stats = get_dicom_stats(folder_path)

  print("DICOM Image Statistics:")
  print(f"Total Files: {stats['total_files']}")

  print("Modalities:")
  for modality, count in stats["modalities"].items():
    print(f"\t- {modality}: {count}")

  print("Image Shapes:")
  # You can print individual shapes or calculate summary statistics here
  #  print(stats["image_shapes"])

  print("Pixel Types:")
  for pixel_type, count in stats["pixel_types"].items():
    print(f"\t- {pixel_type}: {count}")



DICOM Image Statistics:
Total Files: 1693
Modalities:
	- CR: 4
	- CT: 268
	- DX: 10
	- PT: 1203
	- MG: 58
	- MR: 150
Image Shapes:
Pixel Types:
	- 0: 460
	- 1: 1233


In [18]:
modalities_count = []
for key in stats["modalities"].keys():
    count = {}
    count["name"] = key
    count["value"] = stats["modalities"][key]
    modalities_count.append(count)

modalities_count

[{'name': 'CR', 'value': 4},
 {'name': 'CT', 'value': 268},
 {'name': 'DX', 'value': 10},
 {'name': 'PT', 'value': 1203},
 {'name': 'MG', 'value': 58},
 {'name': 'MR', 'value': 150}]

In [4]:
from collections import Counter

# Count the occurrences of each element
category_counts = Counter(stats["image_shapes"])

# Print the counts
category_counts


Counter({(128, 128): 1058,
         (512, 512): 278,
         (512, 512, 3): 145,
         (256, 256): 131,
         (1421, 1024): 57,
         (2022, 2022): 7,
         (665, 512): 5,
         (1760, 2140): 3,
         (2330, 2656): 1,
         (1760, 1760): 1,
         (3328, 2560): 1,
         (1211, 888): 1,
         (1256, 888): 1,
         (394, 552): 1,
         (552, 394): 1,
         (2330, 2774): 1,
         (2330, 2846): 1})

In [15]:
stats["modalities"]

{'CR': 4, 'CT': 268, 'DX': 10, 'PT': 1203, 'MG': 58, 'MR': 150}

In [7]:
# stats["modalities"][''] = 
stats["modalities"].get("CR", 0)

1

In [14]:
for root, _, filenames in os.walk(folder_path):
    print(_, filenames)
    for filename in filenames:
      if filename.lower().endswith(".dcm"):
          filepath = os.path.join(root, filename)
          # print(filepath)

['8989193730', '6670427471', '7361647728', '4025360156', '7255997752', '339833062', '8834647487', '6451050561', '9189822998', '8189244869', '571403367', '9894340694', '8732322741', '3209648408', '292821506', '3642991663', '8155012288', '6614238035', '6415974217', '6774825273', '8548156246'] ['.DS_Store', 'LICENSE']
['04-06-2004-NA-NA-18038'] ['.DS_Store']
['1001.000000-NA-46518'] []
[] ['1-1.dcm']
['05-26-2000-NA-FORFILE CT ABD ANDOR PEL - CD-25398'] ['.DS_Store']
['5.000000-NEPHRO  4.0  B40f  M0.4-18678'] ['.DS_Store']
[] ['1-013.dcm', '1-007.dcm', '1-006.dcm', '1-012.dcm', '1-038.dcm', '1-004.dcm', '1-010.dcm', '1-011.dcm', '1-005.dcm', '1-039.dcm', '1-001.dcm', '1-015.dcm', '1-029.dcm', '1-028.dcm', '1-014.dcm', '1-016.dcm', '1-002.dcm', '1-003.dcm', '1-017.dcm', '1-058.dcm', '1-070.dcm', '1-064.dcm', '1-104.dcm', '1-105.dcm', '1-065.dcm', '1-071.dcm', '1-059.dcm', '1-067.dcm', '1-073.dcm', '1-098.dcm', '1-107.dcm', '1-106.dcm', '1-099.dcm', '1-072.dcm', '1-066.dcm', '1-062.dcm', '1

In [33]:
stats = {
    "total_files": 0,
    "modalities": {},
    "image_shapes": [],
    "pixel_types": {}
}
for root, _, filenames in os.walk(folder_path):
    for filename in filenames:
        if filename.lower().endswith(".dcm"):
            filepath = os.path.join(root, filename)
        try:
            dcm = dcmread(filepath)
            stats["total_files"] += 1
    
            # Update modality statistics
            modality = dcm.Modality.upper()
            stats["modalities"][modality] = stats["modalities"].get(modality, 0) + 1
    
            # Update image shape statistics
            stats["image_shapes"].append(dcm.pixel_array.shape)
    
            # Update pixel type statistics
            pixel_type = dcm.PixelRepresentation
            stats["pixel_types"][pixel_type] = stats["pixel_types"].get(pixel_type, 0) + 1
    
        except IOError as e:
            print(f"Error reading DICOM file: {filepath}")
    
modalities_count = []
for key in stats["modalities"].keys():
    count = {}
    count["name"] = key
    count["value"] = stats["modalities"][key]
    modalities_count.append(count)


image_shapes = Counter(stats["image_shapes"])

response_body = {
    "total_files": stats["total_files"],
    "modalities_count": modalities_count,
    "image_shapes": image_shapes.values(),
    "pixel_types": stats["pixel_types"]
}

In [34]:
response_body

{'total_files': 1730,
 'modalities_count': [{'name': 'DX', 'value': 22},
  {'name': 'CR', 'value': 15},
  {'name': 'CT', 'value': 276},
  {'name': 'PT', 'value': 1207},
  {'name': 'MG', 'value': 60},
  {'name': 'MR', 'value': 150}],
 'image_shapes': dict_values([7, 11, 282, 4, 4, 147, 7, 1060, 7, 3, 57, 131, 1, 3, 1, 1, 4]),
 'pixel_types': {0: 491, 1: 1239}}

In [64]:

def get_dicom_stats():
    """
    This function reads DICOM images from a folder and calculates various statistics.

    Args:
        folder_path: Path to the folder containing DICOM images.

    Returns:
        A dictionary containing statistics about the DICOM images.
    """
    folder_path = "/Users/likhithravula/Documents/NEU/Northeastern/Capstone/NBIA Source Data/Raw/Pseudo-PHI-DICOM-Data"
    try:
        stats = {
            "total_files": 0,
            "modalities": {},
            "image_shapes": [],
            "pixel_types": {}
        }
        for root, _, filenames in os.walk(folder_path):
            for filename in filenames:
                if filename.lower().endswith(".dcm"):
                    filepath = os.path.join(root, filename)
                    try:
                        dcm = dcmread(filepath)
                        stats["total_files"] += 1

                        # Update modality statistics
                        modality = dcm.Modality.upper()
                        stats["modalities"][modality] = stats["modalities"].get(modality, 0) + 1

                        # Update image shape statistics
                        stats["image_shapes"].append(dcm.pixel_array.shape)

                        # Update pixel type statistics
                        pixel_type = dcm.PixelRepresentation
                        stats["pixel_types"][pixel_type] = stats["pixel_types"].get(pixel_type, 0) + 1

                    except IOError as e:
                        print(f"Error reading DICOM file: {filepath}")

        modalities_count = []
        for key in stats["modalities"].keys():
            count = {}
            count["name"] = key
            count["value"] = stats["modalities"][key]
            modalities_count.append(count)

        
        image_shapes = Counter(stats["image_shapes"])

        image_shapes_count = []
        for key, value in image_shapes.items():
            count = {}
            count["name"] = key
            count["value"] = value
            image_shapes_count.append(count)

        response_body = {
            "total_files": stats["total_files"],
            "modalities_count": modalities_count,
            "image_shapes": image_shapes_count,
            "pixel_types": stats["pixel_types"]
        }

        print(response_body)
    except Exception as e:
        print(e)
        # return jsonify({"message": "Error happened at the server", "error": str(e)}), 500

In [69]:
stats["modalities"]

{'DX': 22, 'CR': 15, 'CT': 276, 'PT': 1207, 'MG': 60, 'MR': 150}

In [70]:
def calculate_percentage(data):
  """
  This function takes a dictionary as input and calculates the percentage of each category.

  Args:
      data: A dictionary containing the data.

  Returns:
      A list of dictionaries, where each dictionary represents a category and its percentage.
  """
  total = sum(data.values())
  return [
      {'name': key, 'value': round(value / total * 100, 2)} for key, value in data.items()
  ]

# Example usage
data = {'DX': 22, 'CR': 15, 'CT': 276, 'PT': 1207, 'MG': 60, 'MR': 150}
percentages = calculate_percentage(data)
print(percentages)


[{'name': 'DX', 'value': 1.27}, {'name': 'CR', 'value': 0.87}, {'name': 'CT', 'value': 15.95}, {'name': 'PT', 'value': 69.77}, {'name': 'MG', 'value': 3.47}, {'name': 'MR', 'value': 8.67}]


In [66]:
get_dicom_stats()

{'total_files': 1693, 'modalities_count': [{'name': 'CR', 'value': 4}, {'name': 'CT', 'value': 268}, {'name': 'DX', 'value': 10}, {'name': 'PT', 'value': 1203}, {'name': 'MG', 'value': 58}, {'name': 'MR', 'value': 150}], 'image_shapes': [{'name': (1760, 2140), 'value': 3}, {'name': (512, 512), 'value': 278}, {'name': (2330, 2656), 'value': 1}, {'name': (1760, 1760), 'value': 1}, {'name': (512, 512, 3), 'value': 145}, {'name': (2022, 2022), 'value': 7}, {'name': (128, 128), 'value': 1058}, {'name': (665, 512), 'value': 5}, {'name': (3328, 2560), 'value': 1}, {'name': (1421, 1024), 'value': 57}, {'name': (256, 256), 'value': 131}, {'name': (1211, 888), 'value': 1}, {'name': (1256, 888), 'value': 1}, {'name': (394, 552), 'value': 1}, {'name': (552, 394), 'value': 1}, {'name': (2330, 2774), 'value': 1}, {'name': (2330, 2846), 'value': 1}], 'pixel_types': {0: 460, 1: 1233}}


In [68]:
for key, value in image_shapes.items():
    print(key, value)

(2330, 2846) 7
(1760, 2140) 11
(512, 512) 282
(2330, 2656) 4
(1760, 1760) 4
(512, 512, 3) 147
(2022, 2022) 7
(128, 128) 1060
(665, 512) 7
(3328, 2560) 3
(1421, 1024) 57
(256, 256) 131
(1211, 888) 1
(1256, 888) 3
(394, 552) 1
(552, 394) 1
(2330, 2774) 4


In [58]:
json_data = json.dumps(response_body, indent=4)
print(json_data)

TypeError: Object of type dict_values is not JSON serializable

In [56]:
# from flask import jsonify

import json

r = json.dumps(response_body)
res = json.loads(r)

res

TypeError: Object of type dict_values is not JSON serializable

# Trigger model

In [85]:
import requests
 
API_URL = "https://api-inference.huggingface.co/models/outlier9322/dicomllm"
headers = {"Authorization": "Bearer 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"}
 
def query(payload):
	response = requests.post(API_URL, headers=headers, json=payload)
	return response.json()
    
output = query({
	"inputs": "DOB 26/02/1998 KARUNAKAR",
})

In [86]:
output

{'error': 'Model outlier9322/dicomllm is currently loading',
 'estimated_time': 48.3431396484375}

In [1]:
a = "/Users/likhithravula/Documents/NEU/Northeastern/Capstone/1-1.dcm"

a.split("/")[-1]

'1-1.dcm'