In [1]:
import pandas as pd

In [2]:
# Load the data

In [4]:
case_labels = pd.read_csv('case_label_filtered.csv')

In [5]:
condition_summary = pd.DataFrame({
    'count': case_labels['condition_1'].value_counts(),
    'percentage': case_labels['condition_1'].value_counts(normalize=True) * 100
})

# Round the percentage to two decimal places
condition_summary['percentage'] = condition_summary['percentage'].round(2)

# Sort by count in descending order
condition_summary = condition_summary.sort_values('count', ascending=False)

print(condition_summary.head(50))

                                         count  percentage
condition_1                                               
Eczema                                     123       18.78
Urticaria                                   81       12.37
Allergic Contact Dermatitis                 41        6.26
Folliculitis                                34        5.19
Insect Bite                                 29        4.43
Acute dermatitis, NOS                       26        3.97
O/E - ecchymoses present                    20        3.05
Herpes Zoster                               18        2.75
Herpes Simplex                              16        2.44
Psoriasis                                   16        2.44
Acne                                        14        2.14
Chronic dermatitis, NOS                     11        1.68
Tinea Versicolor                            11        1.68
Irritant Contact Dermatitis                 11        1.68
Tinea                                       11        1.

In [6]:
def count_images(row):
    count = 0
    for path in [row['image_1_path'], row['image_2_path'], row['image_3_path']]:
        if pd.notna(path) and path.strip() != '':
            count += 1
    return count

# Apply this function and check the results
case_labels['image_count'] = case_labels.apply(count_images, axis=1)
print(case_labels['image_count'].value_counts())
print(f"Total image count for Eczema: {case_labels[case_labels['condition_1'] == 'Eczema']['image_count'].sum()}")
# Count images for each condition
image_counts = case_labels.groupby('condition_1').apply(lambda x: x.apply(count_images, axis=1).sum()).reset_index()
image_counts.columns = ['condition_1', 'image_count']

# Merge with the summary DataFrame
conditions_summary_final = condition_summary.merge(image_counts, on='condition_1', how='left')

# Fill NaN values with 0 for conditions that don't have any images
conditions_summary_final['image_count'] = conditions_summary_final['image_count'].fillna(0).astype(int)

# Calculate the average number of images per case
conditions_summary_final['avg_images_per_case'] = conditions_summary_final['image_count'] / conditions_summary_final['count']

# Round to 2 decimal places
conditions_summary_final['avg_images_per_case'] = conditions_summary_final['avg_images_per_case'].round(2)

# Sort the DataFrame by count in descending order
conditions_summary_final = conditions_summary_final.sort_values('count', ascending=False)

# Display the updated DataFrame
print(conditions_summary_final)

image_count
1    300
3    265
2     90
Name: count, dtype: int64
Total image count for Eczema: 272
                          condition_1  count  percentage  image_count  \
0                              Eczema    123       18.78          272   
1                           Urticaria     81       12.37          150   
2         Allergic Contact Dermatitis     41        6.26           79   
3                        Folliculitis     34        5.19           62   
4                         Insect Bite     29        4.43           66   
..                                ...    ...         ...          ...   
63  Lichenified eczematous dermatitis      1        0.15            3   
64                  Traumatic blister      1        0.15            1   
65  Varicose veins of lower extremity      1        0.15            1   
66                              Milia      1        0.15            3   
89               Chicken pox exanthem      1        0.15            1   

    avg_images_per_case 

In [7]:
conditions_summary_final.sort_values('image_count', ascending=False)

Unnamed: 0,condition_1,count,percentage,image_count,avg_images_per_case
0,Eczema,123,18.78,272,2.21
1,Urticaria,81,12.37,150,1.85
2,Allergic Contact Dermatitis,41,6.26,79,1.93
4,Insect Bite,29,4.43,66,2.28
3,Folliculitis,34,5.19,62,1.82
...,...,...,...,...,...
75,Elephantiasis nostras,1,0.15,1,1.00
70,Vasculitis of the skin,1,0.15,1,1.00
81,Epidermal nevus,1,0.15,1,1.00
80,O/E - petechiae present,1,0.15,1,1.00


In [8]:
conditions_summary_final.reset_index(drop=True, inplace=True)

In [9]:
conditions_summary_final

Unnamed: 0,condition_1,count,percentage,image_count,avg_images_per_case
0,Eczema,123,18.78,272,2.21
1,Urticaria,81,12.37,150,1.85
2,Allergic Contact Dermatitis,41,6.26,79,1.93
3,Folliculitis,34,5.19,62,1.82
4,Insect Bite,29,4.43,66,2.28
...,...,...,...,...,...
85,Lichenified eczematous dermatitis,1,0.15,3,3.00
86,Traumatic blister,1,0.15,1,1.00
87,Varicose veins of lower extremity,1,0.15,1,1.00
88,Milia,1,0.15,3,3.00


In [10]:
# Save the summary to a CSV file
conditions_summary_final.to_csv('conditions_summary.csv', index=False)

In [11]:
case_labels

Unnamed: 0,case_id,body_part,image_1_path,image_2_path,image_3_path,image_1_shot_type,image_2_shot_type,image_3_shot_type,combined_race,condition_1,confidence_1,avg_skin_type,skin_type_category,image_count
0,-1022162013984621110,,dataset/images/8406712750126834903.png,,,CLOSE_UP,,,,Purpura,1.0,4.0,brown,1
1,-1033431645373513522,,dataset/images/-1677898261371801194.png,dataset/images/2587768503558995392.png,dataset/images/5836170508263925618.png,CLOSE_UP,AT_DISTANCE,AT_AN_ANGLE,,Urticaria,1.0,3.0,fair,3
2,-1073544188024944010,,dataset/images/-1542787778564695920.png,dataset/images/-3253987854671575512.png,dataset/images/108462233489916244.png,CLOSE_UP,AT_DISTANCE,AT_AN_ANGLE,,Urticaria,1.0,,,3
3,-1079717215777778516,,dataset/images/-3060870142909393201.png,,,CLOSE_UP,,,,Eczema,1.0,3.0,fair,1
4,-1120481478267460560,leg,dataset/images/-3933475004882152757.png,dataset/images/-6233451743223829948.png,dataset/images/-764324206369620251.png,CLOSE_UP,AT_DISTANCE,AT_AN_ANGLE,,Eczema,1.0,3.0,fair,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
650,9094416934450217084,,dataset/images/-4639546377033199002.png,,,CLOSE_UP,,,,Chicken pox exanthem,1.0,2.0,white,1
651,9105346707210308949,arm,dataset/images/-3238683302029267521.png,dataset/images/-7141594063372177636.png,dataset/images/-8970062608215251005.png,AT_AN_ANGLE,CLOSE_UP,AT_DISTANCE,,Eczema,1.0,2.0,white,3
652,930124461134778983,leg,dataset/images/-2787315420624607465.png,dataset/images/-8048467112742368195.png,dataset/images/6371452221179710784.png,CLOSE_UP,AT_AN_ANGLE,AT_DISTANCE,WHITE,Stasis Dermatitis,1.0,3.0,fair,3
653,935346003160692168,,dataset/images/-8779903138873713227.png,,,CLOSE_UP,,,,Psoriasis,1.0,3.0,fair,1


In [12]:
filtered_case_labels = case_labels[case_labels['condition_1'].isin([
    'Eczema',
    'Urticaria',
    'Allergic Contact Dermatitis',
    'Folliculitis',
    'Acne',
    'Psoriasis',
    'Herpes Simplex',
    'Herpes Zoster',
    'Tinea',
    'Atopic Dermatitis'
])]

In [13]:
filtered_case_labels

Unnamed: 0,case_id,body_part,image_1_path,image_2_path,image_3_path,image_1_shot_type,image_2_shot_type,image_3_shot_type,combined_race,condition_1,confidence_1,avg_skin_type,skin_type_category,image_count
1,-1033431645373513522,,dataset/images/-1677898261371801194.png,dataset/images/2587768503558995392.png,dataset/images/5836170508263925618.png,CLOSE_UP,AT_DISTANCE,AT_AN_ANGLE,,Urticaria,1.0,3.0,fair,3
2,-1073544188024944010,,dataset/images/-1542787778564695920.png,dataset/images/-3253987854671575512.png,dataset/images/108462233489916244.png,CLOSE_UP,AT_DISTANCE,AT_AN_ANGLE,,Urticaria,1.0,,,3
3,-1079717215777778516,,dataset/images/-3060870142909393201.png,,,CLOSE_UP,,,,Eczema,1.0,3.0,fair,1
4,-1120481478267460560,leg,dataset/images/-3933475004882152757.png,dataset/images/-6233451743223829948.png,dataset/images/-764324206369620251.png,CLOSE_UP,AT_DISTANCE,AT_AN_ANGLE,,Eczema,1.0,3.0,fair,3
6,-1204349630028029673,foot_top_or_side,dataset/images/-3136247653193336048.png,,,CLOSE_UP,,,,Tinea,1.0,3.0,fair,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
647,9055010458301656007,arm,dataset/images/-862558295135857219.png,dataset/images/6837737674648343890.png,dataset/images/8795885615476833533.png,CLOSE_UP,AT_DISTANCE,AT_AN_ANGLE,,Eczema,1.0,4.0,brown,3
649,9079077127826406933,arm,dataset/images/-5432546180216775741.png,dataset/images/-7343000754918930454.png,dataset/images/2021255960758992412.png,AT_DISTANCE,CLOSE_UP,AT_AN_ANGLE,WHITE,Eczema,1.0,3.0,fair,3
651,9105346707210308949,arm,dataset/images/-3238683302029267521.png,dataset/images/-7141594063372177636.png,dataset/images/-8970062608215251005.png,AT_AN_ANGLE,CLOSE_UP,AT_DISTANCE,,Eczema,1.0,2.0,white,3
653,935346003160692168,,dataset/images/-8779903138873713227.png,,,CLOSE_UP,,,,Psoriasis,1.0,3.0,fair,1


In [14]:
len(filtered_case_labels)

354

In [18]:
# Moving images to the training data directory

In [21]:
import os
import shutil
# Base directory for the training data
base_dir = 'train_data'

# Ensure the base directory exists
os.makedirs(base_dir, exist_ok=True)

# Function to move an image if it exists
def move_image(src, dst):
    if os.path.exists(src):
        os.makedirs(os.path.dirname(dst), exist_ok=True)
        shutil.move(src, dst)
        print(f"Moved {src} to {dst}")
    else:
        print(f"File not found: {src}")

# Iterate through the filtered dataframe
for index, row in filtered_case_labels.iterrows():
    condition = row['condition_1']
    condition_dir = os.path.join(base_dir, condition)

    # Ensure the condition directory exists
    os.makedirs(condition_dir, exist_ok=True)

    # Move images if they exist
    for i in range(1, 4):  # We have up to 3 images per case
        image_path = row.get(f'image_{i}_path')
        if pd.notna(image_path):
            new_path = os.path.join(condition_dir, os.path.basename(image_path))
            move_image(image_path, new_path)

print("Image moving process completed.")

Moved dataset/images/-1677898261371801194.png to train_data/Urticaria/-1677898261371801194.png
Moved dataset/images/2587768503558995392.png to train_data/Urticaria/2587768503558995392.png
Moved dataset/images/5836170508263925618.png to train_data/Urticaria/5836170508263925618.png
Moved dataset/images/-1542787778564695920.png to train_data/Urticaria/-1542787778564695920.png
Moved dataset/images/-3253987854671575512.png to train_data/Urticaria/-3253987854671575512.png
Moved dataset/images/108462233489916244.png to train_data/Urticaria/108462233489916244.png
Moved dataset/images/-3060870142909393201.png to train_data/Eczema/-3060870142909393201.png
Moved dataset/images/-3933475004882152757.png to train_data/Eczema/-3933475004882152757.png
Moved dataset/images/-6233451743223829948.png to train_data/Eczema/-6233451743223829948.png
Moved dataset/images/-764324206369620251.png to train_data/Eczema/-764324206369620251.png
Moved dataset/images/-3136247653193336048.png to train_data/Tinea/-31362

In [15]:
import pandas as pd
tagged_images = pd.read_csv('image_tags.csv')

In [16]:
tagged_images

Unnamed: 0,Image Path,Condition,Body Part
0,train_data/Herpes Simplex/-4548551644236086193...,Herpes Simplex,Arms
1,train_data/Herpes Simplex/8938987404946448422.png,Herpes Simplex,Neck
2,train_data/Herpes Simplex/-1340575480650711275...,Herpes Simplex,Arms
3,train_data/Herpes Simplex/-1545090829447090603...,Herpes Simplex,Hands
4,train_data/Herpes Simplex/2761011390199798183.png,Herpes Simplex,Legs
...,...,...,...
432,train_data/Folliculitis/2432074176479085493.png,Folliculitis,Other
433,train_data/Folliculitis/1633175637881662694.png,Folliculitis,Neck
434,train_data/Folliculitis/-3334185101988508211.png,Folliculitis,Other
435,train_data/Folliculitis/-487619432479533708.png,Folliculitis,Legs


In [17]:
# Populating the tagged image data with condition labels

In [22]:
import pandas as pd

def clean_image_path(path, prefix_to_remove):
    if path and isinstance(path, str):
        return path.replace(prefix_to_remove, '')
    return path

def find_matching_row(image_path, filtered_df):
    # Remove 'train_data/' and everything up to the next '/' from tagged_images path
    cleaned_tagged_image_path = '/'.join(image_path.split('/')[2:])

    for _, row in filtered_df.iterrows():
        for i in range(1, 4):
            col_name = f'image_{i}_path'
            if col_name in row and row[col_name]:
                # Remove 'dataset/images/' from filtered_case_labels path
                cleaned_filtered_image_path = clean_image_path(row[col_name], 'dataset/images/')
                if cleaned_tagged_image_path == cleaned_filtered_image_path:
                    return {
                        'case_id': row['case_id'],
                        'image_shot_type': row[f'image_{i}_shot_type'],
                        'combined_race': row['combined_race'],
                        'skin_type_category': row['skin_type_category']
                    }
    return {
        'case_id': None,
        'image_shot_type': None,
        'combined_race': None,
        'skin_type_category': None
    }

# Clean the image paths in filtered_case_labels
for col in ['image_1_path', 'image_2_path', 'image_3_path']:
    filtered_case_labels[col] = filtered_case_labels[col].apply(lambda x: clean_image_path(x, 'dataset/images/'))

# Apply the function to each row in tagged_images
tagged_images[['case_id', 'image_shot_type', 'combined_race', 'skin_type_category']] = tagged_images['Image Path'].apply(
    lambda x: pd.Series(find_matching_row(x, filtered_case_labels))
)

# Display the updated tagged_images dataframe
print(tagged_images)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_case_labels[col] = filtered_case_labels[col].apply(lambda x: clean_image_path(x, 'dataset/images/'))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_case_labels[col] = filtered_case_labels[col].apply(lambda x: clean_image_path(x, 'dataset/images/'))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-v

                                            Image Path       Condition  \
0    train_data/Herpes Simplex/-4548551644236086193...  Herpes Simplex   
1    train_data/Herpes Simplex/8938987404946448422.png  Herpes Simplex   
2    train_data/Herpes Simplex/-1340575480650711275...  Herpes Simplex   
3    train_data/Herpes Simplex/-1545090829447090603...  Herpes Simplex   
4    train_data/Herpes Simplex/2761011390199798183.png  Herpes Simplex   
..                                                 ...             ...   
432    train_data/Folliculitis/2432074176479085493.png    Folliculitis   
433    train_data/Folliculitis/1633175637881662694.png    Folliculitis   
434   train_data/Folliculitis/-3334185101988508211.png    Folliculitis   
435    train_data/Folliculitis/-487619432479533708.png    Folliculitis   
436    train_data/Folliculitis/7980844269689605309.png    Folliculitis   

    Body Part              case_id image_shot_type              combined_race  \
0        Arms -763724047893912

In [23]:
tagged_images


Unnamed: 0,Image Path,Condition,Body Part,case_id,image_shot_type,combined_race,skin_type_category
0,train_data/Herpes Simplex/-4548551644236086193...,Herpes Simplex,Arms,-7637240478939124697,CLOSE_UP,WHITE,white
1,train_data/Herpes Simplex/8938987404946448422.png,Herpes Simplex,Neck,2084479032845321772,AT_DISTANCE,,brown
2,train_data/Herpes Simplex/-1340575480650711275...,Herpes Simplex,Arms,-2298741642819106068,AT_DISTANCE,BLACK_OR_AFRICAN_AMERICAN,brown
3,train_data/Herpes Simplex/-1545090829447090603...,Herpes Simplex,Hands,8290822761716679927,AT_DISTANCE,WHITE,fair
4,train_data/Herpes Simplex/2761011390199798183.png,Herpes Simplex,Legs,868379715655855009,AT_DISTANCE,WHITE,fair
...,...,...,...,...,...,...,...
432,train_data/Folliculitis/2432074176479085493.png,Folliculitis,Other,-4681186898904271014,CLOSE_UP,,white
433,train_data/Folliculitis/1633175637881662694.png,Folliculitis,Neck,-7432151961683759568,AT_AN_ANGLE,,fair
434,train_data/Folliculitis/-3334185101988508211.png,Folliculitis,Other,597372832999055528,CLOSE_UP,WHITE,fair
435,train_data/Folliculitis/-487619432479533708.png,Folliculitis,Legs,3905100936130110268,AT_DISTANCE,,fair


In [24]:
tagged_images.dtypes

Image Path            object
Condition             object
Body Part             object
case_id                int64
image_shot_type       object
combined_race         object
skin_type_category    object
dtype: object

In [26]:
tagged_images = tagged_images.reindex(columns=['case_id', 'Condition', 'Body Part', 'image_shot_type', 'combined_race', 'skin_type_category', 'Image Path'])

In [27]:
tagged_images

Unnamed: 0,case_id,Condition,Body Part,image_shot_type,combined_race,skin_type_category,Image Path
0,-7637240478939124697,Herpes Simplex,Arms,CLOSE_UP,WHITE,white,train_data/Herpes Simplex/-4548551644236086193...
1,2084479032845321772,Herpes Simplex,Neck,AT_DISTANCE,,brown,train_data/Herpes Simplex/8938987404946448422.png
2,-2298741642819106068,Herpes Simplex,Arms,AT_DISTANCE,BLACK_OR_AFRICAN_AMERICAN,brown,train_data/Herpes Simplex/-1340575480650711275...
3,8290822761716679927,Herpes Simplex,Hands,AT_DISTANCE,WHITE,fair,train_data/Herpes Simplex/-1545090829447090603...
4,868379715655855009,Herpes Simplex,Legs,AT_DISTANCE,WHITE,fair,train_data/Herpes Simplex/2761011390199798183.png
...,...,...,...,...,...,...,...
432,-4681186898904271014,Folliculitis,Other,CLOSE_UP,,white,train_data/Folliculitis/2432074176479085493.png
433,-7432151961683759568,Folliculitis,Neck,AT_AN_ANGLE,,fair,train_data/Folliculitis/1633175637881662694.png
434,597372832999055528,Folliculitis,Other,CLOSE_UP,WHITE,fair,train_data/Folliculitis/-3334185101988508211.png
435,3905100936130110268,Folliculitis,Legs,AT_DISTANCE,,fair,train_data/Folliculitis/-487619432479533708.png


In [28]:
tagged_images.to_csv('tagged_images_with_labels.csv', index=False)

In [30]:
tagged_images.groupby('Condition').count()

Unnamed: 0_level_0,case_id,Body Part,image_shot_type,combined_race,skin_type_category,Image Path
Condition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Acne,24,24,24,19,24,24
Allergic Contact Dermatitis,42,42,42,21,40,42
Eczema,137,137,137,56,130,137
Folliculitis,43,43,43,28,42,43
Herpes Simplex,21,21,21,14,20,21
Herpes Zoster,26,26,26,6,26,26
Psoriasis,27,27,27,6,24,27
Tinea,22,22,22,12,18,22
Urticaria,95,95,95,46,90,95


In [35]:
import csv
import os
import shutil

# Define the destination directory
dest_dir = 'training_imgs'

# Ensure the destination directory exists
os.makedirs(dest_dir, exist_ok=True)

# Read the CSV file
with open('tagged_images_with_labels.csv', 'r') as csvfile:
    csvreader = csv.DictReader(csvfile)

    # Iterate through each row in the CSV
    for row in csvreader:
        # Get the full image path
        source_path = row['Image Path']

        # Get just the filename
        filename = os.path.basename(source_path)

        # Construct the destination path
        dest_path = os.path.join(dest_dir, filename)

        # Move the file
        try:
            shutil.move(source_path, dest_path)
            print(f"Moved: {filename}")
        except FileNotFoundError:
            print(f"File not found: {source_path}")
        except Exception as e:
            print(f"Error moving {source_path}: {str(e)}")

print("File moving complete.")

Moved: -4548551644236086193.png
Moved: 8938987404946448422.png
Moved: -1340575480650711275.png
Moved: -1545090829447090603.png
Moved: 2761011390199798183.png
Moved: 5158179525394239842.png
Moved: 2377128044154932230.png
Moved: 7836755757138392671.png
Moved: 5997785294563888355.png
Moved: 7958401754757491846.png
Moved: -60191807411945794.png
Moved: 2519585291032607145.png
Moved: -8653997701010841176.png
Moved: 8064981422824571519.png
Moved: 7696350726318300806.png
Moved: 3223640013662199890.png
Moved: 4688741281410456578.png
Moved: 3434201760584703211.png
Moved: 3274473114853778021.png
Moved: 8652187035654812446.png
Moved: 472419190488801341.png
Moved: -454976303503844455.png
Moved: 1438874731854835012.png
Moved: 3630731220712902717.png
Moved: 8898861788754959017.png
Moved: 905167043957992633.png
Moved: -208880250508994115.png
Moved: 2941606640160585348.png
Moved: -3136247653193336048.png
Moved: 2232175051925813495.png
Moved: -1820250928982581107.png
Moved: 60032766606288611.png
Moved: 

In [36]:
tagged_images_1 = pd.read_csv('image_tags.csv')

In [37]:
import pandas as pd

def clean_image_path(path, prefix_to_remove):
    if path and isinstance(path, str):
        return path.replace(prefix_to_remove, '')
    return path

def find_matching_row(image_path, filtered_df):
    # Remove 'train_data/' and everything up to the next '/' from tagged_images path
    cleaned_tagged_image_path = '/'.join(image_path.split('/')[2:])

    for _, row in filtered_df.iterrows():
        for i in range(1, 4):
            col_name = f'image_{i}_path'
            if col_name in row and row[col_name]:
                # Remove 'dataset/images/' from filtered_case_labels path
                cleaned_filtered_image_path = clean_image_path(row[col_name], 'dataset/images/')
                if cleaned_tagged_image_path == cleaned_filtered_image_path:
                    return {
                        'case_id': row['case_id'],
                        'image_shot_type': row[f'image_{i}_shot_type'],
                        'combined_race': row['combined_race'],
                        'skin_type_category': row['skin_type_category']
                    }
    return {
        'case_id': None,
        'image_shot_type': None,
        'combined_race': None,
        'skin_type_category': None
    }

# Clean the image paths in filtered_case_labels
for col in ['image_1_path', 'image_2_path', 'image_3_path']:
    filtered_case_labels[col] = filtered_case_labels[col].apply(lambda x: clean_image_path(x, 'dataset/images/'))

# Apply the function to each row in tagged_images
tagged_images_1[['case_id', 'image_shot_type', 'combined_race', 'skin_type_category']] = tagged_images_1['Image Path'].apply(
    lambda x: pd.Series(find_matching_row(x, filtered_case_labels))
)

# Display the updated tagged_images dataframe
print(tagged_images_1)

                                           Image Path  \
0           train_data/Tinea/-3443499009691532250.png   
1   train_data/Allergic Contact Dermatitis/-532249...   
2   train_data/Allergic Contact Dermatitis/-877882...   
3   train_data/Allergic Contact Dermatitis/-431045...   
4   train_data/Allergic Contact Dermatitis/3822050...   
5   train_data/Allergic Contact Dermatitis/6060584...   
6   train_data/Allergic Contact Dermatitis/1387639...   
7   train_data/Allergic Contact Dermatitis/-676084...   
8   train_data/Allergic Contact Dermatitis/6349646...   
9   train_data/Allergic Contact Dermatitis/4785295...   
10   train_data/Herpes Zoster/-722503728208176049.png   
11   train_data/Herpes Zoster/5794288412787857146.png   
12   train_data/Folliculitis/-3290459357811788031.png   

                      Condition Body Part              case_id  \
0                         Tinea     Other  8296549785436989539   
1   Allergic Contact Dermatitis     Other -2483075134447987416   
2  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_case_labels[col] = filtered_case_labels[col].apply(lambda x: clean_image_path(x, 'dataset/images/'))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_case_labels[col] = filtered_case_labels[col].apply(lambda x: clean_image_path(x, 'dataset/images/'))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-v

In [38]:
tagged_images_1 = tagged_images_1.reindex(columns=['case_id', 'Condition', 'Body Part', 'image_shot_type', 'combined_race', 'skin_type_category', 'Image Path'])

In [39]:
tagged_images_1.to_csv('tagged_images_1_with_labels.csv', index=False)

In [40]:
import csv
import os
import shutil

# Define the destination directory
dest_dir = 'training_imgs'

# Ensure the destination directory exists
os.makedirs(dest_dir, exist_ok=True)

# Read the CSV file
with open('tagged_images_1_with_labels.csv', 'r') as csvfile:
    csvreader = csv.DictReader(csvfile)

    # Iterate through each row in the CSV
    for row in csvreader:
        # Get the full image path
        source_path = row['Image Path']

        # Get just the filename
        filename = os.path.basename(source_path)

        # Construct the destination path
        dest_path = os.path.join(dest_dir, filename)

        # Move the file
        try:
            shutil.move(source_path, dest_path)
            print(f"Moved: {filename}")
        except FileNotFoundError:
            print(f"File not found: {source_path}")
        except Exception as e:
            print(f"Error moving {source_path}: {str(e)}")

print("File moving complete.")

Moved: -3443499009691532250.png
Moved: -532249905180848227.png
Moved: -8778820166267554159.png
Moved: -4310457434092779564.png
Moved: 382205097892683569.png
Moved: 6060584194970876159.png
Moved: 1387639516796423603.png
Moved: -6760849338709173764.png
Moved: 6349646448404623907.png
Moved: 4785295238614687170.png
Moved: -722503728208176049.png
Moved: 5794288412787857146.png
Moved: -3290459357811788031.png
File moving complete.


In [41]:
tagged_images._append(tagged_images_1)

Unnamed: 0,case_id,Condition,Body Part,image_shot_type,combined_race,skin_type_category,Image Path
0,-7637240478939124697,Herpes Simplex,Arms,CLOSE_UP,WHITE,white,train_data/Herpes Simplex/-4548551644236086193...
1,2084479032845321772,Herpes Simplex,Neck,AT_DISTANCE,,brown,train_data/Herpes Simplex/8938987404946448422.png
2,-2298741642819106068,Herpes Simplex,Arms,AT_DISTANCE,BLACK_OR_AFRICAN_AMERICAN,brown,train_data/Herpes Simplex/-1340575480650711275...
3,8290822761716679927,Herpes Simplex,Hands,AT_DISTANCE,WHITE,fair,train_data/Herpes Simplex/-1545090829447090603...
4,868379715655855009,Herpes Simplex,Legs,AT_DISTANCE,WHITE,fair,train_data/Herpes Simplex/2761011390199798183.png
...,...,...,...,...,...,...,...
8,-2518265086444700975,Allergic Contact Dermatitis,Other,CLOSE_UP,,,train_data/Allergic Contact Dermatitis/6349646...
9,3231607339220548802,Allergic Contact Dermatitis,Hands,CLOSE_UP,,fair,train_data/Allergic Contact Dermatitis/4785295...
10,6050019040579522081,Herpes Zoster,Other,AT_AN_ANGLE,,white,train_data/Herpes Zoster/-722503728208176049.png
11,-8409059137252391480,Herpes Zoster,Other,AT_AN_ANGLE,,fair,train_data/Herpes Zoster/5794288412787857146.png


In [42]:
tagged_images.groupby('Condition').count()

Unnamed: 0_level_0,case_id,Body Part,image_shot_type,combined_race,skin_type_category,Image Path
Condition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Acne,24,24,24,19,24,24
Allergic Contact Dermatitis,42,42,42,21,40,42
Eczema,137,137,137,56,130,137
Folliculitis,43,43,43,28,42,43
Herpes Simplex,21,21,21,14,20,21
Herpes Zoster,26,26,26,6,26,26
Psoriasis,27,27,27,6,24,27
Tinea,22,22,22,12,18,22
Urticaria,95,95,95,46,90,95


In [43]:
print(tagged_images)

                 case_id       Condition Body Part image_shot_type  \
0   -7637240478939124697  Herpes Simplex      Arms        CLOSE_UP   
1    2084479032845321772  Herpes Simplex      Neck     AT_DISTANCE   
2   -2298741642819106068  Herpes Simplex      Arms     AT_DISTANCE   
3    8290822761716679927  Herpes Simplex     Hands     AT_DISTANCE   
4     868379715655855009  Herpes Simplex      Legs     AT_DISTANCE   
..                   ...             ...       ...             ...   
432 -4681186898904271014    Folliculitis     Other        CLOSE_UP   
433 -7432151961683759568    Folliculitis      Neck     AT_AN_ANGLE   
434   597372832999055528    Folliculitis     Other        CLOSE_UP   
435  3905100936130110268    Folliculitis      Legs     AT_DISTANCE   
436  2346463657328001892    Folliculitis     Other        CLOSE_UP   

                 combined_race skin_type_category  \
0                        WHITE              white   
1                          NaN              brown   


In [45]:
tagged_images.dtypes

case_id                int64
Condition             object
Body Part             object
image_shot_type       object
combined_race         object
skin_type_category    object
Image Path            object
dtype: object

In [46]:
for column in tagged_images.columns:
    if column != 'case_id':
        tagged_images[column] = tagged_images[column].astype(str)
        
tagged_images.dtypes

case_id                int64
Condition             object
Body Part             object
image_shot_type       object
combined_race         object
skin_type_category    object
Image Path            object
dtype: object

In [47]:
# Function to generate the text description
def generate_text(row):
    if row['Body Part'].lower() == 'other':
        return f"a {row['image_shot_type'].lower()} photo of {row['Condition'].lower()} skin condition on {row['skin_type_category'].lower()} skin"
    else:
        return f"a {row['image_shot_type'].lower()} photo of {row['Condition'].lower()} skin condition on {row['Body Part'].lower()} on {row['skin_type_category'].lower()} skin"

# Create a new DataFrame for metadata
metadata = pd.DataFrame()

# Process the image path
metadata['image_path'] = tagged_images['Image Path'].apply(lambda x: 'training_imgs/' + os.path.basename(x))

# Generate the text description
metadata['text'] = tagged_images.apply(generate_text, axis=1)

# Save the new DataFrame to a CSV file
metadata.to_csv('metadata.csv', index=False)

print("metadata.csv has been created successfully.")

metadata.csv has been created successfully.


In [48]:
def remove_underscores(text):
    return text.replace('_', ' ')

# Apply the function to the 'text' column
metadata['text'] = metadata['text'].apply(remove_underscores)

In [49]:
metadata

Unnamed: 0,image_path,text
0,training_imgs/-4548551644236086193.png,a close up photo of herpes simplex skin condit...
1,training_imgs/8938987404946448422.png,a at distance photo of herpes simplex skin con...
2,training_imgs/-1340575480650711275.png,a at distance photo of herpes simplex skin con...
3,training_imgs/-1545090829447090603.png,a at distance photo of herpes simplex skin con...
4,training_imgs/2761011390199798183.png,a at distance photo of herpes simplex skin con...
...,...,...
432,training_imgs/2432074176479085493.png,a close up photo of folliculitis skin conditio...
433,training_imgs/1633175637881662694.png,a at an angle photo of folliculitis skin condi...
434,training_imgs/-3334185101988508211.png,a close up photo of folliculitis skin conditio...
435,training_imgs/-487619432479533708.png,a at distance photo of folliculitis skin condi...


In [50]:
metadata.to_csv('metadata.csv', index=False)

In [51]:
metadata = pd.read_csv('metadata.csv')

In [52]:
metadata

Unnamed: 0,image_path,text
0,training_imgs/-4548551644236086193.png,a close up photo of herpes simplex skin condit...
1,training_imgs/8938987404946448422.png,a at distance photo of herpes simplex skin con...
2,training_imgs/-1340575480650711275.png,a at distance photo of herpes simplex skin con...
3,training_imgs/-1545090829447090603.png,a at distance photo of herpes simplex skin con...
4,training_imgs/2761011390199798183.png,a at distance photo of herpes simplex skin con...
...,...,...
432,training_imgs/2432074176479085493.png,a close up photo of folliculitis skin conditio...
433,training_imgs/1633175637881662694.png,a at an angle photo of folliculitis skin condi...
434,training_imgs/-3334185101988508211.png,a close up photo of folliculitis skin conditio...
435,training_imgs/-487619432479533708.png,a at distance photo of folliculitis skin condi...


In [10]:
!git clone https://github.com/danielgatis/rembg.git

Cloning into 'rembg'...
remote: Enumerating objects: 2000, done.[K
remote: Counting objects: 100% (2000/2000), done.[K
remote: Compressing objects: 100% (853/853), done.[K
remote: Total 2000 (delta 1201), reused 1749 (delta 1069), pack-reused 0[K
Receiving objects: 100% (2000/2000), 62.70 MiB | 30.20 MiB/s, done.
Resolving deltas: 100% (1201/1201), done.


In [12]:
import rembg.rembg.commands.s_
from PIL import Image

input_path = 'data/training_data/training_imgs/60032766606288611.png'
output_path = 'output.png'

input = Image.open(input_path)
output = remove(input)
output.save(output_path)

FileNotFoundError: [Errno 2] No such file or directory: 'data/training_data/training_imgs/60032766606288611.png'