## Create a random sample of 10 patients

In [1]:
# Import dependencies

import pandas as pd
from pathlib import Path

In [2]:
# Load the HAM10000 metadata CSV into a pandas DataFrame
df = pd.read_csv(filepath_or_buffer=Path("Resources/HAM10000_metadata.csv"))

# Preview the first five rows
df.head(5)

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear


In [3]:
# Create patients df - random sample of 10 rows.
# random_state pins the draw so that Restart & Run All selects the same
# 10 rows every time (and therefore writes the same CSV files downstream).
patients = df.sample(n=10, random_state=42)
patients

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization
8258,HAM_0000291,ISIC_0031360,nv,histo,75.0,female,lower extremity
3026,HAM_0004535,ISIC_0024364,nv,follow_up,45.0,male,upper extremity
6700,HAM_0003277,ISIC_0026890,nv,histo,30.0,male,upper extremity
8423,HAM_0004839,ISIC_0028450,nv,histo,55.0,male,back
7051,HAM_0005611,ISIC_0025850,nv,histo,35.0,female,lower extremity
3520,HAM_0000617,ISIC_0029703,nv,follow_up,75.0,male,trunk
6713,HAM_0007311,ISIC_0026367,nv,histo,45.0,male,face
7632,HAM_0001935,ISIC_0033067,nv,histo,50.0,male,chest
8136,HAM_0000721,ISIC_0030243,nv,histo,40.0,female,trunk
3100,HAM_0002151,ISIC_0029870,nv,follow_up,65.0,male,abdomen


In [5]:
# Keep every row of df that was NOT drawn into the patients sample.
# `patients` comes from df.sample(), so it still carries df's original row
# labels; dropping by label removes exactly the sampled rows. (An all-column
# merge with indicator=True would also discard any unsampled row that happens
# to be value-identical to a sampled one.)
filtered_df = df.drop(index=patients.index)

# Sanity check: the sample and the remainder together account for every row
assert len(filtered_df) + len(patients) == len(df)

# Save the remaining rows to a CSV file (row labels are not written)
filtered_df.to_csv('Resources/filtered_HAM10000_metadata.csv', index=False)

# Preview the filtered DataFrame
filtered_df.head()

     lesion_id      image_id   dx dx_type   age   sex localization
0  HAM_0000118  ISIC_0027419  bkl   histo  80.0  male        scalp
1  HAM_0000118  ISIC_0025030  bkl   histo  80.0  male        scalp
2  HAM_0002730  ISIC_0026769  bkl   histo  80.0  male        scalp
3  HAM_0002730  ISIC_0025661  bkl   histo  80.0  male        scalp
4  HAM_0001466  ISIC_0031633  bkl   histo  75.0  male          ear


In [6]:
# Write the sampled patients DataFrame to CSV without the row index,
# then display it
patients.to_csv(path_or_buf='Resources/patients.csv', index=False)
patients

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization
8258,HAM_0000291,ISIC_0031360,nv,histo,75.0,female,lower extremity
3026,HAM_0004535,ISIC_0024364,nv,follow_up,45.0,male,upper extremity
6700,HAM_0003277,ISIC_0026890,nv,histo,30.0,male,upper extremity
8423,HAM_0004839,ISIC_0028450,nv,histo,55.0,male,back
7051,HAM_0005611,ISIC_0025850,nv,histo,35.0,female,lower extremity
3520,HAM_0000617,ISIC_0029703,nv,follow_up,75.0,male,trunk
6713,HAM_0007311,ISIC_0026367,nv,histo,45.0,male,face
7632,HAM_0001935,ISIC_0033067,nv,histo,50.0,male,chest
8136,HAM_0000721,ISIC_0030243,nv,histo,40.0,female,trunk
3100,HAM_0002151,ISIC_0029870,nv,follow_up,65.0,male,abdomen
