In [21]:
import pandas as pd
import random
import numpy as np

In [22]:
df = pd.read_csv("dermacure_survey.csv", index_col = False)

In [23]:
df.head()

Unnamed: 0,ID,Specialty,Years_in_Practice,Practice_Setting,Country,Aware_of_Dermacure,First_Learned,Frequency_of_Info,Prescribed_Dermacure,Number_of_Patients,...,Ruxolitinib_Rating,Influencing_Factors,Ranking_Factors,Patient_Severity,Biologic_Naive_Percentage,Comorbidities,Suitable_Patient_Profile,Patient_Journey,Switching_Stage,Additional_Comments
0,1,Dermatology,11-20 years,Hospital-based,Italy,No,,,No,,...,2.0,Clinical efficacy,"Effectiveness, Safety, Cost, Adherence, Famili...",Mild,51-75%,Allergic Rhinitis,"Patients who failed systemic therapies, Patien...",Topical -> Biologic,After failure of topical therapies,Limited evidence available
1,2,Pediatrics,6-10 years,Community clinic,Italy,Yes,Professional conference,Occasionally,Yes,1-5,...,5.0,"Cost, Clinical efficacy","Effectiveness, Safety, Cost, Adherence, Famili...",Severe,0-25%,Allergic Rhinitis,Patients who failed topical corticosteroids,Topical -> Systemic,After failure of topical therapies,High patient satisfaction
2,3,General Practice,6-10 years,Community clinic,Germany,Yes,Professional conference,Frequently,Yes,1-5,...,3.0,"Patient demand, Safety profile","Effectiveness, Safety, Cost, Adherence, Famili...",Moderate,76-100%,,"Patients who failed systemic therapies, Patien...",Topical -> Biologic,After failure of topical therapies,Limited evidence available
3,4,Allergy/Immunology,0-5 years,Private practice,France,Yes,Medical journal/literature,Rarely,Yes,1-5,...,5.0,Safety profile,"Effectiveness, Safety, Cost, Adherence, Famili...",Severe,51-75%,"Psoriasis, Food Allergies",Patients who failed systemic therapies,Topical -> Biologic,After failure of systemic therapies,Limited evidence available
4,5,Dermatology,0-5 years,Hospital-based,France,Yes,Peer recommendation,Rarely,No,,...,,Peer recommendations,"Effectiveness, Safety, Cost, Adherence, Famili...",Mild,0-25%,"Psoriasis, Allergic Rhinitis","Patients who failed systemic therapies, Patien...",Topical -> Biologic,After failure of systemic therapies,Waiting for more data


Fix Logical Inconsistencies

In [24]:
df.loc[df['Prescribed_Dermacure'] == 'No', 'Number_of_Patients'].unique()

array([nan, '6-10', '1-5'], dtype=object)

In [25]:
df.loc[df['Prescribed_Dermacure'] == 'No', 'Number_of_Patients'].value_counts()

Number_of_Patients
6-10    2
1-5     1
Name: count, dtype: int64

In [26]:
df = df.drop(df[(df['Prescribed_Dermacure'] == 'No') & (df['Number_of_Patients'].notna())].index)

In [27]:
df.loc[df['Prescribed_Dermacure'] == 'Yes', 'Number_of_Patients'].unique()

array(['1-5', 'More than 20', '6-10', '11-20', nan], dtype=object)

In [28]:
df = df.drop(df[(df['Prescribed_Dermacure'] == 'Yes') & (df['Number_of_Patients'].isna())].index)

In [29]:
df.loc[df['Prescribed_Dermacure'] == 'Yes', 'Not_Prescribed_Reasons'].value_counts()

Not_Prescribed_Reasons
Cost issues                              1
Preference for alternative treatments    1
Insufficient clinical evidence           1
Concerns about cost                      1
Name: count, dtype: int64

In [30]:
df = df.drop(df[(df['Prescribed_Dermacure'] == 'Yes') & (df['Not_Prescribed_Reasons'].notna())].index)

Making sure that the dataset is not too small after cleaning

In [31]:
print(len(df))

231


Filling Nan values since this dataset was made with chatgpt

In [32]:
# Check if there are any remaining "N/A" values
print(df.isna().sum())

ID                             0
Specialty                      0
Years_in_Practice              0
Practice_Setting               0
Country                        0
Aware_of_Dermacure             0
First_Learned                123
Frequency_of_Info            123
Prescribed_Dermacure           0
Number_of_Patients            67
Not_Prescribed_Reasons       171
Overall_Effectiveness          0
Response_Time                  0
Safety_Profile                 0
Adverse_Effects               56
Barriers                      41
Future_Prescribing_Intent      0
Alternative_Treatments         0
Dupilumab_Rating              32
Tralokinumab_Rating           39
Upadacitinib_Rating           46
Abrocitinib_Rating            35
Ruxolitinib_Rating            34
Influencing_Factors            0
Ranking_Factors                0
Patient_Severity               0
Biologic_Naive_Percentage      0
Comorbidities                 27
Suitable_Patient_Profile       0
Patient_Journey                0
Switching_

In [33]:
# Alternatively, fill missing numeric columns with mean or median
df['First_Learned'].fillna(df['First_Learned'].mode()[0], inplace=True)
df['Frequency_of_Info'].fillna(df['Frequency_of_Info'].mode()[0], inplace=True)
df['Number_of_Patients'].fillna(0, inplace=True)
df['Not_Prescribed_Reasons'].fillna(df['Not_Prescribed_Reasons'].mode()[0], inplace=True)
df['Adverse_Effects'].fillna(df['Adverse_Effects'].mode()[0], inplace=True)
df['Barriers'].fillna(df['Barriers'].mode()[0], inplace=True)
df['Dupilumab_Rating'].fillna(df['Dupilumab_Rating'].mode()[0], inplace=True)
df['Tralokinumab_Rating'].fillna(df['Tralokinumab_Rating'].mode()[0], inplace=True)
df['Upadacitinib_Rating'].fillna(df['Upadacitinib_Rating'].mode()[0], inplace=True)
df['Abrocitinib_Rating'].fillna(df['Abrocitinib_Rating'].mode()[0], inplace=True)
df['Ruxolitinib_Rating'].fillna(df['Ruxolitinib_Rating'].mode()[0], inplace=True)
df['Comorbidities'].fillna(df['Comorbidities'].mode()[0], inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['First_Learned'].fillna(df['First_Learned'].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Frequency_of_Info'].fillna(df['Frequency_of_Info'].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work beca

In [34]:
df.to_csv("dermacure_survey_clean.csv", index = False)