## **Descarga un dataset de Kaggle**

In [2]:
# Mount Google Drive inside the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import pandas as pd
import numpy as np

## Impórtalo y haz un dataframe

In [65]:
# Location of the Kaggle CSV inside the mounted Google Drive.
DATASET_PATH = "/content/drive/MyDrive/Colab_Notebooks/Diseases_Symptoms.csv"
# Load the CSV into a DataFrame.
diseases_df = pd.read_csv(DATASET_PATH)

## Haz una exploración de los datos

In [66]:
diseases_df.info() # general information about the data (columns, non-null counts, dtypes)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Code        400 non-null    int64 
 1   Name        400 non-null    object
 2   Symptoms    400 non-null    object
 3   Treatments  399 non-null    object
dtypes: int64(1), object(3)
memory usage: 12.6+ KB


In [67]:
diseases_df.head() # first 5 rows

Unnamed: 0,Code,Name,Symptoms,Treatments
0,1,Panic disorder,"Palpitations, Sweating, Trembling, Shortness o...","Antidepressant medications, Cognitive Behavior..."
1,2,Vocal cord polyp,"Hoarseness, Vocal Changes, Vocal Fatigue","Voice Rest, Speech Therapy, Surgical Removal"
2,3,Turner syndrome,"Short stature, Gonadal dysgenesis, Webbed neck...","Growth hormone therapy, Estrogen replacement t..."
3,4,Cryptorchidism,"Absence or undescended testicle(s), empty scro...",Observation and monitoring (in cases of mild o...
4,5,Ethylene glycol poisoning-1,"Nausea, vomiting, abdominal pain, General mala...","Supportive Measures, Gastric Decontamination, ..."


In [68]:
diseases_df.tail() # last 5 rows

Unnamed: 0,Code,Name,Symptoms,Treatments
395,396,Urinary Stones (Kidney Stones),"Severe abdominal or back pain, blood in urine,...","Pain management, increased fluid intake, medic..."
396,397,Osteoporosis,"Fragile bones, loss of height over time, back ...","Calcium and vitamin D supplements, regular exe..."
397,398,Rheumatoid Arthritis,"Joint pain, stiffness, swelling, fatigue, loss...",Medications (nonsteroidal anti-inflammatory dr...
398,399,Type 1 Diabetes,"Frequent urination, Increased thirst, Weight loss","Insulin therapy, Blood sugar monitoring, Healt..."
399,400,Type 2 Diabetes,"Fatigue, Increased hunger, Slow healing of wounds","Oral medications, Insulin therapy (in some cas..."


## ¿Tiene nulos? ¿Cuántos? ¿En qué columnas? Haz algo con ellos 🙃

In [69]:
diseases_df.isna().sum() # null count per column: yes, there is one null value, in the Treatments column

Unnamed: 0,0
Code,0
Name,0
Symptoms,0
Treatments,1


In [70]:
# First, look at the rest of the row that this null value belongs to:
diseases_df[diseases_df['Treatments'].isnull()] # the null value belongs to "Gestational Cholestasis"


Unnamed: 0,Code,Name,Symptoms,Treatments
70,71,Gestational Cholestasis,"Itchy skin, particularly on the hands and feet",


In [71]:
# The author looked up the treatment for this disease: reportedly nothing cures
# it (treatments only ease the symptoms), so the null value is replaced with the
# text below.
missing_treatment_text = 'Only treatments to manage the symptoms'
treatments_filled = diseases_df['Treatments'].fillna(missing_treatment_text)
diseases_df['Treatments'] = treatments_filled

# Check that the replacement was applied to the row at position 70:
diseases_df.iloc[70]

Unnamed: 0,70
Code,71
Name,Gestational Cholestasis
Symptoms,"Itchy skin, particularly on the hands and feet"
Treatments,Only treatments to manage the symptoms


In [72]:
# Check that there are no null values left:
diseases_df.isna().sum()

Unnamed: 0,0
Code,0
Name,0
Symptoms,0
Treatments,0


## Borra algunas filas

In [73]:
# Remove the rows labelled 1, 2 and 3. drop() returns a new DataFrame, so
# diseases_df itself keeps all of its rows.
rows_to_remove = [1, 2, 3]
diseases_df_filters = diseases_df.drop(index=rows_to_remove)
diseases_df_filters.head()

Unnamed: 0,Code,Name,Symptoms,Treatments
0,1,Panic disorder,"Palpitations, Sweating, Trembling, Shortness o...","Antidepressant medications, Cognitive Behavior..."
4,5,Ethylene glycol poisoning-1,"Nausea, vomiting, abdominal pain, General mala...","Supportive Measures, Gastric Decontamination, ..."
5,6,Ethylene glycol poisoning-2,"Metabolic acidosis, apid breathing, rapid hear...","Blood tests, Supportive Measures, Gastric Deco..."
6,7,Ethylene glycol poisoning-3,"Decreased urine output, swelling in the legs a...","Supportive Measures, Gastric Decontamination, ..."
7,8,Atrophic vaginitis,"Vaginal dryness, Vaginal burning, frequent uri...","Vaginal moisturizers, Vaginal estrogen therapy..."


## Borra algunas columnas

In [74]:
# Remove the 'Symptoms' and 'Code' columns from the row-filtered DataFrame.
# The result is stored under the same name, so the columns are gone afterwards.
columns_to_remove = ['Symptoms', 'Code']
diseases_df_filters = diseases_df_filters.drop(labels=columns_to_remove, axis=1)
diseases_df_filters.head()

Unnamed: 0,Name,Treatments
0,Panic disorder,"Antidepressant medications, Cognitive Behavior..."
4,Ethylene glycol poisoning-1,"Supportive Measures, Gastric Decontamination, ..."
5,Ethylene glycol poisoning-2,"Blood tests, Supportive Measures, Gastric Deco..."
6,Ethylene glycol poisoning-3,"Supportive Measures, Gastric Decontamination, ..."
7,Atrophic vaginitis,"Vaginal moisturizers, Vaginal estrogen therapy..."


## Muestra los valores únicos de algunas columnas

In [75]:
# Collect the distinct values of the 'Name' column.
name_column = diseases_df['Name']
unique_values = name_column.unique()
unique_values

# NOTE(review): the author states that this DataFrame has no repeated values
# because it simply lists diseases with their symptoms and treatments. Nothing
# in this cell checks that; diseases_df['Name'].is_unique would confirm it.

array(['Panic disorder', 'Vocal cord polyp', 'Turner syndrome',
       'Cryptorchidism', 'Ethylene glycol poisoning-1',
       'Ethylene glycol poisoning-2', 'Ethylene glycol poisoning-3',
       'Atrophic vaginitis', 'Fracture', 'Cellulitis',
       'Eye alignment disorder', 'Headache after lumbar puncture',
       'Pyloric stenosis', 'Adenoid cystic carcinoma',
       'Pleomorphic adenoma', 'Warthin tumor', 'Mucoepidermoid carcinoma',
       'Acinic cell carcinoma', 'Mucocele', 'Osteochondrosis',
       'Sialolithiasis', 'Submandibular stones', 'Parotid stones',
       'Recurrent salivary stones', "Wharton's duct stones", 'Mumps',
       "Ludwig's angina", 'Abscess', "Sjögren's syndrome",
       'Open-Angle Glaucoma', 'Angle-Closure Glaucoma',
       'Normal-Tension Glaucoma', 'Congenital Glaucoma',
       'Secondary Glaucoma', 'Pigmentary Glaucoma',
       'Exfoliation Glaucoma', 'Ocular Hypertension',
       'Low-Tension Glaucoma', 'Anorexia Nervosa', 'Bulimia Nervosa',
       'Bin

## Filtra los datos por algunos de los valores que hay dentro de alguna columna

In [78]:
# Keep only the diseases whose name begins with 'An' (case-sensitive prefix).
name_starts_with_an = diseases_df['Name'].str.startswith('An')
filter_data = diseases_df[name_starts_with_an]
filter_data


Unnamed: 0,Code,Name,Symptoms,Treatments
31,32,Angle-Closure Glaucoma,"Severe eye pain, headache, blurred vision, hal...","Medications (e.g., oral or intravenous acetazo..."
39,40,Anorexia Nervosa,"Extreme weight loss, fear of gaining weight, d...","Medical monitoring, nutritional counseling, ps..."
62,63,Anemia,"Fatigue, weakness, pale skin, shortness of breath","Iron supplementation, dietary changes (iron-ri..."
179,180,Antimicrobial Drugs Poisoning,"Nausea, vomiting, diarrhea, neurological symptoms","Immediate medical attention, supportive care, ..."
195,196,Anal Fissure,"Pain during bowel movements, rectal bleeding","Stool softeners, fiber supplements, topical oi..."
217,218,Anemia due to Chronic Kidney Disease,"Fatigue, weakness, pale skin","Erythropoiesis-stimulating agents (ESA), iron ..."
248,249,Antihypertensives Poisoning,"Dizziness, low blood pressure, slow heart rate...","Immediate medical attention, activated charcoa..."
255,256,Anxiety,"Excessive worrying, restlessness, increased he...","Therapy (cognitive-behavioral therapy, psychot..."
279,280,Anemia due to Malignancy,"Fatigue, weakness, shortness of breath, pale skin","Treating underlying malignancy, blood transfus..."
334,335,Anemia of Chronic Disease,"Fatigue, weakness, pale skin, shortness of bre...","Treating the underlying chronic disease, addre..."


## Repite el paso anterior con condicionales

In [79]:
# Combine two conditions with & (element-wise AND): the name starts with 'An'
# and the symptoms mention fatigue.
# The symptom texts are not consistently capitalised (both "Fatigue" and
# "fatigue" occur in the data), so the symptom match ignores case. regex=False
# treats the search term as plain text instead of a regular expression.
name_starts_with_an = diseases_df['Name'].str.startswith('An')
mentions_fatigue = diseases_df['Symptoms'].str.contains('Fatigue', case=False, regex=False)
filter_data2 = diseases_df[name_starts_with_an & mentions_fatigue]
filter_data2

Unnamed: 0,Code,Name,Symptoms,Treatments
62,63,Anemia,"Fatigue, weakness, pale skin, shortness of breath","Iron supplementation, dietary changes (iron-ri..."
217,218,Anemia due to Chronic Kidney Disease,"Fatigue, weakness, pale skin","Erythropoiesis-stimulating agents (ESA), iron ..."
279,280,Anemia due to Malignancy,"Fatigue, weakness, shortness of breath, pale skin","Treating underlying malignancy, blood transfus..."
334,335,Anemia of Chronic Disease,"Fatigue, weakness, pale skin, shortness of bre...","Treating the underlying chronic disease, addre..."


## Elige una columna y ponla como index

In [84]:
# Use the 'Code' column as the index. The result is stored in a new variable;
# diseases_df is not reassigned here.
diseases_df_index = diseases_df.set_index('Code')
diseases_df_index

Unnamed: 0_level_0,Name,Symptoms,Treatments
Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,Panic disorder,"Palpitations, Sweating, Trembling, Shortness o...","Antidepressant medications, Cognitive Behavior..."
2,Vocal cord polyp,"Hoarseness, Vocal Changes, Vocal Fatigue","Voice Rest, Speech Therapy, Surgical Removal"
3,Turner syndrome,"Short stature, Gonadal dysgenesis, Webbed neck...","Growth hormone therapy, Estrogen replacement t..."
4,Cryptorchidism,"Absence or undescended testicle(s), empty scro...",Observation and monitoring (in cases of mild o...
5,Ethylene glycol poisoning-1,"Nausea, vomiting, abdominal pain, General mala...","Supportive Measures, Gastric Decontamination, ..."
...,...,...,...
396,Urinary Stones (Kidney Stones),"Severe abdominal or back pain, blood in urine,...","Pain management, increased fluid intake, medic..."
397,Osteoporosis,"Fragile bones, loss of height over time, back ...","Calcium and vitamin D supplements, regular exe..."
398,Rheumatoid Arthritis,"Joint pain, stiffness, swelling, fatigue, loss...",Medications (nonsteroidal anti-inflammatory dr...
399,Type 1 Diabetes,"Frequent urination, Increased thirst, Weight loss","Insulin therapy, Blood sugar monitoring, Healt..."


## Resetea el index

In [85]:
# Undo the previous step: 'Code' becomes a regular column again and the rows
# get the default integer index back.
diseases_df_reset = diseases_df_index.reset_index()
diseases_df_reset

Unnamed: 0,Code,Name,Symptoms,Treatments
0,1,Panic disorder,"Palpitations, Sweating, Trembling, Shortness o...","Antidepressant medications, Cognitive Behavior..."
1,2,Vocal cord polyp,"Hoarseness, Vocal Changes, Vocal Fatigue","Voice Rest, Speech Therapy, Surgical Removal"
2,3,Turner syndrome,"Short stature, Gonadal dysgenesis, Webbed neck...","Growth hormone therapy, Estrogen replacement t..."
3,4,Cryptorchidism,"Absence or undescended testicle(s), empty scro...",Observation and monitoring (in cases of mild o...
4,5,Ethylene glycol poisoning-1,"Nausea, vomiting, abdominal pain, General mala...","Supportive Measures, Gastric Decontamination, ..."
...,...,...,...,...
395,396,Urinary Stones (Kidney Stones),"Severe abdominal or back pain, blood in urine,...","Pain management, increased fluid intake, medic..."
396,397,Osteoporosis,"Fragile bones, loss of height over time, back ...","Calcium and vitamin D supplements, regular exe..."
397,398,Rheumatoid Arthritis,"Joint pain, stiffness, swelling, fatigue, loss...",Medications (nonsteroidal anti-inflammatory dr...
398,399,Type 1 Diabetes,"Frequent urination, Increased thirst, Weight loss","Insulin therapy, Blood sugar monitoring, Healt..."


## Divídelo en dos partes aleatorias y vuélvelo a juntar. ¿Te atreves con filas o columnas? ¡¡Prueba!!

In [88]:
# Split the DataFrame into two random parts by rows.
# The first part is a random sample of (up to) 311 rows; the fixed random_state
# makes the same rows come out on every run of the notebook.
diseases_df1 = diseases_df.sample(n=min(311, len(diseases_df)), random_state=42)
# The second part is every row that was not drawn into the first part.
diseases_df2 = diseases_df.drop(index=diseases_df1.index)

print("La primera parte del dataframe tiene", len(diseases_df1), "filas")
print("La segunda parte del dataframe tiene", len(diseases_df2), "filas")

# Put the two parts back together. Sorting by the original index labels restores
# the original row order before the index is renumbered from 0.
diseases_df_combined = pd.concat([diseases_df1, diseases_df2]).sort_index().reset_index(drop=True)

print("El dataframe combinado de nuevo tiene", len(diseases_df_combined), "filas")

La primera parte del dataframe tiene 311 filas
La segunda parte del dataframe tiene 89 filas
El dataframe combinado de nuevo tiene 400 filas


In [91]:
# Split the DataFrame into two random parts by columns.
# The first part is one randomly chosen column; the fixed random_state makes the
# same column come out on every run of the notebook.
diseases_df_columns1 = diseases_df.sample(n=min(1, diseases_df.shape[1]), axis=1, random_state=42)
# The second part is every column that was not drawn into the first part.
diseases_df_columns2 = diseases_df.drop(columns=diseases_df_columns1.columns)

print("La primera parte del dataframe tiene", len(diseases_df_columns1.columns), "columnas")
print("La segunda parte del dataframe tiene", len(diseases_df_columns2.columns), "columnas")

# Put the two parts back together side by side, then restore the original
# column order.
diseases_df_combined_columns = pd.concat([diseases_df_columns1, diseases_df_columns2], axis=1).reindex(columns=diseases_df.columns)

print("El dataframe combinado de nuevo tiene", len(diseases_df_combined_columns.columns), "columnas")

La primera parte del dataframe tiene 1 columnas
La segunda parte del dataframe tiene 3 columnas
El dataframe combinado de nuevo tiene 4 columnas
