# Pandas

In [20]:
import pandas as pd

## Series

In [None]:
# Print the built-in documentation of the pandas Series class.
help(pd.Series)

In [None]:
# Series built from a plain list of the integers 1 through 6; no index is
# passed, so pandas supplies the default one.
s = pd.Series(list(range(1, 7)))
s

In [None]:
# Series built from a dict: the keys become the index labels and the values
# become the data. Rebinds the name `s`.
s = pd.Series({'math':10, 'leng':7, 'art': 2})
s

In [None]:
# Basic attributes of the Series, one per output line:
# number of elements, index labels, and element dtype.
print(s.size, s.index, s.dtype, sep="\n")

In [None]:
# Label-based access: the value stored under the 'math' label.
print(s['math'])
# Position-based access goes through .iloc. On a Series whose index is not
# integer-based, s[1] only works through a positional fallback that is
# deprecated since pandas 2.1 and slated for removal.
print(s.iloc[1])
# Integer slices are positional: the first two elements.
print(s[:2])
# A list of labels returns a sub-Series with exactly those labels.
print(s[['leng','art']])


#### Resumen descriptivo

In [None]:
s = pd.Series([1, 2, 3, 4, 2, 5, 6])

# Descriptive statistics, printed one result after another:
# non-null count, absolute and relative frequencies, extremes,
# variance and standard deviation.
print(
    s.count(),
    s.value_counts(),
    s.value_counts(normalize=True),
    s.min(),
    s.max(),
    s.var(),
    s.std(),
    sep="\n",
)

In [None]:
# describe() bundles the main summary statistics of s into a single Series.
print(s.describe())

In [None]:
# Total of all elements, followed by the running (cumulative) total.
print(s.sum(), s.cumsum(), sep="\n")

#### Operaciones

In [None]:
# Arithmetic operators work element-wise on a Series.
# A cell only echoes its last expression, so the first result is printed
# explicitly; otherwise it would be computed and silently discarded.
print(s * 2)
s % 2

In [None]:
# A Series of single-character strings, multiplied element-wise by 4.
st = pd.Series(list('abc'))
st * 4

In [None]:
# Print the documentation of Series.apply before using it below.
help(s.apply)

In [None]:
import math  # NOTE(review): not used in any visible cell; confirm before removing
import numpy as np
# Apply NumPy's square-root function to the values of s.
s.apply(np.sqrt)

In [None]:
# apply() also accepts plain Python callables: here str.upper is called on
# each element of st.
st.apply(str.upper)

In [None]:
def mifunc(val):
    """Return the string form of ``val`` converted to upper case.

    ``val`` is passed through ``str()`` first, so non-string values are
    accepted as well.
    """
    return str(val).upper()

# A cell only echoes its last expression, so the lambda result is printed
# explicitly; otherwise it would be computed and silently discarded.
print(s.apply(lambda x: x*2+3))

# A user-defined function is applied the same way as a built-in one.
st.apply(mifunc)

#### Filtrar y ordenar

In [None]:
# Boolean mask: True where the element of s is greater than 2.
mask = s.gt(2)
print(mask)

# Keep only the elements flagged by the mask. The result is bound to a new
# name, and s is printed afterwards to show it is unchanged.
s2 = s.loc[mask]
print(s2, s, sep="\n")

In [None]:
# Sort by value (ascending). Printed explicitly because a cell only echoes
# its last expression; the result would otherwise be discarded.
print(s.sort_values())
# Sort by index label, highest label first. Neither call modifies s.
s.sort_index(ascending=False)

In [None]:
# Series mixing strings with two kinds of missing marker (None and np.nan).
sx = pd.Series(['a', 'b', None, 'c', np.nan, 'd'])
print(sx)

# dropna() returns a new Series without the missing entries; the result is
# bound to sy, so sx keeps its original contents.
sy = sx.dropna()
print(sy)


In [None]:
# Print the documentation of Series.drop (removal by label, as opposed to
# dropna, which removes missing values).
help(sx.drop)

## DataFrames

### Creación

In [None]:
# Print the built-in documentation of the pandas DataFrame class.
help(pd.DataFrame)

In [45]:

# Column-oriented sample data: each key is a column name and each list
# holds that column's values.
diccionario = dict(
    nombre=['María', 'Luis', 'Carmen', 'Antonio'],
    edad=[18, 22, 20, 21],
    grado=['Economía', 'Medicina', 'Arquitectura', 'Economía'],
    correo=['maria@gmail.com', 'luis@yahoo.es', 'carmen@gmail.com', 'antonio@gmail.com'],
)

# Row-oriented sample data without column names: one inner list per row.
lista_de_listas = [
    ['María', 18],
    ['Luis', 22],
    ['Carmen', 20],
]

# Row-oriented sample data with column names; the last record has no
# 'Edad' key.
lista_de_dicts = [
    dict(Nombre='María', Edad=18),
    dict(Nombre='Luis', Edad=22),
    dict(Nombre='Carmen'),
]

In [None]:
# DataFrame from a dict of lists: each key becomes a column.
df = pd.DataFrame(diccionario)
df

In [None]:
# DataFrame from a list of dicts: each dict is a row and the keys are the
# column names. Rebinds the name `df`.
df = pd.DataFrame(lista_de_dicts)
df

In [None]:
# DataFrame from a list of lists: each inner list is a row, so the column
# names have to be supplied explicitly. Rebinds the name `df`.
df = pd.DataFrame(lista_de_listas, columns=['Nombre','Edad'])
df

#### Desde fuentes

In [None]:
# Load colesterol.csv from the data directory one level above the working
# directory and print it as plain text.
dfcol = pd.read_csv("../data/colesterol.csv")
print(dfcol)

In [None]:
# read_csv also accepts a URL: this fetches colesterol.csv from GitHub
# (needs network access) and rebinds dfcol to the downloaded frame.
dfcol = pd.read_csv("https://github.com/ricardoahumada/DataScienceBasics/raw/refs/heads/main/data/colesterol.csv")
print(dfcol)

In [None]:
# Load frutas.json into a DataFrame and display it.
dffr = pd.read_json('../data/frutas.json')
dffr

In [None]:
# sqlite3 ships with the Python standard library, so no installation is needed.
import sqlite3

# Open the SQLite database file and load the whole `movies` table.
con_sqlt = sqlite3.connect("../data/database.db")
try:
    dfdb = pd.read_sql_query("SELECT * FROM movies", con_sqlt)
finally:
    # Always release the connection, even if the query raises
    # (for example when the table does not exist).
    con_sqlt.close()

dfdb

### Exportar

In [None]:
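# Reading .xlsx files with pandas needs the optional openpyxl package.
# If it is missing, install it once with: %pip install openpyxl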

In [None]:
# Load only the 'Purchases' sheet of the Excel workbook and display it.
dfxl = pd.read_excel("../data/FoodMarket.xlsx", sheet_name='Purchases')
dfxl

In [73]:
# Write the purchases frame to a JSON file in the data directory.
dfxl.to_json('../data/Purchases.json')

In [None]:
# Write the fruits frame into the SQLite database as table `frutas`.
con_sqlt = sqlite3.connect("../data/database.db")
try:
    # if_exists="replace" keeps the cell re-runnable: with the default
    # ("fail") a second run raises because the table already exists.
    dffr.to_sql(name="frutas", con=con_sqlt, if_exists="replace")
finally:
    # Release the database file even if the write fails.
    con_sqlt.close()

### Atributos

In [None]:
# info() prints a per-column summary (names, non-null counts, dtypes) plus
# memory usage. It prints directly, so both calls produce output.
dfxl.info()
dfcol.info()

In [None]:
# size is the total number of cells; shape is the (rows, columns) tuple.
# A cell only echoes its last expression, so the first two values are
# printed explicitly; otherwise they would be silently discarded.
print(dfcol.size)
print(dfcol.shape)
dfxl.shape

In [None]:
# Structural attributes of the purchases frame, one per output block:
# column labels, row labels, both axes together, and the underlying values.
print(dfxl.columns, dfxl.index, dfxl.axes, dfxl.values, sep="\n")

In [None]:
# Data type of each column of dfcol.
print(dfcol.dtypes)

In [None]:
# First 10 rows of the purchases frame.
dfxl.head(10)

In [None]:
# Last 10 rows of the purchases frame.
dfxl.tail(10)

In [None]:
# Summary statistics of the purchases frame.
dfxl.describe()

### Renombrar y modificar: índices y columnas

In [112]:
# Rename two columns ('nombre' -> 'nombre y apellido', 'peso' -> 'pesos') and
# two row labels (0 -> 1000, 10 -> 101).
# inplace=True mutates dfcol itself, so every later cell sees the new labels.
dfcol.rename(columns={'nombre': 'nombre y apellido', 'peso': 'pesos'}, index={0: 1000, 10: 101}, inplace=True)

In [None]:
# Display dfcol to inspect the effect of the rename.
dfcol

In [None]:
# Use the 'nombre y apellido' column as the row index. The result is only
# displayed, not assigned, so dfcol itself keeps its current index.
dfcol.set_index("nombre y apellido")

In [None]:
# Build a view of the data with rows in the order 4, 3, 1 and only the
# 'edad' and 'altura' columns. The result is not assigned, so dfcol is
# left as it was.
dfcol.reindex(index=[4, 3, 1], columns=['edad', 'altura'])

### Acceso y filtrar

In [None]:
# Print the documentation of the label-based indexer .loc.
help(dfcol.loc)

In [None]:
# Print the documentation of the position-based indexer .iloc.
help(dfcol.iloc)

In [168]:
# Display the frame used by the selection examples that follow.
dfcol

Unnamed: 0,nombre y apellido,edad,sexo,pesos,altura,colesterol
1000,José Luis Martínez Izquierdo,18,H,85.0,1.79,182.0
1,Rosa Díaz Díaz,32,M,65.0,1.73,232.0
2,Javier García Sánchez,24,H,,1.81,191.0
3,Carmen López Pinzón,35,M,65.0,1.7,200.0
4,Marisa López Collado,46,M,51.0,1.58,148.0
5,Antonio Ruiz Cruz,68,H,66.0,1.74,249.0
6,Antonio Fernández Ocaña,51,H,62.0,1.72,276.0
7,Pilar Martín González,22,M,60.0,1.66,
8,Pedro Gálvez Tenorio,35,H,90.0,1.94,241.0
9,Santiago Reillo Manzano,46,H,75.0,1.85,280.0


In [None]:
# Passing a list of column labels selects those columns, in the given order.
dfcol[['altura','pesos']]

In [None]:
# Position-based: the row at position 1, restricted to the first three columns.
dfcol.iloc[1,:3]

In [None]:
# Position-based block: rows at positions 1-4 and columns at positions 2-3
# (the end of each slice is excluded).
dfcol.iloc[1:5, 2:4]

In [None]:
# Label-based: rows from index label 2 onwards, only the two listed columns.
dfcol.loc[2:, ['colesterol','pesos']]

In [None]:
# Row filter: a boolean Series that is True where 'pesos' is above 70.
mask = dfcol['pesos'].gt(70)
print(mask)
# Keep only the rows flagged by the mask.
dfcol.loc[mask]

In [None]:
# Compare two columns at once, each against its own threshold
# (pesos > 70, altura > 1.5). The result is a boolean DataFrame.
mask = dfcol.loc[:, ['pesos', 'altura']] > [70, 1.5]
# Printed explicitly: a bare `mask` in the middle of a cell is not echoed.
print(mask)
# Indexing with the boolean DataFrame and displaying the result.
dfcol[mask]

In [171]:
# isin() with a list tests every cell of the frame against the listed values,
# whatever column the cell is in.
mask = dfcol.isin([20, 1.7,'Javier García Sánchez'])
print(mask)
# Indexing with the boolean frame keeps the matching cells and leaves every
# other cell empty (NaN), as the displayed result shows.
dfcol[mask]

      nombre y apellido   edad   sexo  pesos  altura  colesterol
1000              False  False  False  False   False       False
1                 False  False  False  False   False       False
2                  True  False  False  False   False       False
3                 False  False  False  False    True       False
4                 False  False  False  False   False       False
5                 False  False  False  False   False       False
6                 False  False  False  False   False       False
7                 False  False  False  False   False       False
8                 False  False  False  False   False       False
9                 False  False  False  False   False       False
101               False  False  False  False   False       False
11                False  False  False  False   False       False
12                False  False  False  False   False       False
13                False   True  False  False   False       False


Unnamed: 0,nombre y apellido,edad,sexo,pesos,altura,colesterol
1000,,,,,,
1,,,,,,
2,Javier García Sánchez,,,,,
3,,,,,1.7,
4,,,,,,
5,,,,,,
6,,,,,,
7,,,,,,
8,,,,,,
9,,,,,,


In [None]:
# isin() with a dict restricts the test per column: 'pesos' is checked
# against 75 and 85, 'edad' against 35. The leading ~ inverts the result,
# so the mask is False exactly where a listed value was found.
mask = ~dfcol.isin({'pesos':[75,85],'edad':[35]})
print(mask)
# Cells where the mask is False are blanked (NaN); everything else is kept.
dfcol[mask]

      nombre y apellido   edad  sexo  pesos  altura  colesterol
1000               True   True  True  False    True        True
1                  True   True  True   True    True        True
2                  True   True  True   True    True        True
3                  True  False  True   True    True        True
4                  True   True  True   True    True        True
5                  True   True  True   True    True        True
6                  True   True  True   True    True        True
7                  True   True  True   True    True        True
8                  True  False  True   True    True        True
9                  True   True  True  False    True        True
101                True   True  True   True    True        True
11                 True   True  True   True    True        True
12                 True   True  True   True    True        True
13                 True   True  True   True    True        True


Unnamed: 0,nombre y apellido,edad,sexo,pesos,altura,colesterol
1000,José Luis Martínez Izquierdo,18.0,H,,1.79,182.0
1,Rosa Díaz Díaz,32.0,M,65.0,1.73,232.0
2,Javier García Sánchez,24.0,H,,1.81,191.0
3,Carmen López Pinzón,,M,65.0,1.7,200.0
4,Marisa López Collado,46.0,M,51.0,1.58,148.0
5,Antonio Ruiz Cruz,68.0,H,66.0,1.74,249.0
6,Antonio Fernández Ocaña,51.0,H,62.0,1.72,276.0
7,Pilar Martín González,22.0,M,60.0,1.66,
8,Pedro Gálvez Tenorio,,H,90.0,1.94,241.0
9,Santiago Reillo Manzano,46.0,H,,1.85,280.0
