# NumPy

In [299]:
import numpy as np # Importamos NumPy

## Uso básico

In [300]:
lista = [1, 2, 3, 4, 5, 6] # Plain Python list
lista

[1, 2, 3, 4, 5, 6]

In [301]:
arr = np.array(lista) # Convert the Python list into a NumPy array

In [302]:
matriz = [1,2,3], [4,5,6], [7,8,9] # Tuple of three lists (no outer brackets)
matriz = np.array(matriz) # Convert it into a two-dimensional (3x3) array
matriz

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

## Index y slices

In [303]:
matriz[1,2] * matriz[0,2] # Index as [row, column]: element (1, 2) times element (0, 2)

18

In [304]:
arr[1:4:2] # Slice start:stop:step -> the elements at indices 1 and 3

array([2, 4])

In [305]:
matriz[::2] # Every second row, starting with row 0

array([[1, 2, 3],
       [7, 8, 9]])

In [306]:
matriz[::-1, 1:2] # Rows in reverse order, keeping only column 1 (the 1:2 slice keeps the result 2-D)

array([[8],
       [5],
       [2]])

## Tipos de datos

In [307]:
arr.dtype # Element type stored in the array

dtype('int64')

In [308]:
# Request the element type explicitly when the array is created.
# The variable is named `arr_float` rather than `float` so that the built-in
# `float` type is not shadowed for the rest of the notebook.
arr_float = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype='float64')
arr_float.dtype

dtype('float64')

In [309]:
arr = arr.astype(np.bool_) # Cast to boolean: every non-zero value becomes True
arr.dtype # Not the last expression of the cell, so this value is not displayed
arr

array([ True,  True,  True,  True,  True,  True])

In [310]:
# Cast the array to fixed-width byte strings.
# `np.bytes_` is used because the `np.string_` alias was removed in NumPy 2.0;
# `np.bytes_` is the same type and is also available in NumPy 1.x.
arr = arr.astype(np.bytes_)
arr

array([b'True', b'True', b'True', b'True', b'True', b'True'], dtype='|S5')

## Dimensiones

In [311]:
scalar = np.array(42) # 0-dimensional array = scalar
scalar.ndim

0

In [312]:
vector = np.array([1,2,3]) # 1-dimensional array = vector
vector.ndim

1

In [313]:
matriz = np.array([[1,2],[3,4]]) # 2-dimensional array = matrix
matriz.ndim

2

In [314]:
tensor = np.array([[[1,2],[3,4],[5,6]],[[1,2],[3,4],[5,6]]]) # 3-dimensional array = tensor
print(tensor, tensor.ndim) # Print the array followed by its number of dimensions

[[[1 2]
  [3 4]
  [5 6]]

 [[1 2]
  [3 4]
  [5 6]]] 3


## Operar con dimensiones

In [315]:
vector = np.array([1,2,3], ndmin=10) # ndmin forces the result to have at least 10 dimensions
print(vector)
vector.ndim

[[[[[[[[[[1 2 3]]]]]]]]]]


10

In [316]:
tensor_1 = np.expand_dims(tensor, axis=3) # Insert a new length-1 axis at position 3 (3-D -> 4-D)
print(tensor_1, tensor_1.ndim)

[[[[1]
   [2]]

  [[3]
   [4]]

  [[5]
   [6]]]


 [[[1]
   [2]]

  [[3]
   [4]]

  [[5]
   [6]]]] 4


In [317]:
np.arange(0,10, 2) # Values from 0 up to (but not including) 10, in steps of 2

array([0, 2, 4, 6, 8])

In [318]:
np.zeros((10, 5)) # 10x5 array filled with zeros

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [319]:
np.ones((10,5)) # 10x5 array filled with ones

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [320]:
np.linspace(0,10,20) # 20 evenly spaced values from 0 to 10, both endpoints included

array([ 0.        ,  0.52631579,  1.05263158,  1.57894737,  2.10526316,
        2.63157895,  3.15789474,  3.68421053,  4.21052632,  4.73684211,
        5.26315789,  5.78947368,  6.31578947,  6.84210526,  7.36842105,
        7.89473684,  8.42105263,  8.94736842,  9.47368421, 10.        ])

In [321]:
np.eye(5) # 5x5 matrix with ones on the main diagonal and zeros elsewhere

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [322]:
np.random.rand(3,2) # 3x2 matrix of random floats drawn uniformly from [0, 1)

array([[0.20698009, 0.73141669],
       [0.05278222, 0.85430591],
       [0.22802244, 0.113526  ]])

In [323]:
np.random.randint(1,15,(10,5)) # 10x5 matrix of random integers from 1 to 14 (the upper bound 15 is exclusive)

array([[10,  1,  3, 12,  4],
       [ 6,  4,  2, 11, 11],
       [12,  4, 10, 14, 13],
       [ 9,  3,  2, 10,  6],
       [ 2,  7,  4,  8,  4],
       [11,  9,  4,  1, 10],
       [ 5,  4,  2,  2,  2],
       [ 1,  1,  7,  5, 13],
       [ 6,  3,  6,  1,  2],
       [ 3, 11, 12,  2, 13]])

## Shape y reshape

In [324]:
arr = np.random.randint(1,10,(3,2)) # 3x2 matrix of random integers from 1 to 9
print(arr)
arr.shape # Number of elements along each dimension of the array

[[4 9]
 [6 3]
 [8 3]]


(3, 2)

In [325]:
arr.reshape(1,6) # Returns the data in the requested shape (the total number of elements must match)

array([[4, 9, 6, 3, 8, 3]])

In [326]:
np.reshape(arr,(3,2),'C') # order='C': read/write the elements in C-like (row-major) index order

array([[4, 9],
       [6, 3],
       [8, 3]])

In [327]:
np.reshape(arr,(3,2),'F') # order='F': Fortran-like (column-major) index order; the target shape equals the current one, so the result looks unchanged

array([[4, 9],
       [6, 3],
       [8, 3]])

In [328]:
np.reshape(arr,(2,3),'A') # order='A': Fortran-like order only if arr is Fortran-contiguous in memory, C-like order otherwise


array([[4, 9, 6],
       [3, 8, 3]])

## Funciones principales de NumPy

In [329]:
arr = np.random.randint(1,20,10) # 10 random integers from 1 to 19 (upper bound exclusive)
arr

array([ 2, 16, 13,  4,  7, 17, 17, 18, 10, 19])

In [330]:
matriz = arr.reshape(2,5) # Arrange the 10 values as 2 rows x 5 columns
matriz

array([[ 2, 16, 13,  4,  7],
       [17, 17, 18, 10, 19]])

In [331]:
matriz.max() # Largest value in the whole array (min() works the same way)

19

In [332]:
matriz.max(1) # Largest value along axis 1, i.e. one maximum per row (min() works the same way)

array([16, 19])

In [333]:
matriz.argmax(0) # Along axis 0: for each column, the row index that holds the largest value (argmin() works the same way)

array([1, 1, 1, 1, 1])

In [334]:
# Peak-to-peak: difference between the maximum and the minimum of the whole array.
# The function form is used because the ndarray.ptp method was removed in NumPy 2.0.
np.ptp(matriz)

17

In [335]:
# Peak-to-peak (maximum minus minimum) along axis 1, i.e. one value per row.
# The function form is used because the ndarray.ptp method was removed in NumPy 2.0.
np.ptp(matriz, axis=1)

array([14,  9])

In [336]:
np.percentile(arr,50) # 50th percentile: the value below which 50% of the data falls

14.5

In [337]:
np.sort(arr) # Returns a sorted copy of the array

array([ 2,  4,  7, 10, 13, 16, 17, 17, 18, 19])

In [338]:
np.median(arr) # Median of the array

14.5

In [339]:
np.median(matriz, 0) # Median along axis 0 (one value per column)

array([ 9.5, 16.5, 15.5,  7. , 13. ])

In [340]:
np.std(arr) # Standard deviation of the array

5.866003750424986

In [341]:
np.var(arr) # Variance (the standard deviation squared)

34.410000000000004

In [342]:
np.mean(arr) # Arithmetic mean of the array

12.3

In [343]:
# Append a 1-D vector as a new row below a 2-D matrix.
a = np.array([[1, 2],
              [3, 4]])
b = np.array([5, 6])        # shape (2,): concatenating it with `a` as-is raises an error (dimension mismatch)
b = np.expand_dims(b, 0)    # shape (1, 2): add a leading axis so both arrays are 2-D
c = np.concatenate([a, b])  # axis=0 is the default: the row of b is appended below a
c

array([[1, 2],
       [3, 4],
       [5, 6]])

In [344]:
# Same goal as above, but the new axis is added on the other side first.
a = np.array([[1, 2],
              [3, 4]])
b = np.array([5, 6])                      # shape (2,): cannot be concatenated with `a` directly
b = np.expand_dims(b, 1)                  # shape (2, 1): a column, which still does not line up with the rows of `a`
c = np.concatenate([a, np.transpose(b)])  # the transpose has shape (1, 2), so it can be appended along axis 0
c

array([[1, 2],
       [3, 4],
       [5, 6]])

### Copy

In [345]:
arr = np.arange(0,11) # Integers 0 through 10
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [346]:
trozo_de_arr = arr[0:6] # Slice holding the first six elements of arr
trozo_de_arr

array([0, 1, 2, 3, 4, 5])

In [347]:
trozo_de_arr[:] = 0 # Set every element of the slice to 0
trozo_de_arr # Not the last expression of the cell, so it is not displayed
arr # The original array is modified too: the slice shares its data with arr

array([ 0,  0,  0,  0,  0,  0,  6,  7,  8,  9, 10])

In [348]:
arr_copy = arr.copy() # Independent copy of the array
arr_copy[:] = 100 # Overwrite every value of the copy
print(arr_copy) # The copy is modified...
print(arr) # ...but the original array keeps its values

[100 100 100 100 100 100 100 100 100 100 100]
[ 0  0  0  0  0  0  6  7  8  9 10]


### Condiciones

In [349]:
arr = np.linspace(1,10,10,dtype='int8') # 10 evenly spaced values from 1 to 10, stored as int8
arr

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10], dtype=int8)

In [350]:
indice_cond = arr > 5 # Boolean NumPy array: True where the element satisfies the condition
indice_cond

array([False, False, False, False, False,  True,  True,  True,  True,
        True])

In [351]:
arr[indice_cond] # Boolean-mask indexing: keeps only the elements where the mask is True

array([ 6,  7,  8,  9, 10], dtype=int8)

In [430]:
arr[(arr > 5) & (arr < 9)] # Combine several conditions with & (each one wrapped in its own parentheses)

array([6, 7, 8])

In [353]:
arr[arr > 5] = 99 # Assign 99 to every element that satisfies the condition
arr

array([ 1,  2,  3,  4,  5, 99, 99, 99, 99, 99], dtype=int8)

## Operaciones

In [354]:
lista = [1, 2]
print(lista)
lista * 2 # Repeats the list twice instead of doubling each value, because it is a plain Python list

[1, 2]


[1, 2, 1, 2]

In [355]:
arr = np.arange(0,10) # Integers 0 through 9
arr2 = arr.copy() # Independent copy with the same values

### Operaciones aritméticas

In [356]:
arr * 2 # Element-wise: every value is multiplied by 2

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [357]:
arr + 2 # Element-wise: 2 is added to every value, which a plain Python list cannot do

array([ 2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [358]:
arr * arr2 # Element-wise product: values at the same index are multiplied

array([ 0,  1,  4,  9, 16, 25, 36, 49, 64, 81])

In [359]:
matriz = arr.reshape(2,5) # Arrange the 10 values as 2 rows x 5 columns
matriz2 = matriz.copy()
matriz + matriz2 # Element-wise sum: values at the same position are added

array([[ 0,  2,  4,  6,  8],
       [10, 12, 14, 16, 18]])

In [360]:
np.matmul(matriz, matriz2.T) # Matrix product; matriz2 is transposed so the inner dimensions match: (2,5) x (5,2) -> (2,2)

array([[ 30,  80],
       [ 80, 255]])

In [361]:
matriz = np.array([1,1,2,2,2,2,3,4,5,6,7,8,8,8,8,8]) # 1-D array with repeated values
unique = np.unique(matriz) # The distinct values, without repetitions
unique

array([1, 2, 3, 4, 5, 6, 7, 8])

# Pandas

## Series y dataframes

In [362]:
import pandas as pd

In [363]:
# Series built from a list of values plus explicit index labels
psg_players = pd.Series(['Navas', 'Mbappe', 'Neymar', 'Messi'],
          index=[1,7,10,30]
          )
psg_players

1      Navas
7     Mbappe
10    Neymar
30     Messi
dtype: object

In [364]:
psg_players  = {1:'Navas', 7:'Mbappe', 10:'Neymar', 30:'Messi'} # NOTE: rebinds psg_players from a Series to a plain dict
pd.Series(psg_players) # Same Series built from a dict: the keys become the index labels

1      Navas
7     Mbappe
10    Neymar
30     Messi
dtype: object

In [365]:
dict = {'Jugador':['Luis Suárez','Jorge Molina', 'Antonio Puertas', 'Germán Sánchez', 'Luis Milla', 'Luís Manuel Arantes Maximiano'],
      'Posición':['Delantero', 'Delantero', 'Centrocampista', 'Defensa', 'Centrocampista', 'Portero'],
      'Altura':[185.0, 187.0, 185.0, 187.0, 175.0, 190.0],
      'Goles':[7, 7, 5, 2, 2, 0],
 }
df_Players = pd.DataFrame(dict)

In [366]:
df_Players.columns

Index(['Jugador', 'Posición', 'Altura', 'Goles'], dtype='object')

In [367]:
df_Players.index

RangeIndex(start=0, stop=6, step=1)

In [368]:
pd.DataFrame(dict, index = [9, 23, 10, 6, 5, 1])

Unnamed: 0,Jugador,Posición,Altura,Goles
9,Luis Suárez,Delantero,185.0,7
23,Jorge Molina,Delantero,187.0,7
10,Antonio Puertas,Centrocampista,185.0,5
6,Germán Sánchez,Defensa,187.0,2
5,Luis Milla,Centrocampista,175.0,2
1,Luís Manuel Arantes Maximiano,Portero,190.0,0


## Importación de archivos

In [369]:
# The pd.read_* family loads different file formats; here the CSV is read with the default options.
# NOTE(review): absolute Google Drive path, so this only runs where that drive is mounted.
original = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Curso np y pd/bestsellers-with-categories_e591527f-ae45-4fa5-b0d1-d50142128fa6.csv')

In [370]:
# Same file, now stating the separator and header row explicitly; `names` supplies the
# column labels used from here on instead of the ones in the file's header row.
df_books = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Curso np y pd/bestsellers-with-categories_e591527f-ae45-4fa5-b0d1-d50142128fa6.csv', sep=',', header=0, names=['NAME', 'AUTHOR', 'U.R', 'Reviews', 'Price', 'Year', 'Genre'])

In [371]:
df_books.columns

Index(['NAME', 'AUTHOR', 'U.R', 'Reviews', 'Price', 'Year', 'Genre'], dtype='object')

In [372]:
# read_json can return either a DataFrame or a Series; `typ` selects which one.
# The documented values are the lowercase strings 'frame' and 'series'.
pd.read_json('/content/drive/MyDrive/Colab Notebooks/Curso np y pd/hpcaractersdatadraw.json', typ='series')

0       {'Name': 'Mrs. Abbott', 'Link': 'https://www.h...
1       {'Name': 'Hannah Abbott', 'Link': 'https://www...
2       {'Name': 'Abel Treetops', 'Link': 'https://www...
3       {'Name': 'Euan Abercrombie', 'Link': 'https://...
4       {'Name': 'Aberforth Dumbledore', 'Link': 'http...
                              ...                        
1935    {'Name': 'Georgi Zdravko', 'Link': 'https://ww...
1936    {'Name': 'Zograf', 'Link': 'https://www.hp-lex...
1937    {'Name': 'Zonko', 'Link': 'https://www.hp-lexi...
1938    {'Name': 'Valentina Vázquez', 'Link': 'https:/...
1939    {'Name': 'Zygmunt Budge', 'Link': 'https://www...
Length: 1940, dtype: object

In [373]:
df_books[0:4] # Row slice: rows 0 to 3

Unnamed: 0,NAME,AUTHOR,U.R,Reviews,Price,Year,Genre
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018,Non Fiction
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction


In [374]:
df_books[['NAME', 'AUTHOR', 'Genre']] # Select only the listed columns

Unnamed: 0,NAME,AUTHOR,Genre
0,10-Day Green Smoothie Cleanse,JJ Smith,Non Fiction
1,11/22/63: A Novel,Stephen King,Fiction
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,Non Fiction
3,1984 (Signet Classics),George Orwell,Fiction
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,Non Fiction
...,...,...,...
545,Wrecking Ball (Diary of a Wimpy Kid Book 14),Jeff Kinney,Fiction
546,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,Non Fiction
547,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,Non Fiction
548,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,Non Fiction


### Filtrado con iloc y loc

In [375]:
df_books.loc[0:4, ['NAME', 'AUTHOR']] # Row and column selection by label in one step; with .loc the end label 4 is included

Unnamed: 0,NAME,AUTHOR
0,10-Day Green Smoothie Cleanse,JJ Smith
1,11/22/63: A Novel,Stephen King
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson
3,1984 (Signet Classics),George Orwell
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids


In [376]:
df_books.loc[:, ['Reviews']] * -1 # Arithmetic is applied element-wise to the selected numeric column

Unnamed: 0,Reviews
0,-17350
1,-2052
2,-18979
3,-21424
4,-7665
...,...
545,-9413
546,-14331
547,-14331
548,-14331


In [377]:
df_books.loc[:,['AUTHOR']] == 'JJ Smith' # Element-wise comparison: returns a boolean DataFrame

Unnamed: 0,AUTHOR
0,True
1,False
2,False
3,False
4,False
...,...
545,False
546,False
547,False
548,False


In [378]:
df_books.iloc[1, 3] * -1 # Like .loc, but selects by integer position (row 1, column 3) instead of by label

-2052

In [379]:
df_books.iloc[:2, 2:] # The first two rows, and every column from position 2 to the end

Unnamed: 0,U.R,Reviews,Price,Year,Genre
0,4.7,17350,8,2016,Non Fiction
1,4.6,2052,22,2011,Fiction


## Agregar o eliminar datos

### Borrar columnas 

In [380]:
df_books.head(2) # Show the first 2 rows

Unnamed: 0,NAME,AUTHOR,U.R,Reviews,Price,Year,Genre
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction


In [381]:
df_books_copy = df_books.copy()
df_books_copy.drop('Genre', axis=1).head(2) # drop returns a new DataFrame without the column; df_books_copy itself is not changed

Unnamed: 0,NAME,AUTHOR,U.R,Reviews,Price,Year
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011


In [382]:
df_books_copy2 = df_books.copy()
df_books_copy2.drop('Genre', axis=1, inplace=True) # inplace=True removes the column from df_books_copy2 itself
df_books_copy2.head(2) # Display again to confirm the column is gone

Unnamed: 0,NAME,AUTHOR,U.R,Reviews,Price,Year
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011


In [383]:
df_books_copy3 = df_books.copy()
df_books_copy3 = df_books_copy3.drop('Genre', axis=1) # Alternative to inplace: rebind the variable to the DataFrame returned by drop
df_books_copy3.head(2)

Unnamed: 0,NAME,AUTHOR,U.R,Reviews,Price,Year
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011


### Borrar filas

In [384]:
df_books.drop([0,1,2], axis=0).head(2) # Returns a new DataFrame without the rows labelled 0, 1 and 2

Unnamed: 0,NAME,AUTHOR,U.R,Reviews,Price,Year,Genre
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,2019,Non Fiction


In [385]:
df_books_copy = df_books.copy()
df_books_copy.drop([0,1,2], axis=0, inplace=True) # Removes the rows labelled 0, 1 and 2 from the copy itself
df_books_copy.head(2)

Unnamed: 0,NAME,AUTHOR,U.R,Reviews,Price,Year,Genre
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,2019,Non Fiction


### Agregar columnas

In [386]:
df_books['Nueva_Columna'] = np.nan  # Add a column in which every value is NaN
df_books.head(2)

Unnamed: 0,NAME,AUTHOR,U.R,Reviews,Price,Year,Genre,Nueva_Columna
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction,
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction,


In [387]:
data = np.arange(0, df_books.shape[0]) # Integers 0 .. n_rows-1: one value per row of df_books

In [388]:
df_books['Rango'] = data # Add a column holding the values stored in data (a running row counter)
df_books

Unnamed: 0,NAME,AUTHOR,U.R,Reviews,Price,Year,Genre,Nueva_Columna,Rango
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction,,0
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction,,1
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018,Non Fiction,,2
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction,,3
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,2019,Non Fiction,,4
...,...,...,...,...,...,...,...,...,...
545,Wrecking Ball (Diary of a Wimpy Kid Book 14),Jeff Kinney,4.9,9413,8,2019,Fiction,,545
546,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2016,Non Fiction,,546
547,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2017,Non Fiction,,547
548,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2018,Non Fiction,,548


### Agregar filas

In [389]:
# Stack df_books on top of itself: every row is duplicated and the original index labels are kept.
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0.
pd.concat([df_books, df_books])

Unnamed: 0,NAME,AUTHOR,U.R,Reviews,Price,Year,Genre,Nueva_Columna,Rango
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction,,0
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction,,1
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018,Non Fiction,,2
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction,,3
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,2019,Non Fiction,,4
...,...,...,...,...,...,...,...,...,...
545,Wrecking Ball (Diary of a Wimpy Kid Book 14),Jeff Kinney,4.9,9413,8,2019,Fiction,,545
546,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2016,Non Fiction,,546
547,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2017,Non Fiction,,547
548,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2018,Non Fiction,,548


In [390]:
## Manejar valores nulos

In [391]:
# Small example data containing missing values (np.nan in the numeric columns, None in the text column).
# The variable is named `data_with_nulls` rather than `dict` so that the built-in
# `dict` type is not shadowed.
data_with_nulls = {'Col1': [1,2,3,np.nan],
 'Col2': [4,np.nan,6,7],
 'Col3': ['a','b','c',None],
 }

df = pd.DataFrame(data_with_nulls) # DataFrame built from the data above, nulls included
df

Unnamed: 0,Col1,Col2,Col3
0,1.0,4.0,a
1,2.0,,b
2,3.0,6.0,c
3,,7.0,


In [392]:
df.isnull()*1 # Boolean null mask, multiplied by 1 so it is shown as 0/1

Unnamed: 0,Col1,Col2,Col3
0,0,0,0
1,0,1,0
2,0,0,0
3,1,0,1


In [393]:
df.fillna('Missing') # Replace the nulls with the string 'Missing' (any other fill value works too)

Unnamed: 0,Col1,Col2,Col3
0,1.0,4.0,a
1,2.0,Missing,b
2,3.0,6.0,c
3,Missing,7.0,Missing


In [394]:
# Fill the nulls with each column's mean.
# numeric_only=True restricts the mean to the numeric columns: without it the text column
# Col3 triggers a warning on older pandas and a TypeError on pandas >= 2.0.
# Col3 has no mean, so its nulls are left as they are.
df.fillna(df.mean(numeric_only=True))

  """Entry point for launching an IPython kernel.


Unnamed: 0,Col1,Col2,Col3
0,1.0,4.0,a
1,2.0,5.666667,b
2,3.0,6.0,c
3,2.0,7.0,


In [395]:
# Estimate the missing numeric values from the neighbouring ones (meaningful only when the data is ordered).
# NOTE(review): newer pandas releases appear to deprecate interpolate() on frames that contain
# object columns such as Col3; confirm against the installed version.
df.interpolate()

Unnamed: 0,Col1,Col2,Col3
0,1.0,4.0,a
1,2.0,5.0,b
2,3.0,6.0,c
3,3.0,7.0,


In [396]:
df.dropna() # Drop every row that contains at least one null value

Unnamed: 0,Col1,Col2,Col3
0,1.0,4.0,a
2,3.0,6.0,c


## Filtrado por condiciones

In [397]:
mayor_a2016 = df_books ['Year'] > 2016 # Boolean mask, built the same way as in NumPy
solo_de_ficcion = df_books['Genre'] == 'Fiction' # True for the rows whose genre is exactly 'Fiction'

In [398]:
# Rows published after 2016 that are NOT fiction.
# `~` is the element-wise logical NOT for boolean masks; unary `-` is not the documented
# operator for this and raises TypeError on plain NumPy boolean arrays.
df_books[mayor_a2016 & ~solo_de_ficcion]

Unnamed: 0,NAME,AUTHOR,U.R,Reviews,Price,Year,Genre,Nueva_Columna,Rango
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018,Non Fiction,,2
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,2019,Non Fiction,,4
8,"A Higher Loyalty: Truth, Lies, and Leadership",James Comey,4.7,5983,3,2018,Non Fiction,,8
26,Astrophysics for People in a Hurry,Neil deGrasse Tyson,4.7,9374,9,2017,Non Fiction,,26
32,Becoming,Michelle Obama,4.8,61133,11,2018,Non Fiction,,32
...,...,...,...,...,...,...,...,...,...
526,What Happened,Hillary Rodham Clinton,4.6,5492,18,2017,Non Fiction,,526
536,Whose Boat Is This Boat?: Comments That Don't ...,The Staff of The Late Show with,4.6,6669,12,2018,Non Fiction,,536
547,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2017,Non Fiction,,547
548,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2018,Non Fiction,,548


## Funciones principales de Pandas

In [399]:
df_books.info() # Summary of the DataFrame: index range, columns, non-null counts, dtypes and memory usage

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 550 entries, 0 to 549
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   NAME           550 non-null    object 
 1   AUTHOR         550 non-null    object 
 2   U.R            550 non-null    float64
 3   Reviews        550 non-null    int64  
 4   Price          550 non-null    int64  
 5   Year           550 non-null    int64  
 6   Genre          550 non-null    object 
 7   Nueva_Columna  0 non-null      float64
 8   Rango          550 non-null    int64  
dtypes: float64(2), int64(4), object(3)
memory usage: 38.8+ KB


In [400]:
df_books.describe() # Descriptive statistics, computed for the numeric columns only

Unnamed: 0,U.R,Reviews,Price,Year,Nueva_Columna,Rango
count,550.0,550.0,550.0,550.0,0.0,550.0
mean,4.618364,11953.281818,13.1,2014.0,,274.5
std,0.22698,11731.132017,10.842262,3.165156,,158.915596
min,3.3,37.0,0.0,2009.0,,0.0
25%,4.5,4058.0,7.0,2011.0,,137.25
50%,4.7,8580.0,11.0,2014.0,,274.5
75%,4.8,17253.25,16.0,2017.0,,411.75
max,4.9,87841.0,105.0,2019.0,,549.0


In [401]:
df_books.tail(2) # Last N rows (the counterpart of .head); author's note: rendered differently because the interactive table view was enabled

Unnamed: 0,NAME,AUTHOR,U.R,Reviews,Price,Year,Genre,Nueva_Columna,Rango
548,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2018,Non Fiction,,548
549,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2019,Non Fiction,,549


In [402]:
df_books.head(2) # First N rows (the counterpart of .tail)

Unnamed: 0,NAME,AUTHOR,U.R,Reviews,Price,Year,Genre,Nueva_Columna,Rango
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction,,0
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction,,1


In [403]:
df_books.memory_usage(deep=True) # Memory used by the index and by each column, in bytes

Index              128
NAME             59737
AUTHOR           39078
U.R               4400
Reviews           4400
Price             4400
Year              4400
Genre            36440
Nueva_Columna     4400
Rango             4400
dtype: int64

In [404]:
df_books['AUTHOR'].value_counts() # Number of rows for each distinct author

Jeff Kinney                           12
Gary Chapman                          11
Rick Riordan                          11
Suzanne Collins                       11
American Psychological Association    10
                                      ..
Keith Richards                         1
Chris Cleave                           1
Alice Schertle                         1
Celeste Ng                             1
Adam Gasiewski                         1
Name: AUTHOR, Length: 248, dtype: int64

In [405]:
df_books.drop_duplicates() # Remove fully duplicated rows; by default the first occurrence is kept

Unnamed: 0,NAME,AUTHOR,U.R,Reviews,Price,Year,Genre,Nueva_Columna,Rango
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction,,0
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction,,1
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018,Non Fiction,,2
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction,,3
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,2019,Non Fiction,,4
...,...,...,...,...,...,...,...,...,...
545,Wrecking Ball (Diary of a Wimpy Kid Book 14),Jeff Kinney,4.9,9413,8,2019,Fiction,,545
546,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2016,Non Fiction,,546
547,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2017,Non Fiction,,547
548,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2018,Non Fiction,,548


In [406]:
df_books.drop_duplicates(keep='last') # Remove fully duplicated rows, keeping the last occurrence instead

Unnamed: 0,NAME,AUTHOR,U.R,Reviews,Price,Year,Genre,Nueva_Columna,Rango
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction,,0
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction,,1
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018,Non Fiction,,2
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction,,3
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,2019,Non Fiction,,4
...,...,...,...,...,...,...,...,...,...
545,Wrecking Ball (Diary of a Wimpy Kid Book 14),Jeff Kinney,4.9,9413,8,2019,Fiction,,545
546,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2016,Non Fiction,,546
547,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2017,Non Fiction,,547
548,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2018,Non Fiction,,548


In [407]:
df_books.sort_values('Year', ascending = False) # Sort the rows by a column, in descending order

Unnamed: 0,NAME,AUTHOR,U.R,Reviews,Price,Year,Genre,Nueva_Columna,Rango
549,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2019,Non Fiction,,549
294,School Zone - Big Preschool Workbook - Ages 4 ...,School Zone,4.8,23047,6,2019,Non Fiction,,294
489,The Wonderful Things You Will Be,Emily Winfield Martin,4.9,8842,10,2019,Fiction,,489
263,P is for Potty! (Sesame Street) (Lift-the-Flap),Naomi Kleinberg,4.7,10820,5,2019,Non Fiction,,263
130,"Girl, Wash Your Face: Stop Believing the Lies ...",Rachel Hollis,4.6,22288,12,2019,Non Fiction,,130
...,...,...,...,...,...,...,...,...,...
418,The Last Olympian (Percy Jackson and the Olymp...,Rick Riordan,4.8,4628,7,2009,Fiction,,418
38,"Breaking Dawn (The Twilight Saga, Book 4)",Stephenie Meyer,4.6,9769,13,2009,Fiction,,38
92,"Eat This, Not That! Thousands of Simple Food S...",David Zinczenko,4.3,956,14,2009,Non Fiction,,92
139,Good to Great: Why Some Companies Make the Lea...,Jim Collins,4.5,3457,14,2009,Non Fiction,,139


## Group by (como SQL)

In [408]:
df_books.groupby('AUTHOR').count() # Group the rows by author and count the non-null values of each column per group

Unnamed: 0_level_0,NAME,U.R,Reviews,Price,Year,Genre,Nueva_Columna,Rango
AUTHOR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Abraham Verghese,2,2,2,2,2,2,0,2
Adam Gasiewski,1,1,1,1,1,1,0,1
Adam Mansbach,1,1,1,1,1,1,0,1
Adir Levy,1,1,1,1,1,1,0,1
Admiral William H. McRaven,1,1,1,1,1,1,0,1
...,...,...,...,...,...,...,...,...
Walter Isaacson,3,3,3,3,3,3,0,3
William Davis,2,2,2,2,2,2,0,2
William P. Young,2,2,2,2,2,2,0,2
Wizards RPG Team,3,3,3,3,3,3,0,3


In [409]:
df_books.groupby('AUTHOR').count().loc['William Davis'] # The grouping column becomes the index, so a group can be selected by label

NAME             2
U.R              2
Reviews          2
Price            2
Year             2
Genre            2
Nueva_Columna    0
Rango            2
Name: William Davis, dtype: int64

In [410]:
df_books.groupby('AUTHOR').count().reset_index() # reset_index turns AUTHOR back into a regular column with a default integer index

Unnamed: 0,AUTHOR,NAME,U.R,Reviews,Price,Year,Genre,Nueva_Columna,Rango
0,Abraham Verghese,2,2,2,2,2,2,0,2
1,Adam Gasiewski,1,1,1,1,1,1,0,1
2,Adam Mansbach,1,1,1,1,1,1,0,1
3,Adir Levy,1,1,1,1,1,1,0,1
4,Admiral William H. McRaven,1,1,1,1,1,1,0,1
...,...,...,...,...,...,...,...,...,...
243,Walter Isaacson,3,3,3,3,3,3,0,3
244,William Davis,2,2,2,2,2,2,0,2
245,William P. Young,2,2,2,2,2,2,0,2
246,Wizards RPG Team,3,3,3,3,3,3,0,3


In [411]:
df_books.groupby('AUTHOR').agg(['min','max']) # Per author, the minimum and maximum of every other column

Unnamed: 0_level_0,NAME,NAME,U.R,U.R,Reviews,Reviews,Price,Price,Year,Year,Genre,Genre,Nueva_Columna,Nueva_Columna,Rango,Rango
Unnamed: 0_level_1,min,max,min,max,min,max,min,max,min,max,min,max,min,max,min,max
AUTHOR,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
Abraham Verghese,Cutting for Stone,Cutting for Stone,4.6,4.6,4866,4866,11,11,2010,2011,Fiction,Fiction,,,56,57
Adam Gasiewski,Milk and Vine: Inspirational Quotes From Class...,Milk and Vine: Inspirational Quotes From Class...,4.4,4.4,3113,3113,6,6,2017,2017,Non Fiction,Non Fiction,,,232,232
Adam Mansbach,Go the F**k to Sleep,Go the F**k to Sleep,4.8,4.8,9568,9568,9,9,2011,2011,Fiction,Fiction,,,133,133
Adir Levy,What Should Danny Do? (The Power to Choose Ser...,What Should Danny Do? (The Power to Choose Ser...,4.8,4.8,8170,8170,13,13,2019,2019,Fiction,Fiction,,,529,529
Admiral William H. McRaven,Make Your Bed: Little Things That Can Change Y...,Make Your Bed: Little Things That Can Change Y...,4.7,4.7,10199,10199,11,11,2017,2017,Non Fiction,Non Fiction,,,227,227
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Walter Isaacson,Leonardo da Vinci,Steve Jobs,4.5,4.6,3014,7827,20,21,2011,2017,Non Fiction,Non Fiction,,,214,302
William Davis,"Wheat Belly: Lose the Wheat, Lose the Weight, ...","Wheat Belly: Lose the Wheat, Lose the Weight, ...",4.4,4.4,7497,7497,6,6,2012,2013,Non Fiction,Non Fiction,,,531,532
William P. Young,The Shack: Where Tragedy Confronts Eternity,The Shack: Where Tragedy Confronts Eternity,4.6,4.6,19720,19720,8,8,2009,2017,Fiction,Fiction,,,459,460
Wizards RPG Team,Player's Handbook (Dungeons & Dragons),Player's Handbook (Dungeons & Dragons),4.8,4.8,16990,16990,27,27,2017,2019,Fiction,Fiction,,,265,267


In [412]:
df_books.groupby('AUTHOR').agg({'Reviews':['min','max'], 'Price':'sum'}) # A dict chooses which aggregations are applied to which column

Unnamed: 0_level_0,Reviews,Reviews,Price
Unnamed: 0_level_1,min,max,sum
AUTHOR,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Abraham Verghese,4866,4866,22
Adam Gasiewski,3113,3113,6
Adam Mansbach,9568,9568,9
Adir Levy,8170,8170,13
Admiral William H. McRaven,10199,10199,11
...,...,...,...
Walter Isaacson,3014,7827,61
William Davis,7497,7497,12
William P. Young,19720,19720,16
Wizards RPG Team,16990,16990,81


## Merge y concat

In [413]:
# Two small example frames sharing the columns A-D.
# Every cell is the column letter followed by a row id: 0-3 in df1 and 5-8 in df2.
df1 = pd.DataFrame({col: [f'{col}{i}' for i in range(4)] for col in 'ABCD'})

df2 = pd.DataFrame({col: [f'{col}{i}' for i in range(5, 9)] for col in 'ABCD'})

In [414]:
# Stack df2 below df1 (pd.concat works along axis 0 by default).
# ignore_index=True drops the original row labels and renumbers the result.
pd.concat(
    [df1, df2],
    ignore_index=True,
)

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
4,A5,B5,C5,D5
5,A6,B6,C6,D6
6,A7,B7,C7,D7
7,A8,B8,C8,D8


In [415]:
# Concatenate horizontally (axis=1): df2's columns are placed to the right of
# df1's. With ignore_index=True the column labels are replaced by 0..7, as the
# output below shows.
pd.concat(
    [df1, df2],
    axis=1,
    ignore_index=True,
)

Unnamed: 0,0,1,2,3,4,5,6,7
0,A0,B0,C0,D0,A5,B5,C5,D5
1,A1,B1,C1,D1,A6,B6,C6,D6
2,A2,B2,C2,D2,A7,B7,C7,D7
3,A3,B3,C3,D3,A8,B8,C8,D8


In [416]:
# Left-hand ("izq") and right-hand ("der") frames sharing a 'key' column.
izq = pd.DataFrame(
    {
        'key': ['k0', 'k1', 'k2', 'k3'],
        'A': ['A5', 'A6', 'A7', 'A8'],
        'B': ['B5', 'B6', 'B7', 'B8'],
    }
)

der = pd.DataFrame(
    {
        'key': ['k0', 'k1', 'k2', 'k3'],
        'C': ['C5', 'C6', 'C7', 'C8'],
        'D': ['D5', 'D6', 'D7', 'D8'],
    }
)

# Join the two frames on the column they have in common.
izq.merge(der, on='key')

Unnamed: 0,key,A,B,C,D
0,k0,A5,B5,C5,D5
1,k1,A6,B6,C6,D6
2,k2,A7,B7,C7,D7
3,k3,A8,B8,C8,D8


In [417]:
# Same frames as before, except that the right-hand key column is now named
# 'key_2' instead of 'key'.
izq = pd.DataFrame(
    {
        'key': ['k0', 'k1', 'k2', 'k3'],
        'A': ['A5', 'A6', 'A7', 'A8'],
        'B': ['B5', 'B6', 'B7', 'B8'],
    }
)

der = pd.DataFrame(
    {
        'key_2': ['k0', 'k1', 'k2', 'k3'],
        'C': ['C5', 'C6', 'C7', 'C8'],
        'D': ['D5', 'D6', 'D7', 'D8'],
    }
)

# When the join columns have different names on each side, name them
# explicitly with left_on / right_on. Both key columns appear in the result.
izq.merge(
    der,
    left_on='key',
    right_on='key_2',
)

Unnamed: 0,key,A,B,key_2,C,D
0,k0,A5,B5,k0,C5,D5
1,k1,A6,B6,k1,C6,D6
2,k2,A7,B7,k2,C7,D7
3,k3,A8,B8,k3,C8,D8


In [418]:
# The right-hand frame now has a missing value (NaN) as its fourth key.
izq = pd.DataFrame(
    {
        'key': ['k0', 'k1', 'k2', 'k3'],
        'A': ['A5', 'A6', 'A7', 'A8'],
        'B': ['B5', 'B6', 'B7', 'B8'],
    }
)

der = pd.DataFrame(
    {
        'key_2': ['k0', 'k1', 'k2', np.nan],
        'C': ['C5', 'C6', 'C7', 'C8'],
        'D': ['D5', 'D6', 'D7', 'D8'],
    }
)

# Since key_2 has no 'k3' value, the 'k3' row of izq finds no match and is
# left out of the result (only k0, k1 and k2 come back).
izq.merge(
    der,
    left_on='key',
    right_on='key_2',
)

Unnamed: 0,key,A,B,key_2,C,D
0,k0,A5,B5,k0,C5,D5
1,k1,A6,B6,k1,C6,D6
2,k2,A7,B7,k2,C7,D7


In [419]:
# Same inputs as the previous cell: the fourth key on the right is missing.
izq = pd.DataFrame(
    {
        'key': ['k0', 'k1', 'k2', 'k3'],
        'A': ['A5', 'A6', 'A7', 'A8'],
        'B': ['B5', 'B6', 'B7', 'B8'],
    }
)

der = pd.DataFrame(
    {
        'key_2': ['k0', 'k1', 'k2', np.nan],
        'C': ['C5', 'C6', 'C7', 'C8'],
        'D': ['D5', 'D6', 'D7', 'D8'],
    }
)

# how='left' keeps every row of the left frame: 'k3' is now returned, with
# NaN in the columns that come from the right frame.
izq.merge(
    der,
    left_on='key',
    right_on='key_2',
    how='left',
)

Unnamed: 0,key,A,B,key_2,C,D
0,k0,A5,B5,k0,C5,D5
1,k1,A6,B6,k1,C6,D6
2,k2,A7,B7,k2,C7,D7
3,k3,A8,B8,,,


## Join (index match)

In [420]:
# Two frames with string row labels; they overlap only on 'k0' and 'k2'.
df1 = pd.DataFrame(
    {
        'B': ['B0', 'B1', 'B2'],
        'A': ['A5', 'A6', 'A7'],
    },
    index=['k0', 'k1', 'k2'],
)

df2 = pd.DataFrame(
    {
        'C': ['C0', 'C1', 'C2'],
        'D': ['D0', 'D1', 'D2'],
    },
    index=['k0', 'k2', 'k3'],
)

# join() matches rows by index label. As the output shows, every row of df1
# is kept and 'k1', which is absent from df2, gets NaN for C and D.
df1.join(df2)

Unnamed: 0,B,A,C,D
k0,B0,A5,C0,D0
k1,B1,A6,,
k2,B2,A7,C1,D1


In [421]:
# Same index-labelled frames as in the previous cell.
df1 = pd.DataFrame(
    {
        'B': ['B0', 'B1', 'B2'],
        'A': ['A5', 'A6', 'A7'],
    },
    index=['k0', 'k1', 'k2'],
)

df2 = pd.DataFrame(
    {
        'C': ['C0', 'C1', 'C2'],
        'D': ['D0', 'D1', 'D2'],
    },
    index=['k0', 'k2', 'k3'],
)

# how='inner' keeps only the index labels present in both frames
# (here 'k0' and 'k2').
df1.join(
    df2,
    how='inner',
)

Unnamed: 0,B,A,C,D
k0,B0,A5,C0,D0
k2,B2,A7,C1,D1


## Pivot table

In [422]:
# Build something similar to a spreadsheet pivot table: one row per genre,
# one column per year, and each cell holding the sum of the prices for that
# genre/year combination.
df_books.pivot_table(
    index='Genre',
    columns='Year',
    values='Price',
    aggfunc='sum',
)

Year,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
Genre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Fiction,374,194,244,258,257,295,159,240,212,184,187
Non Fiction,396,480,511,507,473,437,362,419,357,342,317


## Melt

In [423]:
# melt() turns columns into rows: the name of each melted column ('Genre')
# goes into 'variable' and its content into 'value'. Columns listed in
# id_vars ('Year') are carried along unchanged. Only the first 5 rows are shown.
(
    original
    .melt(id_vars='Year', value_vars='Genre')
    .head(5)
)

Unnamed: 0,Year,variable,value
0,2016,Genre,Non Fiction
1,2011,Genre,Fiction
2,2018,Genre,Non Fiction
3,2017,Genre,Fiction
4,2019,Genre,Non Fiction


In [424]:
## Apply

In [425]:
def two_times(value):
    """Return ``value`` multiplied by 2."""
    doubled = value * 2
    return doubled

# Run two_times on every value of 'User Rating' and store the result in a
# 'Rating_2' column of `original`.
original['Rating_2'] = (
    original['User Rating']
    .apply(two_times)
)

# Show the new column next to its source column.
original[['Rating_2', 'User Rating']]

Unnamed: 0,Rating_2,User Rating
0,9.4,4.7
1,9.2,4.6
2,9.4,4.7
3,9.4,4.7
4,9.6,4.8
...,...,...
545,9.8,4.9
546,9.4,4.7
547,9.4,4.7
548,9.4,4.7


In [426]:
# Same idea with an inline lambda instead of a named function.
# Note: this overwrites 'Rating_2' with the rating multiplied by 3,
# even though the column name still says "2".
original['Rating_2'] = (
    original['User Rating']
    .apply(lambda rating: rating * 3)
)

# Show the new column next to its source column.
original[['Rating_2', 'User Rating']]

Unnamed: 0,Rating_2,User Rating
0,14.1,4.7
1,13.8,4.6
2,14.1,4.7
3,14.1,4.7
4,14.4,4.8
...,...,...
545,14.7,4.9
546,14.1,4.7
547,14.1,4.7
548,14.1,4.7


In [427]:
# Apply a lambda with a conditional to whole rows (axis=1): the rating is
# doubled for 'Fiction' books and left unchanged for every other genre.
# With axis=1 the lambda is called once per row.
original['Rating_2'] = original.apply(
    lambda row: (
        row['User Rating'] * 2
        if row['Genre'] == 'Fiction'
        else row['User Rating']
    ),
    axis=1,
)

# Show the new column next to its source column.
original[['Rating_2', 'User Rating']]

Unnamed: 0,Rating_2,User Rating
0,4.7,4.7
1,9.2,4.6
2,4.7,4.7
3,9.4,4.7
4,4.8,4.8
...,...,...
545,9.8,4.9
546,4.7,4.7
547,4.7,4.7
548,4.7,4.7
