## Operaciones con columnas y filas:

In [2]:
import pandas as pd

# Cargamos un dataset de peliculas
data = pd.read_csv('http://bit.ly/imdbratings')
data.head()

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
0,9.3,The Shawshank Redemption,R,Crime,142,"[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt..."
1,9.2,The Godfather,R,Crime,175,"[u'Marlon Brando', u'Al Pacino', u'James Caan']"
2,9.1,The Godfather: Part II,R,Crime,200,"[u'Al Pacino', u'Robert De Niro', u'Robert Duv..."
3,9.0,The Dark Knight,PG-13,Action,152,"[u'Christian Bale', u'Heath Ledger', u'Aaron E..."
4,8.9,Pulp Fiction,R,Crime,154,"[u'John Travolta', u'Uma Thurman', u'Samuel L...."


In [5]:
#con axis especificamos el eje:

# axis=1 = Filas
# axis=0 Columnas

# Eliminamos una columna
data.drop('title', axis=1).head()

# podemos eliminar un conjunto de columnas:
data.drop(['content_rating', 'duration'], axis=1, inplace=True)
data.head()

Unnamed: 0,star_rating,title,genre,actors_list
0,9.3,The Shawshank Redemption,Crime,"[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt..."
1,9.2,The Godfather,Crime,"[u'Marlon Brando', u'Al Pacino', u'James Caan']"
2,9.1,The Godfather: Part II,Crime,"[u'Al Pacino', u'Robert De Niro', u'Robert Duv..."
3,9.0,The Dark Knight,Action,"[u'Christian Bale', u'Heath Ledger', u'Aaron E..."
4,8.9,Pulp Fiction,Crime,"[u'John Travolta', u'Uma Thurman', u'Samuel L...."


In [6]:
#Podemos eliminar filas:

data.drop([0,1,2], axis=0, inplace=True)
data.head()

Unnamed: 0,star_rating,title,genre,actors_list
3,9.0,The Dark Knight,Action,"[u'Christian Bale', u'Heath Ledger', u'Aaron E..."
4,8.9,Pulp Fiction,Crime,"[u'John Travolta', u'Uma Thurman', u'Samuel L...."
5,8.9,12 Angry Men,Drama,"[u'Henry Fonda', u'Lee J. Cobb', u'Martin Bals..."
6,8.9,"The Good, the Bad and the Ugly",Western,"[u'Clint Eastwood', u'Eli Wallach', u'Lee Van ..."
7,8.9,The Lord of the Rings: The Return of the King,Adventure,"[u'Elijah Wood', u'Viggo Mortensen', u'Ian McK..."


## Ordenamiento:

In [8]:
#Cargamos de nuevo:
data = pd.read_csv('http://bit.ly/imdbratings')

#Ordenamiento una columna:
data['duration'].sort_values()

#descendente:
data['duration'].sort_values(ascending=False).head()

476    242
157    238
78     229
142    224
445    220
Name: duration, dtype: int64

In [9]:
#Ordenamiento de todo el dataset, por una columna:

#menor a mayor duracion
data.sort_values('duration').head()

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
389,8.0,Freaks,UNRATED,Drama,64,"[u'Wallace Ford', u'Leila Hyams', u'Olga Bacla..."
338,8.0,Battleship Potemkin,UNRATED,History,66,"[u'Aleksandr Antonov', u'Vladimir Barsky', u'G..."
258,8.1,The Cabinet of Dr. Caligari,UNRATED,Crime,67,"[u'Werner Krauss', u'Conrad Veidt', u'Friedric..."
293,8.1,Duck Soup,PASSED,Comedy,68,"[u'Groucho Marx', u'Harpo Marx', u'Chico Marx']"
88,8.4,The Kid,NOT RATED,Comedy,68,"[u'Charles Chaplin', u'Edna Purviance', u'Jack..."


In [10]:
# Se pueden combinar campos en el ordenamiento:

# pelis de menor a mayor duracion, ordenadas por genero
data.sort_values(['genre','duration'])
data.head()

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
0,9.3,The Shawshank Redemption,R,Crime,142,"[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt..."
1,9.2,The Godfather,R,Crime,175,"[u'Marlon Brando', u'Al Pacino', u'James Caan']"
2,9.1,The Godfather: Part II,R,Crime,200,"[u'Al Pacino', u'Robert De Niro', u'Robert Duv..."
3,9.0,The Dark Knight,PG-13,Action,152,"[u'Christian Bale', u'Heath Ledger', u'Aaron E..."
4,8.9,Pulp Fiction,R,Crime,154,"[u'John Travolta', u'Uma Thurman', u'Samuel L...."


## Filtrado Básico de datos:

### Algoritmo clasico:

In [11]:
# Quiero todas las peliculas con mas de 175 minutos de duracion:
# Creamos una lista de booleanos que indicara, para cada pelicula, si esa pelicula dura igual o más de 175 minutos:
booleans = []

for length in data.duration:
    if length >= 175:
        booleans.append(True)
    else:
        booleans.append(False)

In [12]:
# Ahora, le decimos a pandas que haga una comparacion de las posiciones del dataset con los posiciones
# de la lista de booleanos,(cada una de sus filas), para encontrar los matchs

# la lista de booleanos tiene la misma cantidad de filas que el dataframe, compara por posiciones
# evidentemente, solo mostrara las filas del dataframe, cuyo valor en la lista sea True:

data[booleans].head()

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
1,9.2,The Godfather,R,Crime,175,"[u'Marlon Brando', u'Al Pacino', u'James Caan']"
2,9.1,The Godfather: Part II,R,Crime,200,"[u'Al Pacino', u'Robert De Niro', u'Robert Duv..."
7,8.9,The Lord of the Rings: The Return of the King,PG-13,Adventure,201,"[u'Elijah Wood', u'Viggo Mortensen', u'Ian McK..."
8,8.9,Schindler's List,R,Biography,195,"[u'Liam Neeson', u'Ralph Fiennes', u'Ben Kings..."
10,8.8,The Lord of the Rings: The Fellowship of the Ring,PG-13,Adventure,178,"[u'Elijah Wood', u'Ian McKellen', u'Orlando Bl..."


### El codigo anterior es equivalente a este:

In [13]:
data[data.duration >= 175]
data.head()

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
0,9.3,The Shawshank Redemption,R,Crime,142,"[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt..."
1,9.2,The Godfather,R,Crime,175,"[u'Marlon Brando', u'Al Pacino', u'James Caan']"
2,9.1,The Godfather: Part II,R,Crime,200,"[u'Al Pacino', u'Robert De Niro', u'Robert Duv..."
3,9.0,The Dark Knight,PG-13,Action,152,"[u'Christian Bale', u'Heath Ledger', u'Aaron E..."
4,8.9,Pulp Fiction,R,Crime,154,"[u'John Travolta', u'Uma Thurman', u'Samuel L...."


In [14]:
#Cuales son los generos de esas peliculas?
data[data.duration >= 175].genre.head()

1         Crime
2         Crime
7     Adventure
8     Biography
10    Adventure
Name: genre, dtype: object