# Data Wrangling with pandas Cheat Sheet
http://pandas.pydata.org

## Tidy Data - A foundation for wrangling in pandas

* Each variable is saved in its own column
* Each observation is saved in its own row

In [2]:
import pandas as pd

## Syntax - Creating DataFrames

In [17]:
# Column-oriented construction: each keyword names a column and
# supplies that column's values from top to bottom.
df = pd.DataFrame(dict(
    a=[4, 5, 6],
    b=[7, 8, 9],
    c=[10, 11, 12],
))
df

Unnamed: 0,a,b,c
0,4,7,10
1,5,8,11
2,6,9,12


In [18]:
# Row-oriented construction: every inner list is one row; the column
# labels and the row labels are passed separately.
df = pd.DataFrame(
    data=[[4, 7, 10], [5, 8, 11], [6, 9, 12]],
    columns=['a', 'b', 'c'],
    index=[1, 2, 3],
)
df

Unnamed: 0,a,b,c
1,4,7,10
2,5,8,11
3,6,9,12


In [21]:
# Two-level row index: level 'n' holds the letters, level 'v' the numbers.
# The index is given as one array per level; position i of each array
# together labels row i, i.e. ('d', 1), ('d', 2), ('e', 2).
df = pd.DataFrame(
    dict(a=[4, 5, 6], b=[7, 8, 9], c=[10, 11, 12]),
    index=pd.MultiIndex.from_arrays(
        [['d', 'd', 'e'], [1, 2, 2]],
        names=['n', 'v'],
    ),
)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b,c
n,v,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
d,1,4,7,10
d,2,5,8,11
e,2,6,9,12


## Method Chaining
Most pandas methods return a DataFrame so that another pandas method can be applied to the result. This improves readability of code.

In [None]:
# Reshape to long format, then rename and filter in one chain.
# pd.melt() names its output columns 'variable' and 'value' by default, and
# rename() silently ignores keys that do not exist, so the keys below must
# match those default names exactly for both columns to be renamed.
# Note: the example frames built earlier only hold values 4-12, so with
# them the `val >= 200` filter leaves no rows.
df = (
    pd.melt(df)
    .rename(columns={'variable': 'var', 'value': 'val'})
    .query('val >= 200')
)

## Reshaping Data - Change the layout of a data set

In [3]:
# A five-element integer Series (values 1..5) with the default 0..4 index.
# Despite the name `df`, this is a Series, not a DataFrame.
df = pd.Series(range(1, 6))
df

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [9]:
# type(1) evaluates to the class `int`, so `a` is bound to a class object.
a = type(1)
# Asking for the type of a class yields `type` (see the cell output).
type(a)

type

In [12]:
import numpy as np

# NaN is a float, so it cannot be stored in an int64 Series without changing
# the dtype. Newer pandas releases deprecate that implicit upcast on
# assignment, so convert to float64 explicitly first.
df = df.astype('float64')
# Blank out every second element by position (positions 0, 2, 4).
df.iloc[::2] = np.nan

In [15]:
# Number of missing values: isnull() gives a boolean mask, and summing it
# counts the True entries.
df.isnull().sum()

3

In [16]:
# Fraction of missing values: the mean of the boolean mask.
df.isnull().mean()

0.6

In [17]:
# `df` is a Series at this point, so this shows one dtype; it is float64
# here because the Series now contains NaN.
df.dtypes

dtype('float64')

In [22]:
# Load the movie dataset (path is relative to the notebook's working
# directory) and list the dtype pandas inferred for each column.
movies = pd.read_csv('data/movie.csv')
movies.dtypes
# To count columns per dtype, use movies.dtypes.value_counts();
# DataFrame.get_dtype_counts() was removed in pandas 1.0.

color                         object
director_name                 object
num_critic_for_reviews       float64
duration                     float64
director_facebook_likes      float64
actor_3_facebook_likes       float64
actor_2_name                  object
actor_1_facebook_likes       float64
gross                        float64
genres                        object
actor_1_name                  object
movie_title                   object
num_voted_users                int64
cast_total_facebook_likes      int64
actor_3_name                  object
facenumber_in_poster         float64
plot_keywords                 object
movie_imdb_link               object
num_user_for_reviews         float64
language                      object
country                       object
content_rating                object
budget                       float64
title_year                   float64
actor_2_facebook_likes       float64
imdb_score                   float64
aspect_ratio                 float64
m