# Working with DataFrames

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Use the fully qualified option key. The bare 'max_columns' pattern is ambiguous on
# pandas >= 1.4 (it also matches 'styler.render.max_columns') and raises OptionError.
pd.set_option('display.max_columns', 50)
%matplotlib inline
import os
os.getcwd()

'C:\\Users\\Kelley\\PycharmProjects\\Python-Library\\Pandas'

### References

[Official Pandas Documentation](https://pandas.pydata.org/pandas-docs/stable/api.html#dataframe)

[Greg Reda Intro](http://www.gregreda.com/2013/10/26/working-with-pandas-dataframes/)

[R Comparison](https://pandas.pydata.org/pandas-docs/stable/comparison_with_r.html)

[Modern Pandas](https://github.com/TomAugspurger/effective-pandas/blob/master/modern_1_intro.ipynb)

[Pandas Cheat Sheet](http://pandas.pydata.org/Pandas_Cheat_Sheet.pdf)

## Import / Create Data

In [90]:
# Load the raw inputs from disk; both paths are relative to the current working directory.
from_csv = pd.read_csv('../Data/mariano-rivera.csv')  # CSV file -> DataFrame
football = pd.read_excel('../football_data.xlsx')  # Excel workbook -> DataFrame
# Hand-built DataFrame: each keyword is a column name, each list holds that column's rows.
extradata = pd.DataFrame(dict(
    year=[2013, 2014, 2013],
    team=['Wily Coyotes', None, 'Wily Coyotes'],  # None marks a missing (NULL) team
    wins=[15, 16, 15],
    losses=[None, 3, None],  # None in a numeric column ends up as NaN
))

# Free-text notes as a Series, then the same values wrapped in a one-column DataFrame.
notes = pd.Series([
    "this one wasn't good",
    "NCR-12345 has a problem",
    "zip code was at 10101 and ticket #12345",
    "Bob and bobbert had a good time",
    "mascot was there",
    "had a free drink",
    "",
    "Pandas are fun. Mr Bob would know.",
    "i think this is a fake team",
    "missing team",
])
notes_df = pd.DataFrame(notes)
football

Unnamed: 0,year,team,wins,losses
0,2010,Bears,11,5
1,2011,Bears,8,8
2,2012,Bears,10,6
3,2011,Packers,15,1
4,2012,Packers,11,5
5,2010,Lions,6,10
6,2011,Lions,10,6
7,2012,Lions,4,12


## Concat (Append Rows & Join Columns)

In [92]:
# Stack the two frames row-wise; ignore_index=True gives the result a fresh index.
merge = pd.concat(objs=[football, extradata], ignore_index=True)
# axis=1 concatenates column-wise (axis=0, the default, concatenates row-wise).
join = pd.concat(objs=[merge, notes_df], axis=1)

# `f` is the short alias for the combined frame; the bare name displays it.
f = join
f

Unnamed: 0,year,team,wins,losses,0
0,2010,Bears,11,5.0,this one wasn't good
1,2011,Bears,8,8.0,NCR-12345 has a problem
2,2012,Bears,10,6.0,zip code was at 10101 and ticket #12345
3,2011,Packers,15,1.0,Bob and bobbert had a good time
4,2012,Packers,11,5.0,mascot was there
5,2010,Lions,6,10.0,had a free drink
6,2011,Lions,10,6.0,
7,2012,Lions,4,12.0,Pandas are fun. Mr Bob would know.
8,2013,Wily Coyotes,15,,i think this is a fake team
9,2014,,16,3.0,missing team


### Explore

In [17]:
# Quick-look helpers. A notebook cell only renders its final expression, so run
# any of these lines in a cell of its own to see that particular result.
f.head()       # first rows of the frame
f.shape        # (rows, columns) tuple
f.sample(2)    # two randomly sampled rows
f.dtypes       # dtype of each column
f.isnull()     # boolean frame marking missing values
f.info()       # was `f.info` (no parentheses), which never called the method
f.describe()   # statistics summary

Unnamed: 0,year,wins,losses
count,8.0,8.0,8.0
mean,2011.125,9.375,6.625
std,0.834523,3.377975,3.377975
min,2010.0,4.0,1.0
25%,2010.75,7.5,5.0
50%,2011.0,10.0,6.0
75%,2012.0,11.0,8.5
max,2012.0,15.0,12.0


# DataFrame Manipulation

### Select

In [84]:
# Select every row, restricted to the listed columns.
f.loc[:, ['year', 'team']]

Unnamed: 0,year,team
0,2010,Bears
1,2011,Bears
2,2012,Bears
3,2011,Packers
4,2012,Packers
5,2010,Lions
6,2011,Lions
7,2012,Lions
8,2013,Wily Coyotes
9,2014,


### Filter by Values

In [64]:
# Keep rows whose year is greater than 2011 and whose team is not 'Bears'.
f.loc[f['year'].gt(2011) & f['team'].ne('Bears')]

Unnamed: 0,year,team,wins,losses,0
4,2012,Packers,11,5.0,mascot was there
7,2012,Lions,4,12.0,Pandas are fun. Mr Bob would know.
8,2013,Wily Coyotes,15,,i think this is a fake team
9,2014,,16,3.0,missing team
10,2013,Wily Coyotes,15,,


### Filter duplicate rows

In [72]:
# `subset` is optional: when given, only these columns are compared to decide
# whether two rows are duplicates.
f.drop_duplicates(subset=['year', 'team'])

Unnamed: 0,year,team,wins,losses,0
0,2010,Bears,11,5.0,this one wasn't good
1,2011,Bears,8,8.0,NCR-12345 has a problem
2,2012,Bears,10,6.0,zip code was at 10101 and ticket #12345
3,2011,Packers,15,1.0,Bob and bobbert had a good time
4,2012,Packers,11,5.0,mascot was there
5,2010,Lions,6,10.0,had a free drink
6,2011,Lions,10,6.0,
7,2012,Lions,4,12.0,Pandas are fun. Mr Bob would know.
8,2013,Wily Coyotes,15,,i think this is a fake team
9,2014,,16,3.0,missing team


### Filter NULLS (None, NaN)

In [81]:
# Keep only the rows where 'team' has a value.
f.loc[f['team'].notna()]

Unnamed: 0,year,team,wins,losses,0
0,2010,Bears,11,5.0,this one wasn't good
1,2011,Bears,8,8.0,NCR-12345 has a problem
2,2012,Bears,10,6.0,zip code was at 10101 and ticket #12345
3,2011,Packers,15,1.0,Bob and bobbert had a good time
4,2012,Packers,11,5.0,mascot was there
5,2010,Lions,6,10.0,had a free drink
6,2011,Lions,10,6.0,
7,2012,Lions,4,12.0,Pandas are fun. Mr Bob would know.
8,2013,Wily Coyotes,15,,i think this is a fake team
10,2013,Wily Coyotes,15,,


In [83]:
# Keep only the rows where 'losses' is missing. Use the isnull() mask directly
# rather than comparing notnull() to the literal False.
f[f['losses'].isnull()]

Unnamed: 0,year,team,wins,losses,0
8,2013,Wily Coyotes,15,,i think this is a fake team
10,2013,Wily Coyotes,15,,


### Anti Join