![sslogo](https://github.com/stratascratch/stratascratch.github.io/raw/master/assets/sslogo.jpg)

# Quick Rendering Hack

#### For results requiring multiple tables, use the display class to format your results

In [None]:
class display(object):
    """Display HTML representation of multiple objects.

    Each positional argument is a string holding a Python expression,
    usually just a variable name such as ``'df'``.  Every expression is
    evaluated in the global namespace of the module that defines this
    class and is rendered next to its own source text, so several
    objects can be shown side by side in one notebook output.

    SECURITY: the strings are handed to ``eval``.  Only pass expressions
    you wrote yourself, never text that comes from an untrusted source.
    """

    # {0} receives the HTML-escaped expression text, {1} the object's HTML.
    template = """<div style="float: left; padding: 10px;">
    <p style='font-family:"Courier New", Courier, monospace'>{0}</p>{1}
    </div>"""

    def __init__(self, *args):
        """Store the expression strings that will be rendered."""
        self.args = args

    @staticmethod
    def _evaluate(expr):
        """Return the value of *expr* evaluated against module globals only.

        Passing ``globals()`` explicitly keeps loop variables and other
        local names of this class out of the lookup, so a user variable
        that happens to share such a name (for example ``a``) resolves
        to the user's object instead of being shadowed.
        """
        return eval(expr, globals())

    def _repr_html_(self):
        """Return one floated HTML block per expression, joined by newlines."""
        # Function-scope import keeps this cell self-contained.
        from html import escape

        return '\n'.join(
            # The label is plain text, so escape &, < and > before it is
            # placed inside the <p> element; the object's own HTML is
            # inserted unchanged.
            self.template.format(escape(expr, quote=False),
                                 self._evaluate(expr)._repr_html_())
            for expr in self.args)

    def __repr__(self):
        """Return the plain-text fallback: each expression above its repr."""
        return '\n\n'.join(expr + '\n' + repr(self._evaluate(expr))
                           for expr in self.args)
    

In [1]:
import numpy as np
import pandas as pd

In [2]:
db_data = pd.read_csv('../datasets/nfl_combine.csv')

# Operating on Null Values

In [None]:
data = pd.Series([3.1414, np.nan, 'hello world', None])

#### Create a boolean mask for rows containing a non-null value

In [None]:
data.notnull()

0     True
1    False
2     True
3    False
dtype: bool

#### Create a table containing all non-null values from data using a masking operation

In [None]:
data[data.notnull()]

0         3.1414
2    hello world
dtype: object

### Dropping null values

In [None]:
# 5x3 example frame; np.nan marks the missing cells that the
# dropna/fillna examples below operate on.
df = pd.DataFrame(
    np.array([
        [1.0, np.nan, 2.0],
        [2.0, 3.0, 5.0],
        [np.nan, 4.0, 6.0],
        [np.nan, np.nan, np.nan],
        [6.0, np.nan, np.nan],
    ])
)
df

Unnamed: 0,0,1,2
0,1.0,,2.0
1,2.0,3.0,5.0
2,,4.0,6.0
3,,,
4,6.0,,


#### Create a table with all non-null values from data using the dropna method

In [None]:
data.dropna()

0         3.1414
2    hello world
dtype: object

#### Create a table with all rows from df that contain no null values

In [None]:
df.dropna()

Unnamed: 0,0,1,2
1,2.0,3.0,5.0


#### Create a table with all columns from df that have at least three non-null values (columns with fewer than three are dropped)

In [None]:
df.dropna(axis='columns', thresh=3)

Unnamed: 0,0,2
0,1.0,2.0
1,2.0,5.0
2,,6.0
3,,
4,6.0,


### Filling null values

In [None]:
data = pd.Series([1, np.nan, 2, None, 3], index=list('abcde'))
data

a    1.0
b    NaN
c    2.0
d    NaN
e    3.0
dtype: float64

#### Create a table with all null values of data filled with 1

In [None]:
data.fillna(1)

a    1.0
b    1.0
c    2.0
d    1.0
e    3.0
dtype: float64

#### Create a table with all null values of data filled using the forward-fill method

In [None]:
# forward-fill: each NaN takes the last valid value that precedes it.
# Series.ffill() gives the same result as fillna(method='ffill'), whose
# `method` argument is deprecated in recent pandas releases.
data.ffill()

a    1.0
b    1.0
c    2.0
d    2.0
e    3.0
dtype: float64

#### Create a table with all null values of data filled using the back-fill method

In [None]:
# back-fill: each NaN takes the next valid value that follows it.
# Series.bfill() gives the same result as fillna(method='bfill'), whose
# `method` argument is deprecated in recent pandas releases.
data.bfill()

a    1.0
b    2.0
c    2.0
d    3.0
e    3.0
dtype: float64

For ``DataFrame``s, the options are similar, but we can also specify an ``axis`` along which the fills take place:

In [None]:
df

Unnamed: 0,0,1,2
0,1.0,,2.0
1,2.0,3.0,5.0
2,,4.0,6.0
3,,,
4,6.0,,


#### Create a table with all null values of df filled using the forward-fill method down each column

In [None]:
# axis=0 fills down each column; a leading NaN stays NaN because there is
# no earlier value to propagate. DataFrame.ffill() replaces
# fillna(method='ffill'), whose `method` argument is deprecated in recent
# pandas releases.
df.ffill(axis=0)

Unnamed: 0,0,1,2
0,1.0,,2.0
1,2.0,3.0,5.0
2,2.0,4.0,6.0
3,2.0,4.0,6.0
4,6.0,4.0,6.0


# Import Dataset

## Import dataset

In [None]:
# nfl_combine was already loaded into db_data above with pd.read_csv

In [None]:
db_data.head()

Unnamed: 0,year,name,firstname,lastname,position,heightfeet,heightinches,heightinchestotal,weight,arms,...,vertical,broad,bench,round,college,pick,pickround,picktotal,wonderlic,nflgrade
0,2015,Ameer Abdullah,Ameer,Abdullah,RB,5,9.0,69.0,205,0.0,...,42.5,130.0,24.0,0.0,Nebraska,,0,0,0.0,5.9
1,2015,Nelson Agholor,Nelson,Agholor,WR,6,0.0,72.0,198,0.0,...,0.0,0.0,12.0,0.0,USC,,0,0,0.0,5.6
2,2015,Jay Ajayi,Jay,Ajayi,RB,6,0.0,72.0,221,0.0,...,39.0,121.0,19.0,0.0,Boise St.,,0,0,0.0,6.0
3,2015,Kwon Alexander,Kwon,Alexander,OLB,6,1.0,73.0,227,0.0,...,36.0,121.0,24.0,0.0,LSU,,0,0,0.0,5.4
4,2015,Mario Alford,Mario,Alford,WR,5,8.0,68.0,180,0.0,...,34.0,121.0,13.0,0.0,West Virginia,,0,0,0.0,5.3


# Basic Pandas Functionality

#### Print the information about db_data

In [None]:
db_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4947 entries, 0 to 4946
Data columns (total 26 columns):
year                 4947 non-null int64
name                 4947 non-null object
firstname            4947 non-null object
lastname             4947 non-null object
position             4947 non-null object
heightfeet           4947 non-null int64
heightinches         4947 non-null object
heightinchestotal    4947 non-null object
weight               4947 non-null int64
arms                 4947 non-null object
hands                4947 non-null object
fortyyd              4947 non-null float64
twentyyd             4947 non-null float64
tenyd                4947 non-null float64
twentyss             4947 non-null float64
threecone            4947 non-null float64
vertical             4947 non-null float64
broad                4947 non-null float64
bench                4947 non-null float64
round                4947 non-null float64
college              3478 non-null object
pick 

#### Print the first 8 rows of db_data

In [None]:
db_data.head(8)

Unnamed: 0,year,name,firstname,lastname,position,heightfeet,heightinches,heightinchestotal,weight,arms,...,vertical,broad,bench,round,college,pick,pickround,picktotal,wonderlic,nflgrade
0,2015,Ameer Abdullah,Ameer,Abdullah,RB,5,9.0,69.0,205,0.0,...,42.5,130.0,24.0,0.0,Nebraska,,0,0,0.0,5.9
1,2015,Nelson Agholor,Nelson,Agholor,WR,6,0.0,72.0,198,0.0,...,0.0,0.0,12.0,0.0,USC,,0,0,0.0,5.6
2,2015,Jay Ajayi,Jay,Ajayi,RB,6,0.0,72.0,221,0.0,...,39.0,121.0,19.0,0.0,Boise St.,,0,0,0.0,6.0
3,2015,Kwon Alexander,Kwon,Alexander,OLB,6,1.0,73.0,227,0.0,...,36.0,121.0,24.0,0.0,LSU,,0,0,0.0,5.4
4,2015,Mario Alford,Mario,Alford,WR,5,8.0,68.0,180,0.0,...,34.0,121.0,13.0,0.0,West Virginia,,0,0,0.0,5.3
5,2015,Javorius Allen,Javorius,Allen,RB,6,0.0,72.0,221,0.0,...,35.5,121.0,11.0,0.0,USC,,0,0,0.0,5.3
6,2015,Adrian Amos,Adrian,Amos,FS,6,0.0,72.0,218,0.0,...,35.5,122.0,0.0,0.0,Penn St.,,0,0,0.0,5.5
7,2015,Dres Anderson,Dres,Anderson,WR,6,1.0,73.0,187,0.0,...,0.0,0.0,13.0,0.0,Utah,,0,0,0.0,5.5


#### Print the last 10 rows of db_data

In [None]:
db_data.tail(10)

Unnamed: 0,year,name,firstname,lastname,position,heightfeet,heightinches,heightinchestotal,weight,arms,...,vertical,broad,bench,round,college,pick,pickround,picktotal,wonderlic,nflgrade
4937,1999,Jerry Wisne,Jerry,Wisne,OG,6,7.0,79.0,306,0.0,...,28.0,100.0,30.0,5.0,Notre Dame,10(143),10,10,0.0,0.0
4938,1999,Joe Wong,Joe,Wong,OT,6,6.0,78.0,314,0.0,...,27.0,100.0,22.0,7.0,Brigham Young,38(244),38,38,0.0,0.0
4939,1999,Damien Woody,Damien,Woody,OC,6,3.0,75.0,328,0.0,...,28.5,100.0,26.0,1.0,Boston College,17(17),17,17,0.0,0.0
4940,1999,Mark Word,Mark,Word,OLB,6,5.0,77.0,258,0.0,...,31.5,115.0,16.0,0.0,,,0,0,0.0,0.0
4941,1999,Anthony Wright,Anthony,Wright,QB,6,2.0,74.0,195,0.0,...,31.0,116.0,0.0,0.0,,,0,0,16.0,0.0
4942,1999,Daren Yancey,Daren,Yancey,DT,6,6.0,78.0,303,0.0,...,26.5,97.0,0.0,6.0,Brigham Young,19(188),19,19,0.0,0.0
4943,1999,Craig Yeast,Craig,Yeast,WR,5,8.0,68.0,164,0.0,...,32.5,112.0,0.0,4.0,Kentucky,3(98),3,3,0.0,0.0
4944,1999,Ryan Young,Ryan,Young,OT,6,6.0,78.0,335,0.0,...,0.0,0.0,20.0,7.0,Kansas State,17(223),17,17,0.0,0.0
4945,1999,Peppi Zellner,Peppi,Zellner,DE,6,5.0,77.0,246,0.0,...,35.5,122.0,20.0,4.0,Fort Valley State,37(132),37,37,0.0,0.0
4946,1999,Amos Zereoue,Amos,Zereoue,RB,5,8.0,68.0,203,0.0,...,0.0,0.0,0.0,3.0,West Virginia,34(95),34,34,0.0,0.0


#### Print the summary statistics of db_data

In [None]:
db_data.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
year,4947.0,2007.132201,5.029664,1999.0,2003.0,2007.0,2012.0,2015.0
heightfeet,4947.0,5.805337,0.395981,5.0,6.0,6.0,6.0,6.0
weight,4947.0,245.579745,45.639366,155.0,208.0,237.0,289.0,386.0
fortyyd,4947.0,4.610386,0.974087,0.0,4.53,4.69,4.99,6.05
twentyyd,4947.0,0.073734,0.43233,0.0,0.0,0.0,0.0,2.98
tenyd,4947.0,0.129149,0.436941,0.0,0.0,0.0,0.0,1.92
twentyss,4947.0,3.298106,1.907526,0.0,3.835,4.24,4.47,5.56
threecone,4947.0,1.503002,2.929683,0.0,0.0,0.0,0.0,8.31
vertical,4947.0,28.741257,11.596749,0.0,28.0,32.5,35.5,46.0
broad,4947.0,95.944006,41.82634,0.0,101.0,112.0,119.0,147.0


### Filtering Dataframes

Remember: the syntax for filtering DataFrames is similar to that used for boolean masking

In [None]:
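# import titanic: `data` must be rebound to the Titanic DataFrame here; the cells
# below rely on its columns (sex, age, fare, pclass, survived).
# NOTE(review): no load statement is visible in this notebook - confirm how the dataset is imported.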

In [None]:
data.head()

Unnamed: 0,passengerid,survived,pclass,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35,0,0,373450,8.05,,S


#### Filter the data for females

Remember: the head method can be used to make results more concise

In [None]:
data[data.sex == 'female'].head()

Unnamed: 0,passengerid,survived,pclass,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35,1,0,113803,53.1,C123,S
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27,0,2,347742,11.1333,,S
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14,1,0,237736,30.0708,,C


#### Filter for names and age of passengers with fares greater than 50

In [None]:
data.loc[data.fare > 50, ['name', 'age']].head()

Unnamed: 0,name,age
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",38.0
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",35.0
6,"McCarthy, Mr. Timothy J",54.0
27,"Fortune, Mr. Charles Alexander",19.0
31,"Spencer, Mrs. William Augustus (Marie Eugenie)",


### Adding methods to filters


#### Print the number of second class passengers

In [None]:
data.pclass[data.pclass == 2].count()


184

#### Print the survival rate of males in the third passenger class

survival rate = mean of survived column

In [None]:
data.survived[(data.sex == 'male')&(data.pclass == 3)].mean()

0.13544668587896252

#### Print the survival rate for women and children

In [None]:
data.survived[(data.sex == 'female')|(data.age < 18)].mean()

0.6881720430107527

#### Print the survival rate of men. Then, print the survival rate of women. 

This type of group-based aggregation will become simpler in the next section

In [None]:
print(data.survived[data.sex == 'male'].mean())
print(data.survived[data.sex == 'female'].mean())

0.18890814558058924
0.7420382165605095
