![sslogo](https://github.com/stratascratch/stratascratch.github.io/raw/master/assets/sslogo.jpg)

# Basic Pandas Functionality


In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the Titanic passenger records from the CSV file next to this notebook.
data = pd.read_csv(filepath_or_buffer='titanic.csv')

#### Investigate the first few rows of data

In [None]:
# Preview the first rows (5 is the default row count for `head`).
data.head(n=5)

Unnamed: 0,passengerid,survived,pclass,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35,0,0,373450,8.05,,S


#### Investigate the last 10 rows of data

In [None]:
# Preview the final 10 rows of the frame.
data.tail(n=10)

Unnamed: 0,passengerid,survived,pclass,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked
881,882,0,3,"Markun, Mr. Johann",male,33.0,0,0,349257,7.8958,,S
882,883,0,3,"Dahlberg, Miss. Gerda Ulrika",female,22.0,0,0,7552,10.5167,,S
883,884,0,2,"Banfield, Mr. Frederick James",male,28.0,0,0,C.A./SOTON 34068,10.5,,S
884,885,0,3,"Sutehall, Mr. Henry Jr",male,25.0,0,0,SOTON/OQ 392076,7.05,,S
885,886,0,3,"Rice, Mrs. William (Margaret Norton)",female,39.0,0,5,382652,29.125,,Q
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0,C148,C
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.75,,Q


#### Investigate the data types in the DataFrame

In [None]:
# Print each column's dtype and non-null count, plus the frame's memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
passengerid    891 non-null int64
survived       891 non-null int64
pclass         891 non-null int64
name           891 non-null object
sex            891 non-null object
age            714 non-null object
sibsp          891 non-null int64
parch          891 non-null int64
ticket         891 non-null object
fare           891 non-null float64
cabin          204 non-null object
embarked       889 non-null object
dtypes: float64(1), int64(5), object(6)
memory usage: 83.6+ KB


#### Get some summary statistics

In [None]:
# Summary statistics, transposed so each column of `data` becomes one row.
data.describe().transpose()

### Filtering Dataframes

You can filter the rows of a DataFrame with a boolean condition on the values in one or more of its columns.

#### Filter the data for men

In [None]:
# Keep only the rows whose `sex` column equals 'male'.
data.loc[data['sex'] == 'male']

Unnamed: 0,passengerid,survived,pclass,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22,1,0,A/5 21171,7.2500,,S
4,5,0,3,"Allen, Mr. William Henry",male,35,0,0,373450,8.0500,,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
6,7,0,1,"McCarthy, Mr. Timothy J",male,54,0,0,17463,51.8625,E46,S
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2,3,1,349909,21.0750,,S
12,13,0,3,"Saundercock, Mr. William Henry",male,20,0,0,A/5. 2151,8.0500,,S
13,14,0,3,"Andersson, Mr. Anders Johan",male,39,1,5,347082,31.2750,,S
16,17,0,3,"Rice, Master. Eugene",male,2,4,1,382652,29.1250,,Q
17,18,1,2,"Williams, Mr. Charles Eugene",male,,0,0,244373,13.0000,,S
20,21,0,2,"Fynney, Mr. Joseph J",male,35,0,0,239865,26.0000,,S


#### Filter the ages for the men

In [None]:
# Select the `age` column for the rows whose `sex` equals 'male'.
data.loc[data['sex'] == 'male', 'age']

0        22
4        35
5      None
6        54
7         2
12       20
13       39
16        2
17     None
20       35
21       34
23       28
26     None
27       19
29     None
30       40
33       66
34       28
35       42
36     None
37       21
42     None
45     None
46     None
48     None
50        7
51       21
54       65
55     None
57     28.5
       ... 
840      20
841      16
843    34.5
844      17
845      42
846    None
847      35
848      28
850       4
851      74
857      51
859    None
860      41
861      21
864      24
867      31
868    None
869       4
870      26
872      33
873      47
876      20
877      19
878    None
881      33
883      28
884      25
886      27
889      26
890      32
Name: age, Length: 577, dtype: object

### Adding methods to filters

A method is a function attached to an object, such as a DataFrame or a Series, and methods are used constantly when analyzing data in pandas. pandas has far too many methods to cover here, so we'll go over a few of the basic ones to show how you can use methods to quickly analyze your data.

#### How many men and women were on the Titanic?

In [None]:
# Count the non-null `sex` entries among rows where `sex` equals 'male'.
data.loc[data['sex'] == 'male', 'sex'].count()

577

In [None]:
# Count the non-null `sex` entries among rows where `sex` equals 'female'.
data.loc[data['sex'] == 'female', 'sex'].count()

314

#### What was the survival rate for adult men (age >= 18)?

In [None]:
# Mean of `survived` over rows that are male AND have age >= 18.
# Rows with a missing age do not satisfy the comparison, so they are left out.
data.loc[(data['sex'] == 'male') & (data['age'] >= 18), 'survived'].mean()

0.17721518987341772

#### What was the survival rate for women and children?

In [None]:
# Mean of `survived` over rows that are female OR have age < 18.
# A missing age does not satisfy `< 18`, so such rows qualify only via `sex`.
data.loc[(data['sex'] == 'female') | (data['age'] < 18), 'survived'].mean()

0.6881720430107527

#### Use groupby to compare the survival rates of men and women

In [None]:
# Split the rows by `sex`, then average `survived` within each group.
(
    data
    .groupby(by='sex')['survived']
    .mean()
)

sex
female    0.742038
male      0.188908
Name: survived, dtype: float64

#### Create a DataFrame with groupby 

In [None]:
# Mean survival rate for every (sex, pclass) combination.
# Selecting with a list (`[['survived']]`) keeps the result a DataFrame;
# a bare string (`['survived']`) would return a Series instead.
new = data.groupby(['sex', 'pclass'])[['survived']].mean()
new

Unnamed: 0_level_0,Unnamed: 1_level_0,survived
sex,pclass,Unnamed: 2_level_1
female,1,0.968085
female,2,0.921053
female,3,0.5
male,1,0.368852
male,2,0.157407
male,3,0.135447
