In [1]:
import pandas as pd

In [2]:
# Location of the Titanic CSV, relative to the notebook's working directory.
local_source = "../../datasets/titanic.csv"
# Parse the file into the DataFrame that the following cells inspect.
df = pd.read_csv(filepath_or_buffer=local_source)

# 3.1 Counting Columns in your data

## From Shape

In [None]:
# shape is a (number of rows, number of columns) tuple.
df.shape

In [None]:
# Position 1 of the shape tuple is the column count.
cols = df.shape[1]
print(cols)

## From Axes

In [None]:
# axes is a two-element list: the row index first, then the column index.
df.axes

In [None]:
# Element 1 of axes is the column index; its length is the column count.
cols = df.axes[1]
print(len(cols))

## From Columns

In [None]:
# The column labels of the frame, as an Index object.
df.columns

In [None]:
# The length of the column index is the number of columns.
cols = df.columns
print(len(cols))

In [None]:
# An Index also reports its number of elements through .size.
df.columns.size

# 3.2 Counting Rows in Data Frame

In [None]:
# len() of a DataFrame is its number of rows.
len(df)

## From Index

In [None]:
# The row labels of the frame.
df.index

In [None]:
# One index entry per row, so the index length is the row count.
len(df.index)

In [None]:
# Same row count as above, going through an intermediate variable.
rows = df.index
print(len(rows))

## From Shape

In [None]:
# shape is a (number of rows, number of columns) tuple.
df.shape

In [None]:
# Position 0 of the shape tuple is the row count.
rows = df.shape[0]
print(rows)

## From Axes

In [None]:
# axes is a two-element list: the row index first, then the column index.
df.axes

In [None]:
# Element 0 of axes is the row index; its length is the row count.
rows = df.axes[0]
print(len(rows))

# 3.3 Getting Data Frame info

In [None]:
# No call parentheses here: this evaluates to the bound method object itself,
# so the notebook shows the method's repr instead of the summary that
# df.info() prints.
df.info

In [None]:
# Calling info() prints a per-column summary: non-null counts and dtypes,
# followed by the frame's memory usage.
df.info()

In [None]:
# Prints the per-column summary of the frame.
# NOTE(review): this repeats the preceding cell verbatim — looks like an
# accidental duplicate; confirm whether a variant call was intended.
df.info()

In [None]:
# No call parentheses here: this evaluates to the bound method object itself,
# not to the statistics table that df.describe() returns.
df.describe

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the
# numeric columns.
df.describe()

In [None]:
# describe() returns a DataFrame, so one column's statistics can be
# selected by column name.
df.describe()['PassengerId']

# 3.4 Column-specific counts of non-null values

In [None]:
# Number of non-null values in each column.
df.count()

In [None]:
# Pick the Age entry out of the per-column non-null counts.
df.count()['Age']

In [None]:
# The same non-null count, computed on the Age column alone.
df['Age'].count()

In [None]:
# len() counts every row of the column, missing values included,
# so it can be larger than the count() result above.
len(df['Age'])

# 3.5 Checking Missing (Null) Values

## Checking missing values per column - isna/isnull
### Both isna and isnull perform the same operation

In [None]:
# isna() flags each missing cell as True; summing the flags column by
# column gives the number of missing values per column.
df.isna().sum()

In [None]:
# isnull() is an alias of isna(), so this yields the same per-column totals.
df.isnull().sum()

## Total (Sum) null or na values in the dataframe

In [None]:
# Turn the boolean missing-value mask into a NumPy array and add up every
# True to get one grand total for the whole frame.
df.isnull().to_numpy().sum()

In [None]:
# Same grand total of missing cells, this time starting from isna().
df.isna().to_numpy().sum()

In [None]:
# Missing values per column = total number of rows minus the non-null count.
count_nan = df.shape[0] - df.count()

In [None]:
# Show the per-column missing-value counts held in count_nan.
print(count_nan)

In [None]:
# Number of missing entries in the Age column alone.
df['Age'].isnull().to_numpy().sum()

In [None]:
# Same Age total, using the isna() spelling.
df['Age'].isna().to_numpy().sum()

# 3.6 Sampling Data in DataFrame

## Getting a few lines of data from the top

In [None]:
# First rows of the frame; with no argument head() returns 5 rows.
df.head()

In [None]:
# First 3 rows.
df.head(3)

In [None]:
# First 10 rows.
df.head(10)

## Getting a few lines of data from the bottom

In [None]:
# Last rows of the frame; with no argument tail() returns 5 rows.
df.tail()

In [None]:
# Last 2 rows.
df.tail(2)

In [None]:
# Last 10 rows.
df.tail(10)

## Getting a few lines of data randomly from anywhere

In [None]:
# One randomly chosen row (sample() defaults to a single row). No
# random_state is passed, so the row differs from run to run.
df.sample()

In [None]:
# Ten randomly chosen rows; the selection changes on every run.
df.sample(10)

In [None]:
# Name column of ten randomly chosen rows.
df.sample(10)['Name']

# 3.7 Getting DataFrame rows based on index values 

In [3]:
# Display the frame with its default integer row index; the notebook
# truncates long frames to the first and last rows.
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [4]:
# Read the same CSV again, this time using the Ticket column as the row
# index so that rows can be looked up by ticket label.
df_indexed = pd.read_csv(filepath_or_buffer=local_source, index_col="Ticket")

In [20]:
# Look up the integer position of the row whose Ticket label is 'PC 17599'.
# Note: the label must match an index key exactly; wildcards or partial
# values will not work.
target_row = df_indexed.index.get_loc('PC 17599')

In [None]:
# Uncomment the line below to see the result of a partial key: '17599' is
# only part of the label 'PC 17599', so it is not an exact match.
# NOTE(review): expected to raise KeyError — confirm against the dataset.
# target_row = df_indexed.index.get_loc('17599')

In [9]:
# Fetch the row at the looked-up position; passing the position inside a
# list keeps the result a one-row DataFrame.
df_indexed.iloc[[target_row]]

Unnamed: 0_level_0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Fare,Cabin,Embarked
Ticket,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
PC 17599,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,71.2833,C85,C


### Another Option

In [12]:
# Slice by label from 'PC 17599' to the end of the frame, then keep only
# the first row of that slice.
df_indexed.loc["PC 17599":].iloc[:1]

Unnamed: 0_level_0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Fare,Cabin,Embarked
Ticket,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
PC 17599,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,71.2833,C85,C


In [17]:
# Deliberate counter-example: df still has its default integer index (it
# was not indexed by Ticket), so slicing it by a ticket label does not work.
# Note - uncomment the line below to see the failure.
# df['PC 17599':]

# 3.8 Getting DataFrame rows just by slicing 

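Usage: `dataframe.iloc[row_selection, column_selection]`
- `iloc` means "(i)nteger" + "(loc)ation": rows and columns are selected by position, not by label
- Each selection can be a single integer, a list of integers, or a slice `start:stop`
- Note: row and column positions are 0-based, and the `stop` position of a slice is excluded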

In [23]:
# Rows at positions 0, 1 and 2 (the stop position 3 is excluded).
df.iloc[:3]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S


In [51]:
# Every row from position 100 through the end of the frame.
df.iloc[100:]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
100,101,0,3,"Petranec, Miss. Matilda",female,28.0,0,0,349245,7.8958,,S
101,102,0,3,"Petroff, Mr. Pastcho (""Pentcho"")",male,,0,0,349215,7.8958,,S
102,103,0,1,"White, Mr. Richard Frasar",male,21.0,0,1,35281,77.2875,D26,S
103,104,0,3,"Johansson, Mr. Gustaf Joel",male,33.0,0,0,7540,8.6542,,S
104,105,0,3,"Gustafsson, Mr. Anders Vilhelm",male,37.0,2,0,3101276,7.9250,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [38]:
# Rows at positions 10 through 49; the stop position 50 is excluded.
df.iloc[10:50]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
10,11,1,3,"Sandstrom, Miss. Marguerite Rut",female,4.0,1,1,PP 9549,16.7,G6,S
11,12,1,1,"Bonnell, Miss. Elizabeth",female,58.0,0,0,113783,26.55,C103,S
12,13,0,3,"Saundercock, Mr. William Henry",male,20.0,0,0,A/5. 2151,8.05,,S
13,14,0,3,"Andersson, Mr. Anders Johan",male,39.0,1,5,347082,31.275,,S
14,15,0,3,"Vestrom, Miss. Hulda Amanda Adolfina",female,14.0,0,0,350406,7.8542,,S
15,16,1,2,"Hewlett, Mrs. (Mary D Kingcome)",female,55.0,0,0,248706,16.0,,S
16,17,0,3,"Rice, Master. Eugene",male,2.0,4,1,382652,29.125,,Q
17,18,1,2,"Williams, Mr. Charles Eugene",male,,0,0,244373,13.0,,S
18,19,0,3,"Vander Planke, Mrs. Julius (Emelia Maria Vande...",female,31.0,1,0,345763,18.0,,S
19,20,1,3,"Masselmani, Mrs. Fatima",female,,0,0,2649,7.225,,C


In [25]:
# Total number of rows; the next cells use it to address the end of the frame.
df.shape[0]

891

In [27]:
# Last row as a one-row DataFrame: a negative start position counts back
# from the end of the frame.
df.iloc[-1:]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.75,,Q


In [30]:
# Last five rows of the frame.
# A negative start counts back from the end, so no row count is needed.
# It also stays correct for frames with fewer than five rows: the slice is
# clipped and every row is returned, whereas computing the start as
# `df.shape[0] - 5` goes negative on such frames and silently drops rows.
df.iloc[-5:]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0,C148,C
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.75,,Q


In [59]:
# Exact row at position 100, with all of its columns, returned as a Series.
df.iloc[100, :]

PassengerId                        101
Survived                             0
Pclass                               3
Name           Petranec, Miss. Matilda
Sex                             female
Age                                 28
SibSp                                0
Parch                                0
Ticket                          349245
Fare                            7.8958
Cabin                              NaN
Embarked                             S
Name: 100, dtype: object

In [64]:
# Take the row at position 100, then keep its entries from column
# position 4 onward.
df.iloc[100].iloc[4:]

Sex         female
Age             28
SibSp            0
Parch            0
Ticket      349245
Fare        7.8958
Cabin          NaN
Embarked         S
Name: 100, dtype: object

In [60]:
# Exact row at position 800, with all of its columns, returned as a Series.
df.iloc[800, :]

PassengerId                     801
Survived                          0
Pclass                            2
Name           Ponesell, Mr. Martin
Sex                            male
Age                              34
SibSp                             0
Parch                             0
Ticket                       250647
Fare                             13
Cabin                           NaN
Embarked                          S
Name: 800, dtype: object

In [68]:
# A bare integer selects the row at that position and returns it as a Series.
df.iloc[2]

PassengerId                         3
Survived                            1
Pclass                              3
Name           Heikkinen, Miss. Laina
Sex                            female
Age                                26
SibSp                               0
Parch                               0
Ticket               STON/O2. 3101282
Fare                            7.925
Cabin                             NaN
Embarked                            S
Name: 2, dtype: object

In [70]:
# Getting results as DataFrame: wrapping the position in a list returns a
# one-row DataFrame instead of a Series.
df.iloc[[2]]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S


# 3.9 Turning the DataFrame upside down (just slice it)

In [71]:
# A step of -1 walks the rows from last to first, i.e. the frame upside down.
df.iloc[::-1]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.7500,,Q
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C


In [72]:
# A step of 1 keeps the original top-to-bottom row order.
df.iloc[::1]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C
