# Exploring a DataFrame

## Setup

In [1]:
import numpy as np
import pandas as pd

# Keep rendered frames compact: show at most 10 columns and 10 rows;
# anything larger is elided with "..." in the output.
pd.options.display.max_columns = 10
pd.options.display.max_rows = 10


## Create DataFrame from mantle.csv
Read the first seven columns and use 'Year' as the index; 'Year' is also kept as a regular column.

In [10]:
# Read only the first seven columns of the CSV, then index the rows by 'Year'.
# drop=False keeps 'Year' as a regular column too, so it appears both as the
# index and as the first data column.
csv = '../csvs/mantle.csv'
mantle = (
    pd.read_csv(csv, usecols=np.arange(7))
    .set_index('Year', drop=False)
)
mantle

Unnamed: 0_level_0,Year,Tm,G,PA,AB,R,H
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1951,1951,NYY,96,386,341,61,91
1952,1952,NYY,142,626,549,94,171
1953,1953,NYY,127,540,461,105,136
1954,1954,NYY,146,649,543,129,163
1955,1955,NYY,147,638,517,121,158
...,...,...,...,...,...,...,...
1964,1964,NYY,143,567,465,92,141
1965,1965,NYY,122,435,361,44,92
1966,1966,NYY,108,393,333,40,96
1967,1967,NYY,144,553,440,63,108


## Explore DataFrame
Get the column names, index, and the number of rows and columns

In [13]:
# Column labels, row index, and (rows, columns) shape, one per line.
print(mantle.columns, mantle.index, mantle.shape, sep='\n')

Index(['Year', 'Tm', 'G', 'PA', 'AB', 'R', 'H'], dtype='object')
Int64Index([1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960, 1961,
            1962, 1963, 1964, 1965, 1966, 1967, 1968],
           dtype='int64', name='Year')
(18, 7)


## Get quick information on the data
For example: counts, means, minimums and maximums of the numeric columns.

In [14]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns. The text column 'Tm' is left out of the result, and 'Year' is
# summarised as well because it is still a regular column, not only the index.
mantle.describe()

Unnamed: 0,Year,G,PA,AB,R,H
count,18.0,18.0,18.0,18.0,18.0,18.0
mean,1959.5,133.388889,550.388889,450.111111,93.111111,134.166667
std,5.338539,23.364475,122.925678,100.072261,33.585517,36.36134
min,1951.0,65.0,213.0,172.0,40.0,54.0
25%,1955.25,124.0,511.5,391.5,61.5,104.25
50%,1959.5,144.0,595.0,469.5,100.0,143.0
75%,1963.75,146.75,642.25,525.0,121.0,161.75
max,1968.0,153.0,654.0,549.0,132.0,188.0


## Get the first three rows of data

In [15]:
# First three rows in file order. head(3) returns the same rows as the
# positional lookup iloc[[0, 1, 2]] on this frame, and returns whatever rows
# exist instead of raising IndexError when a frame has fewer than three.
mantle.head(3)

Unnamed: 0_level_0,Year,Tm,G,PA,AB,R,H
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1951,1951,NYY,96,386,341,61,91
1952,1952,NYY,142,626,549,94,171
1953,1953,NYY,127,540,461,105,136


## Get the last three rows of data (most recent first)

In [16]:
# Positions -1, -2, -3 count back from the end of the frame, so the rows are
# returned newest-first (1968, 1967, 1966) rather than in file order.
mantle.iloc[[-1, -2, -3], :]

Unnamed: 0_level_0,Year,Tm,G,PA,AB,R,H
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1968,1968,NYY,144,547,435,57,103
1967,1967,NYY,144,553,440,63,108
1966,1966,NYY,108,393,333,40,96


## Get just the AB and H columns

In [19]:
# Keep every row but only the 'AB' and 'H' columns, in that order.
mantle.loc[:, ['AB', 'H']]

Unnamed: 0_level_0,AB,H
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
1951,341,91
1952,549,171
1953,461,136
1954,543,163
1955,517,158
...,...,...
1964,465,141
1965,361,92
1966,333,96
1967,440,108
