# The Pandas DataFrame Object

A DataFrame represents a rectangular table of data and contains an ordered collection of columns, each of which can be a different value type.

In [116]:
import numpy as np 
import pandas as pd

In [117]:
# Build a DataFrame from a 2-D ndarray; with no labels supplied, both the
# rows and the columns are numbered from 0.
grid = np.array([[10, 11, 12, 13], [20, 21, 22, 23]])
df = pd.DataFrame(grid)
df

Unnamed: 0,0,1,2,3
0,10,11,12,13
1,20,21,22,23


In [118]:
# Build a DataFrame from a list of Series objects; each Series becomes a row.
series_rows = [pd.Series(np.arange(lo, lo + 5)) for lo in (10, 15)]
df1 = pd.DataFrame(series_rows)
df1

Unnamed: 0,0,1,2,3,4
0,10,11,12,13,14
1,15,16,17,18,19


In [119]:
# Build a DataFrame from a dict of Series: each key becomes a column name and
# each Series supplies that column's values.
s1 = pd.Series(np.arange(1, 6))
s2 = pd.Series(np.arange(6, 11))

df2 = pd.DataFrame(dict(boys=s1, girls=s2))
df2

Unnamed: 0,boys,girls
0,1,6
1,2,7
2,3,8
3,4,9
4,5,10


In [120]:
# Build a DataFrame from a dict of equal-length lists (one list per column).
people = {
    'name': ["Asad", "Saad", "Fahad", "Ali"],
    "age": [23, 34, 23, 21],
    "grades": ["A", "B", "C", "D"],
}
data = pd.DataFrame(people)
data

Unnamed: 0,name,age,grades
0,Asad,23,A
1,Saad,34,B
2,Fahad,23,C
3,Ali,21,D


In [121]:
# Supply the column labels explicitly; the row index is still numbered from 0.
fruit_counts = np.array([[10, 11], [20, 21]])
df3 = pd.DataFrame(fruit_counts, columns=["apples", "oranges"])
df3

Unnamed: 0,apples,oranges
0,10,11
1,20,21


In [122]:
# Label both axes: `index` names the rows and `columns` names the columns.
row_labels = ['apples', 'oranges']
day_labels = ['Mon', 'Tue', 'Wed', 'Thu']
df4 = pd.DataFrame(
    np.array([[10, 11, 12, 13], [20, 21, 22, 23]]),
    index=row_labels,
    columns=day_labels,
)

df4

Unnamed: 0,Mon,Tue,Wed,Thu
apples,10,11,12,13
oranges,20,21,22,23


In [123]:
# Column-oriented sample data: one list per column, all of equal length.
data = dict(
    state=['Ohio', 'Ohio', 'Ohio', 'Neveda', 'Neveda', 'Neveda'],
    year=[2000, 2001, 2002, 2001, 2002, 2003],
    pop=[1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
)

frame = pd.DataFrame(data)
frame

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Neveda,2001,2.4
4,Neveda,2002,2.9
5,Neveda,2003,3.2


In [124]:
# Re-order the columns and add an 'imports' column that has no data, so its
# values are all missing. The result is a new DataFrame that is only
# displayed; nothing is assigned back to `frame`.
column_order = ['year', 'state', 'pop', 'imports']
pd.DataFrame(frame, columns=column_order)

Unnamed: 0,year,state,pop,imports
0,2000,Ohio,1.5,
1,2001,Ohio,1.7,
2,2002,Ohio,3.6,
3,2001,Neveda,2.4,
4,2002,Neveda,2.9,
5,2003,Neveda,3.2,


In [125]:
# Select a single column as a Series; bracket access is equivalent to the
# attribute form `frame.year`.
frame['year']

0    2000
1    2001
2    2002
3    2001
4    2002
5    2003
Name: year, dtype: int64

In [126]:
# Bracket access is needed for this column: `frame.pop` would refer to the
# DataFrame.pop method rather than the 'pop' column.
frame['pop']

0    1.5
1    1.7
2    3.6
3    2.4
4    2.9
5    3.2
Name: pop, dtype: float64

In [127]:
# Rebuild from `data` with custom row labels. 'dept' is not a key in `data`,
# so that column is created with missing values.
frame2 = pd.DataFrame(
    data,
    index=['one', 'two', 'three', 'four', 'five', 'six'],
    columns=['year', 'state', 'pop', 'dept'],
)
frame2

Unnamed: 0,year,state,pop,dept
one,2000,Ohio,1.5,
two,2001,Ohio,1.7,
three,2002,Ohio,3.6,
four,2001,Neveda,2.4,
five,2002,Neveda,2.9,
six,2003,Neveda,3.2,


In [128]:
# Assign one scalar to the whole 'dept' column; the value (the string "100")
# is broadcast to every row.
frame2['dept'] = "100"
frame2

Unnamed: 0,year,state,pop,dept
one,2000,Ohio,1.5,100
two,2001,Ohio,1.7,100
three,2002,Ohio,3.6,100
four,2001,Neveda,2.4,100
five,2002,Neveda,2.9,100
six,2003,Neveda,3.2,100


In [129]:
#Fill the dept column with the integers 0-5 (np.arange(6)), one per row
frame2['dept'] = np.arange(6)
frame2

Unnamed: 0,year,state,pop,dept
one,2000,Ohio,1.5,0
two,2001,Ohio,1.7,1
three,2002,Ohio,3.6,2
four,2001,Neveda,2.4,3
five,2002,Neveda,2.9,4
six,2003,Neveda,3.2,5


In [130]:
# Assign a Series to 'dept': values are matched to rows by index label, so
# rows without a matching label ('one', 'three', 'six') are left missing.
val = pd.Series({'two': -1.2, 'four': -1.5, 'five': -1.7})
frame2['dept'] = val
frame2

Unnamed: 0,year,state,pop,dept
one,2000,Ohio,1.5,
two,2001,Ohio,1.7,-1.2
three,2002,Ohio,3.6,
four,2001,Neveda,2.4,-1.5
five,2002,Neveda,2.9,-1.7
six,2003,Neveda,3.2,


In [131]:
# Add a boolean column that is True where 'state' equals 'Ohio' and False
# otherwise.
frame2['eastern'] = frame2['state'].eq('Ohio')
frame2

Unnamed: 0,year,state,pop,dept,eastern
one,2000,Ohio,1.5,,True
two,2001,Ohio,1.7,-1.2,True
three,2002,Ohio,3.6,,True
four,2001,Neveda,2.4,-1.5,False
five,2002,Neveda,2.9,-1.7,False
six,2003,Neveda,3.2,,False


In [132]:
# Add a boolean column that is True where the 'pop' value is greater than 2.
frame2['greaterThenTwo'] = frame2['pop'].gt(2)
frame2

Unnamed: 0,year,state,pop,dept,eastern,greaterThenTwo
one,2000,Ohio,1.5,,True,False
two,2001,Ohio,1.7,-1.2,True,False
three,2002,Ohio,3.6,,True,True
four,2001,Neveda,2.4,-1.5,False,True
five,2002,Neveda,2.9,-1.7,False,True
six,2003,Neveda,3.2,,False,True


In [133]:
#Delete the eastern column; `del` removes it from frame2 in place
del frame2['eastern']

In [134]:
#Display frame2 again: the eastern column is no longer present
frame2

Unnamed: 0,year,state,pop,dept,greaterThenTwo
one,2000,Ohio,1.5,,False
two,2001,Ohio,1.7,-1.2,False
three,2002,Ohio,3.6,,True
four,2001,Neveda,2.4,-1.5,True
five,2002,Neveda,2.9,-1.7,True
six,2003,Neveda,3.2,,True
