In [10]:
# The Pandas DataFrame Object: The DataFrame can be thought of either as a generalization of a NumPy array, 
# or as a specialization of a Python dictionary.
import numpy as np
import pandas as pd

In [11]:
# Population of each state, keyed by state name.
population_dict = {
    'California': 38332521,
    'Texas': 26448193,
    'New York': 19651127,
    'Florida': 19552860,
    'Illinois': 12882135,
}

# Wrapping the dict in a Series turns its keys into the index labels.
population = pd.Series(data=population_dict)
population

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64

In [12]:
# DataFrame as a generalized NumPy array: a DataFrame can be thought of as a two-dimensional array with flexible
# row indices and flexible column names. Equivalently, it can be thought of as an ordered sequence of
# one-dimensional columns, i.e. a sequence of aligned (sharing the same index) Series objects.

# Area of each state, keyed by the same state names used for the population data.
area_dict = {
    'California': 44444,
    'Texas': 66666,
    'New York': 14444,
    'Florida': 17777,
    'Illinois': 14999,
}

# The dict keys become the index labels of the Series.
area = pd.Series(data=area_dict)
area


California    44444
Texas         66666
New York      14444
Florida       17777
Illinois      14999
dtype: int64

In [13]:
# Combine the two Series into a single table: each dict key becomes a column label,
# and the state names they share form the row labels.
states = pd.DataFrame(
    data={
        'population': population,
        'area': area,
    }
)
states

Unnamed: 0,population,area
California,38332521,44444
Texas,26448193,66666
New York,19651127,14444
Florida,19552860,17777
Illinois,12882135,14999


In [6]:
# Row labels of the DataFrame, exposed as a pandas Index.
states.index

Index(['California', 'Texas', 'New York', 'Florida', 'Illinois'], dtype='object')

In [14]:
# Column labels of the DataFrame, exposed as a pandas Index.
states.columns

Index(['population', 'area'], dtype='object')

In [15]:
# DataFrame as a specialized dictionary: just as a dictionary maps a key to a value, a DataFrame maps a
# column name to a Series holding that column's data.

# Dictionary-style lookup by column name returns the 'area' column as a Series.
states['area']

California    44444
Texas         66666
New York      14444
Florida       17777
Illinois      14999
Name: area, dtype: int64

In [16]:
# Dictionary-style lookup by column name returns the 'population' column as a Series.
states['population']

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
Name: population, dtype: int64

In [22]:
# Constructing DataFrame objects:

In [23]:
# (i) From a list of dicts: each dictionary becomes one row of the DataFrame,
# and the dictionary keys become the column labels.
data = [{'a': n, 'b': n * 2} for n in range(3)]
pd.DataFrame(data=data)

Unnamed: 0,a,b
0,0,0
1,1,2
2,2,4


In [21]:
# (ii) From a single Series object: the Series supplies both the values and the row labels,
# while `columns` names the one resulting column.
pd.DataFrame(
    data=population,
    columns=['population'],
)

Unnamed: 0,population
California,38332521
Texas,26448193
New York,19651127
Florida,19552860
Illinois,12882135


In [24]:
# The two row dictionaries do not share all keys; wherever a row lacks a key,
# pandas fills that position with NaN ("Not a Number").
pd.DataFrame(
    data=[
        {'a': 1, 'b': 2},
        {'b': 3, 'c': 4},
    ]
)

Unnamed: 0,a,b,c
0,1.0,2,
1,,3,4.0


In [25]:
# (iii) From a dictionary of Series objects: each key labels a column and the
# corresponding Series provides that column's values.
pd.DataFrame(
    data={
        'population': population,
        'area': area,
    }
)

Unnamed: 0,population,area
California,38332521,44444
Texas,26448193,66666
New York,19651127,14444
Florida,19552860,17777
Illinois,12882135,14999


In [26]:
# (iv) From a 2D NumPy array: the array supplies the values, while `columns` and `index`
# supply the column and row labels.
# The values are drawn from a locally seeded generator so that the table is identical on
# every Restart & Run All; the global NumPy random state is left untouched.
random_frame = pd.DataFrame(
    np.random.RandomState(0).rand(3, 2),
    columns=['foo', 'bar'],
    index=['a', 'b', 'c'],
)
random_frame

Unnamed: 0,foo,bar
a,0.420641,0.742707
b,0.296516,0.184173
c,0.723081,0.048032


In [27]:
# (v) From a NumPy structured array: three zero-initialised records, each with an
# 8-byte integer field 'A' and an 8-byte float field 'B'.
A = np.zeros(
    shape=3,
    dtype=np.dtype([('A', 'i8'), ('B', 'f8')]),
)
A

array([(0, 0.), (0, 0.), (0, 0.)], dtype=[('A', '<i8'), ('B', '<f8')])

In [28]:
# The structured array's field names ('A', 'B') become the DataFrame's column labels.
pd.DataFrame(A)

Unnamed: 0,A,B
0,0,0.0
1,0,0.0
2,0,0.0
