In [1]:
import numpy as np
import pandas as pd

In [2]:
data = pd.Series([0.25, 0.5, 0.75, 1.0])
data

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64

In [3]:
data.values

array([0.25, 0.5 , 0.75, 1.  ])

In [4]:
data.index

RangeIndex(start=0, stop=4, step=1)

In [5]:
data[1]

0.5

In [6]:
data[1:3]

1    0.50
2    0.75
dtype: float64

In [7]:
## Series as generalized NumPy array

data = pd.Series([0.25, 0.5, 0.75, 1.0],
                index=['a', 'b', 'c', 'd'])
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [8]:
data['d']

1.0

In [9]:
# Series as specialized dictionary
population_dict = {
    'Clifornia' : 38339334,
    'Texas' : 2454343,
    'New York' : 3424242,
    'Florida' : 1955286,
    'Illinois' : 12882135
}
population = pd.Series(population_dict)
population

Clifornia    38339334
Texas         2454343
New York      3424242
Florida       1955286
Illinois     12882135
dtype: int64

In [10]:
population['New York']

3424242

In [11]:
# Constructing Series objects
pd.Series([2, 4, 6])

0    2
1    4
2    6
dtype: int64

In [12]:
pd.Series({2:'a', 1:'b', 3:'c'})

2    a
1    b
3    c
dtype: object

In [13]:
pd.Series({2:'a', 1:'b', 3:'c'}, index=[3,2])

3    c
2    a
dtype: object

In [14]:
area_dict = {
    'Clifornia' : 38339334,
    'Texas' : 2454343,
    'New York' : 3424242,
    'Florida' : 1955286,
    'Illinois' : 12882135
}

In [15]:
area = pd.Series(area_dict)

In [16]:
area

Clifornia    38339334
Texas         2454343
New York      3424242
Florida       1955286
Illinois     12882135
dtype: int64

In [17]:
states = pd.DataFrame({'population' : population,
                      'area' : area})
states

Unnamed: 0,population,area
Clifornia,38339334,38339334
Texas,2454343,2454343
New York,3424242,3424242
Florida,1955286,1955286
Illinois,12882135,12882135


In [18]:
states.index

Index(['Clifornia', 'Texas', 'New York', 'Florida', 'Illinois'], dtype='object')

In [19]:
states.columns

Index(['population', 'area'], dtype='object')

In [20]:
## DataFrame as specialized dictionary

In [21]:
states['area']

Clifornia    38339334
Texas         2454343
New York      3424242
Florida       1955286
Illinois     12882135
Name: area, dtype: int64

In [22]:
# Constructing DataFrame objects
pd.DataFrame(population, columns=['population']) # from a single series objjects

Unnamed: 0,population
Clifornia,38339334
Texas,2454343
New York,3424242
Florida,1955286
Illinois,12882135


In [23]:
# from alist of dictionaries
data = [{'a':i, 'b':2*i}
        for i in range(3)]
pd.DataFrame(data)

Unnamed: 0,a,b
0,0,0
1,1,2
2,2,4


In [24]:
# from a dictionary of series objects
pd.DataFrame({'population':population,
              'area' : area})

Unnamed: 0,population,area
Clifornia,38339334,38339334
Texas,2454343,2454343
New York,3424242,3424242
Florida,1955286,1955286
Illinois,12882135,12882135


In [29]:
# from a two dimensional numpy array
pd.DataFrame(np.random.rand(3, 2),
             columns=['foo','bar'],
             index=['a', 'b', 'c'])

Unnamed: 0,foo,bar
a,0.727661,0.866957
b,0.251558,0.169201
c,0.344469,0.035887


In [30]:
# from a Numpy Structured Array
A = np.zeros(3, dtype=[('A', 'i8'), ('B', 'f8')])
A

array([(0, 0.), (0, 0.), (0, 0.)], dtype=[('A', '<i8'), ('B', '<f8')])

In [31]:
pd.DataFrame(A)

Unnamed: 0,A,B
0,0,0.0
1,0,0.0
2,0,0.0
