In [1]:
import numpy as np
import pandas as pd

# Record the pandas version this notebook was executed with.
pd.__version__

'1.1.3'

In [2]:
# A Series wraps a one-dimensional sequence of values. No index is
# passed here, so pandas attaches its default integer index.
data = pd.Series(data=[0.25, 0.5, 0.75, 1.0])
data

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64

In [3]:
# Expose the values of the Series as a NumPy array. `to_numpy()` is the
# accessor the pandas documentation recommends over the older `.values`.
data.to_numpy()

array([0.25, 0.5 , 0.75, 1.  ])

In [4]:
# Inspect the index object that labels the entries of `data`.
data.index

RangeIndex(start=0, stop=4, step=1)

In [5]:
# Look up a single entry by its index label.
data.loc[1]

0.5

In [6]:
# Take the entries at positions 1 and 2; the stop position (3) is excluded.
data.iloc[1:3]

1    0.50
2    0.75
dtype: float64

In [9]:
# Index labels are not restricted to integers: here every value
# is keyed by a string.
my_s = pd.Series(data=[2, 3, 4, 5], index=list('abcd'))
my_s

a    2
b    3
c    4
d    5
dtype: int64

In [11]:
# String labels are looked up the same way integer labels are.
my_s.loc['b']

3

In [12]:
# A Python dict maps directly onto a Series: its keys become the
# index labels and its values become the data.
population_dict = {
    'California': 38_332_521,
    'Texas': 26_448_193,
    'New York': 19_651_127,
    'Florida': 19_552_860,
    'Illinois': 12_882_135,
}
population = pd.Series(data=population_dict)
population

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64

In [14]:
# Slicing also works with string labels. Unlike a positional slice,
# a label slice includes its end label ('Florida').
sub_pop = population.loc['Texas':'Florida']
sub_pop

Texas       26448193
New York    19651127
Florida     19552860
dtype: int64

In [15]:
# Per-state area figures, built the same way: a dict first, then a
# Series whose index labels are the dict keys.
area_dict = {
    'California': 423_967,
    'Texas': 695_662,
    'New York': 141_297,
    'Florida': 170_312,
    'Illinois': 149_995,
}
area = pd.Series(data=area_dict)
area

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
dtype: int64

In [17]:
# A Series object is like an individual column of a DataFrame:
# each dict key names a column and the Series supplies its values.
states = pd.DataFrame(dict(population=population, area=area))
states

Unnamed: 0,population,area
California,38332521,423967
Texas,26448193,695662
New York,19651127,141297
Florida,19552860,170312
Illinois,12882135,149995


In [22]:
# Row labels of the DataFrame.
states.index

Index(['California', 'Texas', 'New York', 'Florida', 'Illinois'], dtype='object')

In [23]:
# Column labels of the DataFrame, held in an Index object as well.
states.columns

Index(['population', 'area'], dtype='object')

In [24]:
# Selecting one column by its label returns that column as a Series.
states.loc[:, 'population']

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
Name: population, dtype: int64

In [28]:
# A DataFrame is a collection of Series objects, and a single Series
# can be promoted to a one-column DataFrame. `to_frame(name=...)` sets
# the column label explicitly, so the result does not depend on whether
# the Series already carries a `name` of its own.
population.to_frame(name='population')

Unnamed: 0,population
California,38332521
Texas,26448193
New York,19651127
Florida,19552860
Illinois,12882135


In [29]:
# A list of dicts is read row by row: every dict is one row and
# its keys are the column names.
data = [dict(a=i, b=2 * i) for i in range(3)]
pd.DataFrame(data=data)

Unnamed: 0,a,b
0,0,0
1,1,2
2,2,4


In [35]:
# A nested list is read row by row; `columns` supplies the labels.
data = [
    [1, 2],
    [3, 4],
]
pd.DataFrame(data=data, columns=['col', 'another'])

Unnamed: 0,col,another
0,1,2
1,3,4


In [37]:
# A one-element list of dicts yields a single-row DataFrame.
data = [dict(a=1, b=2)]
pd.DataFrame(data=data)

Unnamed: 0,a,b
0,1,2


In [42]:
# The rows need not share the same keys: pandas fills in NaN
# wherever a row has no value for a column.
pd.DataFrame(data=[
    dict(a=1, b=2),
    dict(b=3, c=4),
])

Unnamed: 0,a,b,c
0,1.0,2,
1,,3,4.0


In [43]:
# A dict of Series works as well: each key names a column and each
# Series supplies that column's values.
pd.DataFrame(data=dict(population=population, area=area))

Unnamed: 0,population,area
California,38332521,423967
Texas,26448193,695662
New York,19651127,141297
Florida,19552860,170312
Illinois,12882135,149995


In [44]:
# From a 2D NumPy array: rows take their labels from `index` and columns
# from `columns`. The values are drawn from a generator with a fixed seed
# so that re-running the cell reproduces the same table instead of
# producing fresh numbers on every execution.
rng = np.random.default_rng(seed=0)
random_frame = pd.DataFrame(
    rng.random((3, 2)),
    columns=['foo', 'bar'],
    index=['a', 'b', 'c'],
)
random_frame

Unnamed: 0,foo,bar
a,0.276954,0.342718
b,0.869742,0.478221
c,0.23374,0.193519
