# Pandas basics

In [1]:
import numpy as np
import pandas as pd

## The Pandas Series Object

In [2]:
# Build a one-dimensional Series from a plain list of floats. No index is
# supplied, so pandas attaches its default integer labels.
data = pd.Series(data=[0.25, 0.5, 0.75, 1.0])
data

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64

In [3]:
# Underlying data as a NumPy array. `to_numpy()` is the accessor the pandas
# documentation recommends over the older `.values` attribute; for this
# float64 Series it yields an equal ndarray.
data.to_numpy()

array([0.25, 0.5 , 0.75, 1.  ])

In [4]:
# Inspect the index object attached to the Series. No index was passed to the
# constructor, so this is the default integer range pandas supplied.
data.index

RangeIndex(start=0, stop=4, step=1)

In [6]:
# Bracket access on the default integer index: a single key returns the
# scalar element, a slice returns a sub-Series. One print call with a newline
# separator emits the same text as two separate print calls.
print(data[0], data[1:2], sep='\n')

0.25
1    0.5
dtype: float64


### Series as a generalized array

In [7]:
# Same values as before, but with an explicit index of string labels:
# list('abcd') expands to ['a', 'b', 'c', 'd'].
data = pd.Series([0.25, 0.5, 0.75, 1.0], index=list('abcd'))
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [8]:
# The index now holds the string labels that were passed to the constructor
# instead of a default integer range.
data.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [9]:
# Label-based lookup of a single element. `.loc` spells out that 'a' is an
# index label; it returns the same scalar as the bare-bracket form data['a'].
data.loc['a']

0.25

In [10]:
# Slice by label. Unlike positional slices, a label slice includes its end
# label, so 'c' is part of the result (equivalent to data['a':'c']).
data.loc['a':'c']

a    0.25
b    0.50
c    0.75
dtype: float64

In [11]:
# Select several labels at once; the rows come back in the order the labels
# are requested, not in the Series' stored order.
data.loc[['c', 'a', 'b']]

c    0.75
a    0.25
b    0.50
dtype: float64

In [12]:
# State name -> population count, written as key/value pairs. Insertion order
# is kept and becomes the order of the Series index.
population_dict = dict([
    ('California', 38332521),
    ('Texas', 26448193),
    ('New York', 19651127),
    ('Florida', 19552860),
    ('Illinois', 12882135),
])
# A dict passed to pd.Series turns its keys into the index labels.
population = pd.Series(data=population_dict)
population

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64

In [13]:
# Dictionary-style lookup by state name, written with the explicit label
# accessor; equivalent to population['New York'].
population.loc['New York']

19651127

## The Pandas DataFrame Object

In [14]:
# State name -> area, one entry per line. The keys match those of
# population_dict so the two Series share the same index labels.
area_dict = {
    'California': 423967,
    'Texas': 695662,
    'New York': 141297,
    'Florida': 170312,
    'Illinois': 149995,
}
area = pd.Series(data=area_dict)
area

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
dtype: int64

In [15]:
# Combine the two Series into one table: each keyword becomes a column name
# and the Series' shared state labels become the row index.
states = pd.DataFrame(dict(population=population, area=area))
states

Unnamed: 0,population,area
California,38332521,423967
Texas,26448193,695662
New York,19651127,141297
Florida,19552860,170312
Illinois,12882135,149995


In [19]:
# Row labels of the DataFrame: the state names carried over from the index of
# the Series it was built from.
states.index

Index(['California', 'Texas', 'New York', 'Florida', 'Illinois'], dtype='object')

In [21]:
# Column labels of the DataFrame: the keys of the dict that was passed to
# pd.DataFrame when `states` was built.
states.columns

Index(['population', 'area'], dtype='object')

In [22]:
# Pull out one column as a Series: all rows (`:`), column label 'area'.
# Equivalent to the dictionary-style states['area'].
states.loc[:, 'area']

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: area, dtype: int64

### Initialization from a NumPy array

In [23]:
# Build a DataFrame from a 3x2 NumPy array of uniform [0, 1) samples.
# A dedicated, seeded generator replaces the unseeded np.random.rand call so
# the table is identical on every Restart & Run All. No index is given, so
# the rows get the default integer labels.
rng = np.random.default_rng(seed=0)
random_frame = pd.DataFrame(rng.random((3, 2)),
                            columns=['foo', 'bar'])
random_frame

Unnamed: 0,foo,bar
0,0.27399,0.520901
1,0.683636,0.801021
2,0.895658,0.72555


In [24]:
# Same construction, this time with explicit row labels as well as column
# names. The generator is created and seeded here so this cell is
# reproducible on its own; the unseeded np.random.rand call produced
# different numbers on every run.
rng = np.random.default_rng(seed=1)
labeled_frame = pd.DataFrame(rng.random((3, 2)),
                             columns=['foo', 'bar'],
                             index=['a', 'b', 'c'])
labeled_frame

Unnamed: 0,foo,bar
a,0.36896,0.163657
b,0.604273,0.851845
c,0.484279,0.649453
