In [1]:
import numpy as np
import pandas as pd

In [56]:
# Series with a deliberately unsorted string index. A .loc slice is label-based
# and, as the output shows, includes both endpoints ('f' through 'c').
data = pd.Series(data=[5, 3, 1, 4, 2], index=list('afdcb'))
data.loc['f':'c']

f    3
d    1
c    4
dtype: int64

In [45]:
# With both a dict and an explicit index, only the listed keys are kept,
# in the order given by the index.
data = pd.Series({'a': 1, 'b': 2, 'c': 3}, index=list('ca'))
data.index

Index(['c', 'a'], dtype='object')

In [25]:
# Slice from 'b' through 'd' on whatever `data` is bound to when this cell runs.
# NOTE(review): execution counts are out of order here, so the recorded output
# may not belong to the `data` defined in the cell directly above — confirm on a fresh run.
data['b':'d']

Series([], dtype: int32)

In [6]:
# Underlying values of `data` (shown as a NumPy array in the output).
# NOTE(review): the `data` that produced this output is not defined in the visible cells.
data.values

array([0, 1, 2, 3])

In [16]:
# NOTE(review): with an integer key, plain [] can mean label or position depending
# on the index, and the index of this `data` is not visible here — consider .loc/.iloc.
data[2]

2

In [8]:
# A slice of `data` comes back as a Series, per the output below.
type(data[1:3])

pandas.core.series.Series

In [72]:
# Two per-state mappings whose keys are listed in different orders; the
# DataFrame constructor lines the two Series up by label, not by position.
population_dict = {
    'California': 1000,
    'Florida': 2000,
    'Illinois': 3000,
    'New York': 4000,
}
area_dict = {
    'Florida': 3000,
    'California': 4000,
    'Illinois': 3000,
    'New York': 4000,
}
population_series, area_series = pd.Series(population_dict), pd.Series(area_dict)
state_df = pd.DataFrame(dict(population=population_series, area=area_series))

In [118]:
# A single row selected by position comes back as a Series.
type(state_df.iloc[0])

pandas.core.series.Series

In [86]:
# Slicing an Index yields another Index.
type(state_df.index[1:3])

pandas.core.indexes.base.Index

In [85]:
# Comparing a column with a scalar produces a Series (one result per row).
type(state_df['area'] > 3000)

pandas.core.series.Series

In [87]:
# Boolean-mask row selection: keep only the rows whose area exceeds 3000.
large_area = state_df['area'].gt(3000)
state_df.loc[large_area]

Unnamed: 0,area,population
California,4000,1000
New York,4000,4000


In [92]:
# Rows at positions 1 and 2 (end-exclusive), selected explicitly by position.
state_df.iloc[1:3]

Unnamed: 0,area,population
Florida,3000,2000
Illinois,3000,3000


In [37]:
# 3x2 array of uniform samples over [0, 1) from NumPy's global RNG.
# NOTE(review): no seed is set in the visible cells, so the values change on every run.
np.random.rand(3, 2)

array([[ 0.03118314,  0.02170284],
       [ 0.42112413,  0.97743625],
       [ 0.85556943,  0.32195248]])

In [92]:
# Hard-coded 3x2 sample so the frame below is the same on every run;
# rows are labelled a-c and columns foo/bar.
l = [
    [0.03118314, 0.02170284],
    [0.42112413, 0.97743625],
    [0.85556943, 0.32195248],
]
pd.DataFrame(data=l, index=list('abc'), columns=['foo', 'bar'])

Unnamed: 0,foo,bar
a,0.031183,0.021703
b,0.421124,0.977436
c,0.855569,0.321952


In [93]:
# Horizontal stacking: the two 2x2 blocks are joined column-wise into a 2x4 array.
x = [[1, 2],
     [3, 4]]
np.hstack((x, x))

array([[1, 2, 1, 2],
       [3, 4, 3, 4]])

In [44]:
# Index objects support set-style operations. Use the explicit method: in
# current pandas the `&` operator on an Index is an element-wise AND, not a
# set intersection, so `indA & indB` would silently give a different answer.
indA = pd.Index([7, 5, 1, 3, 9])
indB = pd.Index([11, 5, 7, 3, 2])
indA.intersection(indB)

Int64Index([7, 5, 3], dtype='int64')

# Handling Missing Data

In [8]:
# Two named Series of 10 standard-normal draws each (s1 is drawn first), joined
# side by side. Despite its name, s3 is a DataFrame whose column labels are the
# Series names.
s1, s2 = (
    pd.Series(np.random.randn(10), name=label)
    for label in ('series1', 'series2')
)
s3 = pd.concat((s1, s2), axis='columns')

In [11]:
# First row by position; it comes back as a Series keyed by the column names.
s3.iloc[0]

series1    2.040692
series2    0.452641
Name: 0, dtype: float64

In [88]:
# Keys are (state, year) tuples, so the resulting Series carries a two-level
# MultiIndex; the levels are then given names.
populations = pd.Series({
    ('California', 2000): 1000,
    ('California', 2010): 2000,
    ('New York', 2000): 3000,
    ('New York', 2010): 4000,
})
populations.index = populations.index.set_names(['state', 'year'])
populations.index

MultiIndex(levels=[['California', 'New York'], [2000, 2010]],
           labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
           names=['state', 'year'])

In [56]:
# Relabel entries of the 'state' index level. The renamed Series is rebound to
# the same name instead of using inplace=True, so the cell is an ordinary
# expression-and-assign step and does not mutate the object other cells displayed.
populations = populations.rename(
    {'New York': 'Oregon', 'California': 'North Carolina'},
    level='state',
)

In [57]:
# Display the Series; the recorded output shows the renamed state labels.
populations

state           year
North Carolina  2000    1000
                2010    2000
Oregon          2000    3000
                2010    4000
dtype: int64

In [80]:
# First two rows by position (end-exclusive slice).
populations.iloc[0:2]

state           year
North Carolina  2000    1000
                2010    2000
dtype: int64

In [91]:
# NOTE(review): the recorded output shows the original state labels (California /
# New York), so this cell was run on a `populations` that had not been renamed —
# the execution counts are out of order; re-run from a fresh kernel to confirm.
populations

state       year
California  2000    1000
            2010    2000
New York    2000    3000
            2010    4000
dtype: int64

In [97]:
def make_df(cols, ind):
    """Build a small DataFrame of placeholder strings for demonstrations.

    Every cell holds its column label followed by its row label, e.g. the
    cell in column 'A' at index 2 contains 'A2'.

    Parameters
    ----------
    cols : iterable
        Column labels (a string such as 'AB' yields columns 'A' and 'B').
    ind : iterable
        Row labels; iterated once per column and also used as the index.

    Returns
    -------
    pandas.DataFrame
        Frame indexed by ``ind`` with one column per entry of ``cols``.
    """
    columns = {}
    for col in cols:
        columns[col] = [f"{col!s}{row!s}" for row in ind]
    return pd.DataFrame(columns, index=ind)

In [116]:
# x covers rows 1-3 and y covers rows 2-4; an inner join along the columns
# keeps only the row labels both frames share (2 and 3).
x, y = make_df('AB', [1, 2, 3]), make_df('CD', [2, 3, 4])
pd.concat([x, y], axis=1, join='inner')

Unnamed: 0,A,B,C,D
2,A2,B2,C2,D2
3,A3,B3,C3,D3


In [126]:
# Stack two Series end to end. pd.concat replaces Series.append, which was
# deprecated in pandas 1.4 and removed in 2.0. Index labels are kept as they
# are, so the shared label 4 appears twice in the result.
a = pd.Series([1, 2, 3], index=[4, 5, 6])
b = pd.Series([7, 8, 9], index=[4, 11, 12])
pd.concat([a, b])

4     1
5     2
6     3
4     7
11    8
12    9
dtype: int64

In [129]:
# Column-wise concat lines the two Series up on the union of their index
# labels; a label present in only one Series is left empty in the other
# column, and the values display as floats (see the output).
pd.concat([a, b], axis=1)

Unnamed: 0,0,1
4,1.0,7.0
5,2.0,
6,3.0,
11,,8.0
12,,9.0
