In [3]:
import numpy as np
import pandas as pd 

# Basic Data Structure

In [None]:
# pandas provides two types of classes for handling data:

# Series: a one-dimensional labeled array holding data of any type,
# such as integers, strings, Python objects, etc.

# DataFrame: a two-dimensional data structure that holds data like a two-dimensional array or a table with rows and columns.

# Object creation

In [4]:
# Creating a Series by passing a list of values; pandas supplies a default
# integer index (0..5). The np.nan entry makes the whole Series float64.

s = pd.Series([1,3,5,np.nan,6,8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [9]:
# Creating a DataFrame by passing a NumPy array with a datetime index using date_range() and labeled columns.
# This cell only builds the index: six consecutive daily timestamps starting 2026-02-04.

dates = pd.date_range("20260204",periods = 6)
dates

DatetimeIndex(['2026-02-04', '2026-02-05', '2026-02-06', '2026-02-07',
               '2026-02-08', '2026-02-09'],
              dtype='datetime64[ns]', freq='D')

In [12]:
# Seed NumPy's legacy global RNG so this frame (and the other np.random
# draws further down) come out the same on every Restart Kernel -> Run All.
np.random.seed(42)

# 6x4 frame of standard-normal values: one row per date, columns A-D.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list("ABCD"))
df

Unnamed: 0,A,B,C,D
2026-02-04,0.10207,0.328711,1.37699,0.862003
2026-02-05,-2.11521,1.332548,-1.425901,-1.112018
2026-02-06,-0.411939,-0.098942,-0.324817,1.191994
2026-02-07,0.040169,-0.468798,-1.251603,-0.106453
2026-02-08,0.856077,0.373343,-0.277296,1.811717
2026-02-09,-0.073891,-0.302611,1.143035,-1.356787


In [15]:
# Creating a DataFrame from a dictionary: every key becomes a column label and
# every value supplies that column's data. Scalar values (A, B, F) are repeated
# for each of the four rows.

df2 = pd.DataFrame(
    dict(
        A=1,
        B=pd.Timestamp("20260202"),
        C=pd.Series(1, index=list(range(4)), dtype="float"),
        D=np.full(4, 3, dtype="int32"),
        E=pd.Categorical(["test", "train"] * 2),
        F="foo",
    )
)
df2

Unnamed: 0,A,B,C,D,E,F
0,1,2026-02-02,1.0,3,test,foo
1,1,2026-02-02,1.0,3,train,foo
2,1,2026-02-02,1.0,3,test,foo
3,1,2026-02-02,1.0,3,train,foo


In [16]:
# The columns of a DataFrame can each have a different dtype.
df2.dtypes

A            int64
B    datetime64[s]
C          float64
D            int32
E         category
F           object
dtype: object

# Viewing data

In [None]:
# head() shows the first rows of the DataFrame (5 rows by default).
df.head()

Unnamed: 0,A,B,C,D
2026-02-04,0.10207,0.328711,1.37699,0.862003
2026-02-05,-2.11521,1.332548,-1.425901,-1.112018
2026-02-06,-0.411939,-0.098942,-0.324817,1.191994
2026-02-07,0.040169,-0.468798,-1.251603,-0.106453
2026-02-08,0.856077,0.373343,-0.277296,1.811717


In [25]:
# tail(n) shows the last n rows of the DataFrame; here the last 4.
df.tail(4)

Unnamed: 0,A,B,C,D
2026-02-06,-0.411939,-0.098942,-0.324817,1.191994
2026-02-07,0.040169,-0.468798,-1.251603,-0.106453
2026-02-08,0.856077,0.373343,-0.277296,1.811717
2026-02-09,-0.073891,-0.302611,1.143035,-1.356787


In [None]:

# .index holds the row labels.
df2.index


Index([0, 1, 2, 3], dtype='int64')

In [27]:
# .columns holds the column labels.
df2.columns

Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')

In [29]:
# describe() shows a quick statistical summary of each column:
# count, mean, std, min, quartiles and max.

df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.267121,0.194042,-0.126599,0.215076
std,0.996599,0.650898,1.17376,1.285103
min,-2.11521,-0.468798,-1.425901,-1.356787
25%,-0.327427,-0.251694,-1.019907,-0.860626
50%,-0.016861,0.114884,-0.301057,0.377775
75%,0.086595,0.362185,0.787952,1.109496
max,0.856077,1.332548,1.37699,1.811717


In [31]:
# Transposing your data: .T swaps rows and columns, so the dates become the
# column labels and A-D become the row labels.

df.T

Unnamed: 0,2026-02-04,2026-02-05,2026-02-06,2026-02-07,2026-02-08,2026-02-09
A,0.10207,-2.11521,-0.411939,0.040169,0.856077,-0.073891
B,0.328711,1.332548,-0.098942,-0.468798,0.373343,-0.302611
C,1.37699,-1.425901,-0.324817,-1.251603,-0.277296,1.143035
D,0.862003,-1.112018,1.191994,-0.106453,1.811717,-1.356787


In [35]:
# DataFrame.sort_index() sorts by the labels along an axis.
# axis=1 sorts the column labels; ascending=False reverses them (D, C, B, A).


df.sort_index(axis=1 , ascending= False)


Unnamed: 0,D,C,B,A
2026-02-04,0.862003,1.37699,0.328711,0.10207
2026-02-05,-1.112018,-1.425901,1.332548,-2.11521
2026-02-06,1.191994,-0.324817,-0.098942,-0.411939
2026-02-07,-0.106453,-1.251603,-0.468798,0.040169
2026-02-08,1.811717,-0.277296,0.373343,0.856077
2026-02-09,-1.356787,1.143035,-0.302611,-0.073891


In [36]:
# DataFrame.sort_values() sorts the rows by the values in a column
# (column B here, smallest first).
df.sort_values(by="B")

Unnamed: 0,A,B,C,D
2026-02-07,0.040169,-0.468798,-1.251603,-0.106453
2026-02-09,-0.073891,-0.302611,1.143035,-1.356787
2026-02-06,-0.411939,-0.098942,-0.324817,1.191994
2026-02-04,0.10207,0.328711,1.37699,0.862003
2026-02-08,0.856077,0.373343,-0.277296,1.811717
2026-02-05,-2.11521,1.332548,-1.425901,-1.112018


# Selection

## Getitem

In [37]:
# A single column label selects that column and returns it as a Series.
df["A"]

2026-02-04    0.102070
2026-02-05   -2.115210
2026-02-06   -0.411939
2026-02-07    0.040169
2026-02-08    0.856077
2026-02-09   -0.073891
Freq: D, Name: A, dtype: float64

In [38]:
# Attribute access is another spelling of df["A"]; it returns the same Series.
df.A

2026-02-04    0.102070
2026-02-05   -2.115210
2026-02-06   -0.411939
2026-02-07    0.040169
2026-02-08    0.856077
2026-02-09   -0.073891
Freq: D, Name: A, dtype: float64

In [39]:
# A list of column labels selects several columns and returns a DataFrame.
df[["A","B"]]

Unnamed: 0,A,B
2026-02-04,0.10207,0.328711
2026-02-05,-2.11521,1.332548
2026-02-06,-0.411939,-0.098942
2026-02-07,0.040169,-0.468798
2026-02-08,0.856077,0.373343
2026-02-09,-0.073891,-0.302611


In [44]:
# Slicing with integers selects rows by position: 0:3 gives the first three rows.
df[0:3]

Unnamed: 0,A,B,C,D
2026-02-04,0.10207,0.328711,1.37699,0.862003
2026-02-05,-2.11521,1.332548,-1.425901,-1.112018
2026-02-06,-0.411939,-0.098942,-0.324817,1.191994


In [46]:
# Slicing with date strings selects rows by label; both endpoints are included.
df['2026-02-04':'2026-02-07']

Unnamed: 0,A,B,C,D
2026-02-04,0.10207,0.328711,1.37699,0.862003
2026-02-05,-2.11521,1.332548,-1.425901,-1.112018
2026-02-06,-0.411939,-0.098942,-0.324817,1.191994
2026-02-07,0.040169,-0.468798,-1.251603,-0.106453


## Selection by label

In [47]:
# .loc with a single row label returns that row as a Series indexed by column.
df.loc[dates[0]]

A    0.102070
B    0.328711
C    1.376990
D    0.862003
Name: 2026-02-04 00:00:00, dtype: float64

In [48]:
df

Unnamed: 0,A,B,C,D
2026-02-04,0.10207,0.328711,1.37699,0.862003
2026-02-05,-2.11521,1.332548,-1.425901,-1.112018
2026-02-06,-0.411939,-0.098942,-0.324817,1.191994
2026-02-07,0.040169,-0.468798,-1.251603,-0.106453
2026-02-08,0.856077,0.373343,-0.277296,1.811717
2026-02-09,-0.073891,-0.302611,1.143035,-1.356787


In [53]:
# All rows (:) of the columns labelled A and C.
df.loc[:,["A","C"]]

Unnamed: 0,A,C
2026-02-04,0.10207,1.37699
2026-02-05,-2.11521,-1.425901
2026-02-06,-0.411939,-0.324817
2026-02-07,0.040169,-1.251603
2026-02-08,0.856077,-0.277296
2026-02-09,-0.073891,1.143035


In [54]:
# Row label slice combined with a column list; the row slice includes both endpoints.
df.loc["2026-02-04":"2026-02-07",["A","B"]]

Unnamed: 0,A,B
2026-02-04,0.10207,0.328711
2026-02-05,-2.11521,1.332548
2026-02-06,-0.411939,-0.098942
2026-02-07,0.040169,-0.468798


In [55]:
# One row label plus one column label returns a single scalar value.
df.loc[dates[0],"D"]

0.8620027029990369

In [56]:
# .at looks up a single cell by label and returns the same scalar as the .loc form.
df.at[dates[0],"D"]

0.8620027029990369

## Selection by Position

In [57]:
# .iloc selects by integer position: 3 is the fourth row, returned as a Series.
df.iloc[3]

A    0.040169
B   -0.468798
C   -1.251603
D   -0.106453
Name: 2026-02-07 00:00:00, dtype: float64

In [59]:
# Integer slices exclude the end position: rows 0-1 and columns 2-3 (C and D).
df.iloc[0:2,2:4]

Unnamed: 0,C,D
2026-02-04,1.37699,0.862003
2026-02-05,-1.425901,-1.112018


In [60]:
# Lists of integer positions pick exactly the listed rows and columns.
df.iloc[[1,2,3],[1,2]]

Unnamed: 0,B,C
2026-02-05,1.332548,-1.425901
2026-02-06,-0.098942,-0.324817
2026-02-07,-0.468798,-1.251603


In [61]:
# Rows at positions 1-2, all columns.
df.iloc[1:3,:]

Unnamed: 0,A,B,C,D
2026-02-05,-2.11521,1.332548,-1.425901,-1.112018
2026-02-06,-0.411939,-0.098942,-0.324817,1.191994


In [62]:
# All rows, columns at positions 1-2 (B and C).
df.iloc[:, 1:3]

Unnamed: 0,B,C
2026-02-04,0.328711,1.37699
2026-02-05,1.332548,-1.425901
2026-02-06,-0.098942,-0.324817
2026-02-07,-0.468798,-1.251603
2026-02-08,0.373343,-0.277296
2026-02-09,-0.302611,1.143035


In [63]:
# One row position plus one column position returns a single scalar value.
df.iloc[3,3]

-0.10645294364808917

In [64]:
# .iat looks up a single cell by position and returns the same scalar as the .iloc form.
df.iat[3,3]

-0.10645294364808917

## Boolean indexing 

In [66]:
# Keep only the rows where the value in column A is greater than 0.
df[df["A"] > 0]

Unnamed: 0,A,B,C,D
2026-02-04,0.10207,0.328711,1.37699,0.862003
2026-02-07,0.040169,-0.468798,-1.251603,-0.106453
2026-02-08,0.856077,0.373343,-0.277296,1.811717


In [None]:
# A boolean DataFrame used as a mask keeps the values where the condition
# holds and puts NaN everywhere else; the shape is unchanged.
df[df> 0]

Unnamed: 0,A,B,C,D
2026-02-04,0.10207,0.328711,1.37699,0.862003
2026-02-05,,1.332548,,
2026-02-06,,,,1.191994
2026-02-07,0.040169,,,
2026-02-08,0.856077,0.373343,,1.811717
2026-02-09,,,1.143035,


In [68]:
# Work on a copy so that the column added to df3 below is not added to df.
df3 = df.copy()

In [69]:
# Add a new column E from a list holding one value per row.
df3["E"] = ["one", "one", "two", "three", "four", "three"]

In [70]:
df3

Unnamed: 0,A,B,C,D,E
2026-02-04,0.10207,0.328711,1.37699,0.862003,one
2026-02-05,-2.11521,1.332548,-1.425901,-1.112018,one
2026-02-06,-0.411939,-0.098942,-0.324817,1.191994,two
2026-02-07,0.040169,-0.468798,-1.251603,-0.106453,three
2026-02-08,0.856077,0.373343,-0.277296,1.811717,four
2026-02-09,-0.073891,-0.302611,1.143035,-1.356787,three


In [71]:
# isin() builds a boolean mask; keep the rows whose E value is "two" or "four".
df3[df3["E"].isin(["two","four"])]

Unnamed: 0,A,B,C,D,E
2026-02-06,-0.411939,-0.098942,-0.324817,1.191994,two
2026-02-08,0.856077,0.373343,-0.277296,1.811717,four


# Missing data

In [74]:
# reindex() returns a new frame with the requested labels: the first four
# dates as rows, and the existing columns plus a new column E. E is filled
# with NaN because df has no data for it.
df1 = df.reindex(index=dates[0:4] , columns=list(df.columns) + ["E"])

In [75]:
df1

Unnamed: 0,A,B,C,D,E
2026-02-04,0.10207,0.328711,1.37699,0.862003,
2026-02-05,-2.11521,1.332548,-1.425901,-1.112018,
2026-02-06,-0.411939,-0.098942,-0.324817,1.191994,
2026-02-07,0.040169,-0.468798,-1.251603,-0.106453,


In [78]:
# Set E to 1 on the rows from dates[0] through dates[1] (label slices include
# both ends); the remaining rows keep NaN.
df1.loc[dates[0]:dates[1] , "E"] = 1

In [80]:
df1

Unnamed: 0,A,B,C,D,E
2026-02-04,0.10207,0.328711,1.37699,0.862003,1.0
2026-02-05,-2.11521,1.332548,-1.425901,-1.112018,1.0
2026-02-06,-0.411939,-0.098942,-0.324817,1.191994,
2026-02-07,0.040169,-0.468798,-1.251603,-0.106453,


In [82]:
# Add column F with a missing value in the first row.
df1["F"] = [np.nan , 1 , 2 ,3 ]

In [83]:
df1

Unnamed: 0,A,B,C,D,E,F
2026-02-04,0.10207,0.328711,1.37699,0.862003,1.0,
2026-02-05,-2.11521,1.332548,-1.425901,-1.112018,1.0,1.0
2026-02-06,-0.411939,-0.098942,-0.324817,1.191994,,2.0
2026-02-07,0.040169,-0.468798,-1.251603,-0.106453,,3.0


In [84]:
# dropna(how="any") drops every row that contains at least one missing value.
# It returns a new frame; df1 itself is left unchanged.
df1.dropna(how = "any")

Unnamed: 0,A,B,C,D,E,F
2026-02-05,-2.11521,1.332548,-1.425901,-1.112018,1.0,1.0


In [85]:
df1

Unnamed: 0,A,B,C,D,E,F
2026-02-04,0.10207,0.328711,1.37699,0.862003,1.0,
2026-02-05,-2.11521,1.332548,-1.425901,-1.112018,1.0,1.0
2026-02-06,-0.411939,-0.098942,-0.324817,1.191994,,2.0
2026-02-07,0.040169,-0.468798,-1.251603,-0.106453,,3.0


In [86]:
# fillna() returns a new frame in which every missing value is replaced by 5.
df1.fillna(value=5)

Unnamed: 0,A,B,C,D,E,F
2026-02-04,0.10207,0.328711,1.37699,0.862003,1.0,5.0
2026-02-05,-2.11521,1.332548,-1.425901,-1.112018,1.0,1.0
2026-02-06,-0.411939,-0.098942,-0.324817,1.191994,5.0,2.0
2026-02-07,0.040169,-0.468798,-1.251603,-0.106453,5.0,3.0


In [87]:
# pd.isna() returns a boolean mask that is True where a value is missing.
pd.isna(df1)

Unnamed: 0,A,B,C,D,E,F
2026-02-04,False,False,False,False,False,True
2026-02-05,False,False,False,False,False,False
2026-02-06,False,False,False,False,True,False
2026-02-07,False,False,False,False,True,False


In [90]:
# The method form produces the same boolean mask as pd.isna(df1).
df1.isna()

Unnamed: 0,A,B,C,D,E,F
2026-02-04,False,False,False,False,False,True
2026-02-05,False,False,False,False,False,False
2026-02-06,False,False,False,False,True,False
2026-02-07,False,False,False,False,True,False


# Operations

## Stats

In [91]:
# The recorded outputs in this section include a column "E" (1.0 on the first
# two dates, NaN afterwards) that no earlier cell adds to `df`; it is left-over
# state from out-of-order execution. Create it explicitly so the section
# reproduces under Restart Kernel -> Run All. assign() aligns the Series on the
# row index, so the remaining dates get NaN, and re-running the cell is harmless.
df = df.assign(E=pd.Series(1.0, index=dates[:2]))

# Mean of each column; missing values are skipped.
df.mean()

A   -0.267121
B    0.194042
C   -0.126599
D    0.215076
E    1.000000
dtype: float64

In [92]:
# axis=1 averages across the columns, giving one mean per row.
df.mean(axis=1)

2026-02-04    0.733955
2026-02-05   -0.464116
2026-02-06    0.089074
2026-02-07   -0.446671
2026-02-08    0.690960
2026-02-09   -0.147563
Freq: D, dtype: float64

In [96]:
# A Series on the same dates as df. shift(2) moves every value two rows down,
# so the first two positions become NaN and the last two values drop off.
s = pd.Series([1,3,4,np.nan,6,7] , index = dates ).shift(2)
s

2026-02-04    NaN
2026-02-05    NaN
2026-02-06    1.0
2026-02-07    3.0
2026-02-08    4.0
2026-02-09    NaN
Freq: D, dtype: float64

In [97]:
df

Unnamed: 0,A,B,C,D,E
2026-02-04,0.10207,0.328711,1.37699,0.862003,1.0
2026-02-05,-2.11521,1.332548,-1.425901,-1.112018,1.0
2026-02-06,-0.411939,-0.098942,-0.324817,1.191994,
2026-02-07,0.040169,-0.468798,-1.251603,-0.106453,
2026-02-08,0.856077,0.373343,-0.277296,1.811717,
2026-02-09,-0.073891,-0.302611,1.143035,-1.356787,


In [98]:
# Subtract s from every column, matching s to the rows by index label
# (axis="index"). The result is NaN wherever either operand is missing.
df.sub(s, axis="index")

Unnamed: 0,A,B,C,D,E
2026-02-04,,,,,
2026-02-05,,,,,
2026-02-06,-1.411939,-1.098942,-1.324817,0.191994,
2026-02-07,-2.959831,-3.468798,-4.251603,-3.106453,
2026-02-08,-3.143923,-3.626657,-4.277296,-2.188283,
2026-02-09,,,,,


## User-defined functions

In [99]:
# agg() applies the function to each column and reduces it to one value per
# column (here: the column mean times 5).
df.agg(lambda x : np.mean(x) * 5)

A   -1.335603
B    0.970208
C   -0.632995
D    1.075380
E    5.000000
dtype: float64

In [101]:
# transform() applies the function and returns a frame with the same row and
# column labels as df (here: every value times 100).
df.transform(lambda x : x * 100 )

Unnamed: 0,A,B,C,D,E
2026-02-04,10.206971,32.871058,137.698954,86.20027,100.0
2026-02-05,-211.521002,133.254817,-142.590093,-111.201753,100.0
2026-02-06,-41.193887,-9.894242,-32.481733,119.199386,
2026-02-07,4.01692,-46.879823,-125.160322,-10.645294,
2026-02-08,85.607671,37.33427,-27.729649,181.171672,
2026-02-09,-7.389051,-30.26113,114.303473,-135.678672,


## Value Counts

In [102]:
# Ten random integers from 0 to 6 (the upper bound 7 is excluded).
# NOTE: this rebinds s, replacing the shifted Series from the Stats section.
s = pd.Series(np.random.randint(0, 7, size=10))
s

0    4
1    6
2    3
3    0
4    5
5    5
6    4
7    2
8    3
9    3
dtype: int32

In [103]:
# value_counts() counts how often each distinct value occurs, most frequent first.
s.value_counts()

3    3
4    2
5    2
6    1
0    1
2    1
Name: count, dtype: int64

## String Methods

In [104]:
# The .str accessor applies a string method to every element; the missing
# value stays NaN instead of raising.
s = pd.Series(["A", "B", "C", "Aaba", "Baca", np.nan, "CABA", "dog", "cat"])
s.str.lower()


0       a
1       b
2       c
3    aaba
4    baca
5     NaN
6    caba
7     dog
8     cat
dtype: object

# Merge

## Concat

In [105]:
# 10x4 frame of random values with default integer row and column labels.
# NOTE: this rebinds df, replacing the date-indexed frame used so far.
df = pd.DataFrame(np.random.randn(10,4))
df

Unnamed: 0,0,1,2,3
0,1.323222,1.652791,-1.204307,-1.604478
1,-1.047316,-0.784108,-0.419333,2.124715
2,0.436363,-0.435392,-0.615774,-0.438462
3,-2.853294,-1.345848,1.053138,-0.026563
4,-1.163794,0.229502,-0.591547,-0.33968
5,1.038401,-0.055095,-1.056278,0.646077
6,-1.303576,1.075703,1.003719,0.319672
7,0.10897,-1.641044,0.082947,1.270211
8,1.005981,-0.973675,2.043411,0.023518
9,-2.334675,-1.769934,0.877631,-0.620905


In [106]:
# Break the frame into three row-wise pieces: rows 0-2, 3-6 and 7-9.
pieces = [df[:3], df[3:7], df[7:]]

In [111]:
type(pieces)

list

In [112]:
# concat() stacks the pieces row-wise in list order, rebuilding the original frame.
pd.concat(pieces)

Unnamed: 0,0,1,2,3
0,1.323222,1.652791,-1.204307,-1.604478
1,-1.047316,-0.784108,-0.419333,2.124715
2,0.436363,-0.435392,-0.615774,-0.438462
3,-2.853294,-1.345848,1.053138,-0.026563
4,-1.163794,0.229502,-0.591547,-0.33968
5,1.038401,-0.055095,-1.056278,0.646077
6,-1.303576,1.075703,1.003719,0.319672
7,0.10897,-1.641044,0.082947,1.270211
8,1.005981,-0.973675,2.043411,0.023518
9,-2.334675,-1.769934,0.877631,-0.620905


## Join

In [113]:
# Two frames that share a "key" column in which "foo" appears twice on each
# side, i.e. the merge key is not unique.
left = pd.DataFrame(dict(key=["foo"] * 2, lval=[1, 2]))
right = pd.DataFrame(dict(key=["foo"] * 2, rval=[4, 5]))

In [116]:
# Show both input frames, one after the other, as plain text.
# NOTE(review): print() bypasses the notebook's rich table rendering;
# display(left, right) would show formatted tables instead.
print(left)
print(right)

   key  lval
0  foo     1
1  foo     2
   key  rval
0  foo     4
1  foo     5


In [117]:
# Join the two frames on their shared "key" column. "foo" occurs twice on each
# side, so every pairing is produced: 2 x 2 = 4 rows. `right` is passed first,
# so its rval column comes before lval.
pd.merge(right , left , on="key")

Unnamed: 0,key,rval,lval
0,foo,4,1
1,foo,4,2
2,foo,5,1
3,foo,5,2


In [118]:
# Same layout as before, but now each key ("foo", "bar") appears exactly once
# per frame.
left = pd.DataFrame(dict(key=["foo", "bar"], lval=[1, 2]))
right = pd.DataFrame(dict(key=["foo", "bar"], rval=[4, 5]))

In [119]:
left

Unnamed: 0,key,lval
0,foo,1
1,bar,2


In [120]:
right

Unnamed: 0,key,rval
0,foo,4
1,bar,5


In [121]:
# With unique keys on both sides the merge yields exactly one row per key.
pd.merge(right , left , on="key")

Unnamed: 0,key,rval,lval
0,foo,4,1
1,bar,5,2


# Grouping

In [122]:
# Frame for the grouping examples: two label columns (A, B) to group by and
# two columns of random values (C, D) to aggregate.
df = pd.DataFrame(
    dict(
        A="foo bar foo bar foo bar foo foo".split(),
        B="one one two three two two one three".split(),
        C=np.random.randn(8),
        D=np.random.randn(8),
    )
)


In [123]:
df

Unnamed: 0,A,B,C,D
0,foo,one,1.238039,-0.928712
1,bar,one,-0.125104,1.170687
2,foo,two,1.783176,0.170846
3,bar,three,-1.288291,0.614988
4,foo,two,0.006542,-0.273846
5,bar,two,-0.306933,0.079612
6,foo,one,-0.00515,0.668272
7,foo,three,0.037202,2.31662


In [124]:
# Split the rows by the value in A, then sum columns C and D within each group.
df.groupby("A")[["C","D"]].sum()

Unnamed: 0_level_0,C,D
A,Unnamed: 1_level_1,Unnamed: 2_level_1
bar,-1.720328,1.865287
foo,3.059809,1.953179


In [125]:
# Grouping by several columns gives one row per (A, B) combination; the group
# keys form a two-level index on the result.
df.groupby(["A","B"]).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,C,D
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.125104,1.170687
bar,three,-1.288291,0.614988
bar,two,-0.306933,0.079612
foo,one,0.616445,-0.13022
foo,three,0.037202,2.31662
foo,two,0.894859,-0.0515


# Reshaping

## Stack

In [126]:
# Two parallel label lists: the first names each group twice in a row, the
# second alternates "one"/"two" within every group.
arrays = [
    [group for group in ("bar", "baz", "foo", "qux") for _ in range(2)],
    ["one", "two"] * 4,
]

In [127]:
# Build a two-level MultiIndex from the parallel lists and name the levels.
index = pd.MultiIndex.from_arrays(arrays, names=["first", "second"])

In [128]:
index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [129]:
# 8x2 frame of random values whose rows are labelled by the MultiIndex.
df = pd.DataFrame(np.random.randn(8, 2), index=index, columns=["A", "B"])

In [130]:
# Keep the first four rows (the "bar" and "baz" groups).
# NOTE: this rebinds df2, replacing the frame built in the Object creation section.
df2 = df[:4]

In [131]:
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-1.827003,-0.520118
bar,two,0.711242,0.58967
baz,one,0.99346,0.247251
baz,two,1.558021,1.140693


In [132]:
# stack() moves the column labels (A, B) into a new innermost index level,
# turning the frame into a Series.
df2.stack()

first  second   
bar    one     A   -1.827003
               B   -0.520118
       two     A    0.711242
               B    0.589670
baz    one     A    0.993460
               B    0.247251
       two     A    1.558021
               B    1.140693
dtype: float64

In [133]:
# unstack() moves the last index level ("second") into the columns, so each
# original column splits into "one" and "two".
df2.unstack()

Unnamed: 0_level_0,A,A,B,B
second,one,two,one,two
first,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
bar,-1.827003,0.711242,-0.520118,0.58967
baz,0.99346,1.558021,0.247251,1.140693


## Pivot Tables

In [134]:
# Frame for the pivot example: three label columns built by repeating short
# patterns up to 12 rows, plus two columns of random values.
df = pd.DataFrame(
    dict(
        A=["one", "one", "two", "three"] * 3,
        B=["A", "B", "C"] * 4,
        C=(["foo"] * 3 + ["bar"] * 3) * 2,
        D=np.random.randn(12),
        E=np.random.randn(12),
    )
)


In [135]:
df

Unnamed: 0,A,B,C,D,E
0,one,A,foo,1.339281,1.837401
1,one,B,foo,0.942742,1.730405
2,two,C,foo,0.078547,-1.157274
3,three,A,bar,0.81252,-0.363089
4,one,B,bar,-0.849308,-1.940041
5,one,C,bar,-0.532081,0.451638
6,two,A,foo,0.012929,-1.111937
7,three,B,foo,-0.220347,0.287718
8,one,C,foo,2.642506,-0.73706
9,one,A,bar,-0.966146,-0.737746


In [136]:
# Pivot: rows are labelled by (A, B), there is one column per distinct value
# of C, and the cells hold D (aggregated with the default function, the mean).
# Combinations that do not occur in df show up as NaN.
pd.pivot_table(df, values="D", index=["A", "B"], columns=["C"])

Unnamed: 0_level_0,C,bar,foo
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
one,A,-0.966146,1.339281
one,B,-0.849308,0.942742
one,C,-0.532081,2.642506
three,A,0.81252,
three,B,,-0.220347
three,C,0.424278,
two,A,,0.012929
two,B,0.583236,
two,C,,0.078547


# Time Series

In [139]:
# 100 timestamps one second apart, starting at midnight on 2012-01-01.
rng = pd.date_range("1/1/2012", periods=100, freq="s")
rng

DatetimeIndex(['2012-01-01 00:00:00', '2012-01-01 00:00:01',
               '2012-01-01 00:00:02', '2012-01-01 00:00:03',
               '2012-01-01 00:00:04', '2012-01-01 00:00:05',
               '2012-01-01 00:00:06', '2012-01-01 00:00:07',
               '2012-01-01 00:00:08', '2012-01-01 00:00:09',
               '2012-01-01 00:00:10', '2012-01-01 00:00:11',
               '2012-01-01 00:00:12', '2012-01-01 00:00:13',
               '2012-01-01 00:00:14', '2012-01-01 00:00:15',
               '2012-01-01 00:00:16', '2012-01-01 00:00:17',
               '2012-01-01 00:00:18', '2012-01-01 00:00:19',
               '2012-01-01 00:00:20', '2012-01-01 00:00:21',
               '2012-01-01 00:00:22', '2012-01-01 00:00:23',
               '2012-01-01 00:00:24', '2012-01-01 00:00:25',
               '2012-01-01 00:00:26', '2012-01-01 00:00:27',
               '2012-01-01 00:00:28', '2012-01-01 00:00:29',
               '2012-01-01 00:00:30', '2012-01-01 00:00:31',
               '2012-01-

In [140]:
# One random integer from 0 to 499 per timestamp (the upper bound 500 is excluded).
ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)

In [141]:
ts

2012-01-01 00:00:00    304
2012-01-01 00:00:01     43
2012-01-01 00:00:02    309
2012-01-01 00:00:03     94
2012-01-01 00:00:04    297
                      ... 
2012-01-01 00:01:35    395
2012-01-01 00:01:36    188
2012-01-01 00:01:37    328
2012-01-01 00:01:38    420
2012-01-01 00:01:39    237
Freq: S, Length: 100, dtype: int32

In [142]:
# Downsample the per-second data into 5-minute bins and sum each bin.
# All 100 seconds fall inside the first bin, so the result has a single row.
ts.resample("5Min").sum()

2012-01-01    25725
Freq: 5T, dtype: int32

In [144]:
# Five consecutive days starting 2012-03-06 (the date string is read month-first).
# NOTE: this rebinds rng, replacing the per-second index above.
rng = pd.date_range("3/6/2012 00:00", periods=5, freq="D")

In [145]:
# One random value per day; the second positional argument is the index.
ts = pd.Series(np.random.randn(len(rng)), rng)

In [146]:
ts

2012-03-06   -1.325516
2012-03-07    0.521518
2012-03-08   -0.248532
2012-03-09   -0.061032
2012-03-10   -1.982265
Freq: D, dtype: float64

In [147]:
# tz_localize() attaches a time zone to the time-zone-naive index; the clock
# times stay the same and gain a +00:00 offset.
ts_utc = ts.tz_localize("UTC")

In [148]:
ts_utc

2012-03-06 00:00:00+00:00   -1.325516
2012-03-07 00:00:00+00:00    0.521518
2012-03-08 00:00:00+00:00   -0.248532
2012-03-09 00:00:00+00:00   -0.061032
2012-03-10 00:00:00+00:00   -1.982265
Freq: D, dtype: float64