#  Data Manipulation With Pandas

Name: Prashanth B

In [1]:
import pandas as pd

# Pandas Series

In [None]:
#pandas series object
data = pd.Series([0.25, 0.5, 0.75, 1.0])
data

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64

In [None]:
data.values

array([0.25, 0.5 , 0.75, 1.  ])

In [None]:
data.index

RangeIndex(start=0, stop=4, step=1)

In [None]:
data[1]

0.5

In [None]:
#values are modified
data[1]=0.35
data

0    0.25
1    0.35
2    0.75
3    1.00
dtype: float64

In [None]:
data[1:3]

1    0.35
2    0.75
dtype: float64

In [None]:
data = pd.Series([0.25, 0.5, 0.75, 1.0], index=['a','b','c','d'])
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [None]:
data['b']

0.5

In [None]:
#non-sequential index
data = pd.Series([0.25, 0.5, 0.75, 1.0], index=[2,5,3,7])
data

2    0.25
5    0.50
3    0.75
7    1.00
dtype: float64

In [None]:
data[5]

0.5

In [2]:
#series as specialized dictionary
population_dict = {'California':38332521,
                  'Texas':26448193,
                  'New York':19651127,
                  'Florida':19552860,
                  'Illinois':12882135}
population = pd.Series(population_dict)
population

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64

In [3]:
population['California']

38332521

In [4]:
population['California':'Illinois'] #both included

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64

In [5]:
#constructing series objects
pd.Series([2,4,6])

0    2
1    4
2    6
dtype: int64

In [None]:
pd.Series(5, index=[100,200,300])

100    5
200    5
300    5
dtype: int64

In [None]:
pd.Series({2:'a', 1:'b', 3:'c'})

2    a
1    b
3    c
dtype: object

In [None]:
pd.Series({2:'a',1:'b',3:'c'}, index=[3,2])

3    c
2    a
dtype: object

# Pandas DataFrame

In [6]:
area_dict = {'California':423967,
            'Texas':695662,
            'New York':141297,
            'Florida':170312,
            'Illinois':149995}
area = pd.Series(area_dict)
area

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
dtype: int64

In [7]:
population

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64

In [8]:
states = pd.DataFrame({'Population':population, 'Area':area})
states

Unnamed: 0,Population,Area
California,38332521,423967
Texas,26448193,695662
New York,19651127,141297
Florida,19552860,170312
Illinois,12882135,149995


In [None]:
states.index

Index(['California', 'Texas', 'New York', 'Florida', 'Illinois'], dtype='object')

In [None]:
states.columns

Index(['Population', 'Area'], dtype='object')

In [None]:
#constructing Dataframe objects
#from a single series object
pd.DataFrame(population, columns=['Population'])

Unnamed: 0,Population
California,38332521
Texas,26448193
New York,19651127
Florida,19552860
Illinois,12882135


In [None]:
#from a list of dict
data = [{'a':i, 'b':2*i} for i in range(5)]
data

[{'a': 0, 'b': 0},
 {'a': 1, 'b': 2},
 {'a': 2, 'b': 4},
 {'a': 3, 'b': 6},
 {'a': 4, 'b': 8}]

In [None]:
pd.DataFrame(data)

Unnamed: 0,a,b
0,0,0
1,1,2
2,2,4
3,3,6
4,4,8


In [None]:
#from a dictionary of series objects
pd.DataFrame({'population':population, 'area':area})

Unnamed: 0,population,area
California,38332521,423967
Texas,26448193,695662
New York,19651127,141297
Florida,19552860,170312
Illinois,12882135,149995


In [9]:
import numpy as np

#from a two dimensional numpy array
pd.DataFrame(np.random.rand(3,2), columns=['A','B'], index=['001','002','003'])

Unnamed: 0,A,B
1,0.192453,0.926685
2,0.667858,0.301166
3,0.371545,0.942356


In [None]:
#index as immutable array
ind = pd.Index([2,3,5,7,11])
ind

Int64Index([2, 3, 5, 7, 11], dtype='int64')

In [None]:
#immutability
#ind[1]=0 #gives error

In [None]:
#index as ordered set
indA = pd.Index([1,3,5,7,9])
print(indA)
indB = pd.Index([2,3,5,7,11])
print(indB)

Int64Index([1, 3, 5, 7, 9], dtype='int64')
Int64Index([2, 3, 5, 7, 11], dtype='int64')


# Indexers: loc, iloc, ix

In [None]:
data = pd.Series(['a','b','c'], index=[1,3,5])
data

1    a
3    b
5    c
dtype: object

In [None]:
data.loc[1] #explicit(takes as a particular value)

'a'

In [None]:
data.iloc[1] #implicit(takes as index number)

'b'

In [None]:
data.iloc[1:3]

3    b
5    c
dtype: object

In [None]:
data = pd.DataFrame({'Area':area, 'Population':population})
data

Unnamed: 0,Area,Population
California,423967,38332521
Texas,695662,26448193
New York,141297,19651127
Florida,170312,19552860
Illinois,149995,12882135


In [None]:
data['Area']

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: Area, dtype: int64

In [None]:
data.Area

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: Area, dtype: int64

In [None]:
data.Area is data['Area']

True

In [None]:
data['Density'] = data['Population']/data['Area']
data

Unnamed: 0,Area,Population,Density
California,423967,38332521,90.413926
Texas,695662,26448193,38.01874
New York,141297,19651127,139.076746
Florida,170312,19552860,114.806121
Illinois,149995,12882135,85.883763


In [None]:
#Data as two dimensional array
data.values

array([[4.23967000e+05, 3.83325210e+07, 9.04139261e+01],
       [6.95662000e+05, 2.64481930e+07, 3.80187404e+01],
       [1.41297000e+05, 1.96511270e+07, 1.39076746e+02],
       [1.70312000e+05, 1.95528600e+07, 1.14806121e+02],
       [1.49995000e+05, 1.28821350e+07, 8.58837628e+01]])

In [None]:
data.T #Transpose

Unnamed: 0,California,Texas,New York,Florida,Illinois
Area,423967.0,695662.0,141297.0,170312.0,149995.0
Population,38332520.0,26448190.0,19651130.0,19552860.0,12882140.0
Density,90.41393,38.01874,139.0767,114.8061,85.88376


In [None]:
data.iloc[:3,:2] #3 rows and 2 columns

Unnamed: 0,Area,Population
California,423967,38332521
Texas,695662,26448193
New York,141297,19651127


In [None]:
data.loc[:'Florida',:'Population'] #upto florida colwise & upto population rowwise

Unnamed: 0,Area,Population
California,423967,38332521
Texas,695662,26448193
New York,141297,19651127
Florida,170312,19552860


In [None]:
data.loc[data.Density > 100]

Unnamed: 0,Area,Population,Density
New York,141297,19651127,139.076746
Florida,170312,19552860,114.806121


# Ufuncs

In [None]:
rng = np.random.RandomState(42)
ser = pd.Series(rng.randint(0,10,4))
ser

0    6
1    3
2    7
3    4
dtype: int32

In [None]:
df = pd.DataFrame(rng.randint(0,10,(3,4)), columns=['A','B','C','D'])
df

Unnamed: 0,A,B,C,D
0,6,9,2,6
1,7,4,3,7
2,7,2,5,4


In [None]:
np.exp(ser)

0     403.428793
1      20.085537
2    1096.633158
3      54.598150
dtype: float64

In [None]:
np.sin(df*np.pi/4)

Unnamed: 0,A,B,C,D
0,-1.0,0.7071068,1.0,-1.0
1,-0.707107,1.224647e-16,0.707107,-0.7071068
2,-0.707107,1.0,-0.707107,1.224647e-16


In [None]:
A = pd.DataFrame(rng.randint(0,10,(2,3)), columns=['A','B','C'])
A

Unnamed: 0,A,B,C
0,1,7,5
1,1,4,0


In [None]:
B = pd.DataFrame(rng.randint(0,10,(3,3)), columns = ['A','B','C'])
B

Unnamed: 0,A,B,C
0,9,5,8
1,0,9,2
2,6,3,8


In [None]:
data = A + B
data

Unnamed: 0,A,B,C
0,10.0,12.0,13.0
1,1.0,13.0,2.0
2,,,


In [None]:
data.dropna()

Unnamed: 0,A,B,C
0,10.0,12.0,13.0
1,1.0,13.0,2.0


In [None]:
data.dropna(axis='columns')

0
1
2


In [None]:
data.dropna(how='all')

Unnamed: 0,A,B,C
0,10.0,12.0,13.0
1,1.0,13.0,2.0


In [None]:
#filling null values
data.fillna(0)

Unnamed: 0,A,B,C
0,10.0,12.0,13.0
1,1.0,13.0,2.0
2,0.0,0.0,0.0


In [None]:
#fill forward
data.fillna(method='ffill')

Unnamed: 0,A,B,C
0,10.0,12.0,13.0
1,1.0,13.0,2.0
2,1.0,13.0,2.0


In [None]:
#back fill
data.fillna(method='bfill')

Unnamed: 0,A,B,C
0,10.0,12.0,13.0
1,1.0,13.0,2.0
2,,,


In [None]:
#Hierarchical indexing
#use pandas multiindex
area

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
dtype: int64

In [None]:
population

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64

In [None]:
ser1 = pd.Series(['a','b','c'])
ser1

0    a
1    b
2    c
dtype: object

In [None]:
ser2 = pd.Series(['1','2','3'], index=[3,4,5])
ser2

3    1
4    2
5    3
dtype: object

In [None]:
pd.concat([ser1, ser2])

0    a
1    b
2    c
3    1
4    2
5    3
dtype: object

In [None]:
df1 = pd.DataFrame({'employee':['Bob','Jake','Lisa','Sue'],
                   'group':['Accounting','Engineering','Engineering','HR']})
df2 = pd.DataFrame({'employee':['Lisa','Bob','Jake','Sue'],
                   'hire_date':[2004,2008,2012,2014]})

In [None]:
print(df1);print(df2)

  employee        group
0      Bob   Accounting
1     Jake  Engineering
2     Lisa  Engineering
3      Sue           HR
  employee  hire_date
0     Lisa       2004
1      Bob       2008
2     Jake       2012
3      Sue       2014


In [None]:
df3 = pd.merge(df1,df2)
df3

Unnamed: 0,employee,group,hire_date
0,Bob,Accounting,2008
1,Jake,Engineering,2012
2,Lisa,Engineering,2004
3,Sue,HR,2014


In [None]:
print(pd.merge(df1,df2,on='employee'))

  employee        group  hire_date
0      Bob   Accounting       2008
1     Jake  Engineering       2012
2     Lisa  Engineering       2004
3      Sue           HR       2014


In [None]:
abbrevs = pd.read_csv('state-abbrevs.csv')
areas = pd.read_csv('state-areas.csv')
population = pd.read_csv('state-population.csv')

In [None]:
print(df_population.head());print(df_areas.head());print(df_abbrevs.head())

  state/region     ages  year  population
0           AL  under18  2012   1117489.0
1           AL    total  2012   4817528.0
2           AL  under18  2010   1130966.0
3           AL    total  2010   4785570.0
4           AL  under18  2011   1125763.0
        state  area (sq. mi)
0     Alabama          52423
1      Alaska         656425
2     Arizona         114006
3    Arkansas          53182
4  California         163707
        state abbreviation
0     Alabama           AL
1      Alaska           AK
2     Arizona           AZ
3    Arkansas           AR
4  California           CA


In [None]:
merged = pd.merge(population, abbrevs, how='outer', left_on='state/region', right_on='abbreviation')
merged

Unnamed: 0,state/region,ages,year,population,state,abbreviation
0,AL,under18,2012,1117489.0,Alabama,AL
1,AL,total,2012,4817528.0,Alabama,AL
2,AL,under18,2010,1130966.0,Alabama,AL
3,AL,total,2010,4785570.0,Alabama,AL
4,AL,under18,2011,1125763.0,Alabama,AL
...,...,...,...,...,...,...
2539,USA,total,2010,309326295.0,,
2540,USA,under18,2011,73902222.0,,
2541,USA,total,2011,311582564.0,,
2542,USA,under18,2012,73708179.0,,


In [None]:
#drop duplicate info
#merged = merged.drop(labels=None, axis=1, inplace=True) --> not working
#merged

In [None]:
merged.isnull().any()

state/region    False
ages            False
year            False
population       True
state            True
dtype: bool

In [None]:
#simple aggregation in pandas
ser=pd.Series(rng.rand(5))
ser

0    0.965632
1    0.808397
2    0.304614
3    0.097672
4    0.684233
dtype: float64

In [None]:
ser.sum()

2.8605482908829316

In [None]:
ser.mean()

0.5721096581765863

In [None]:
df = pd.DataFrame({'A':rng.rand(5),
                  'B':rng.rand(5)})
df

Unnamed: 0,A,B
0,0.440152,0.25878
1,0.122038,0.662522
2,0.495177,0.311711
3,0.034389,0.520068
4,0.90932,0.54671


In [None]:
df.mean()

A    0.400215
B    0.459958
dtype: float64

In [None]:
df.mean('columns')

0    0.349466
1    0.392280
2    0.403444
3    0.277228
4    0.728015
dtype: float64

# GroupBy: Split, Apply, Combine

In [None]:
df = pd.DataFrame({'key':['A','B','C','A','B','C'],
                  'data': range(6)}, columns=['key','data'])
df

Unnamed: 0,key,data
0,A,0
1,B,1
2,C,2
3,A,3
4,B,4
5,C,5


In [None]:
df.groupby('key')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x00000138ECE4F2E0>

In [None]:
df.groupby('key').sum()

Unnamed: 0_level_0,data
key,Unnamed: 1_level_1
A,3
B,5
C,7


In [None]:
df = pd.DataFrame({'key':['A','B','C','A','B','C'],
                  'data1': range(6),
                  'data2': rng.randint(0,10,6)}, columns=['key','data1','data2'])
df

Unnamed: 0,key,data1,data2
0,A,0,5
1,B,1,1
2,C,2,9
3,A,3,1
4,B,4,9
5,C,5,3


In [None]:
#Aggregation
df.groupby('key').aggregate([min, np.median, max])

Unnamed: 0_level_0,data1,data1,data1,data2,data2,data2
Unnamed: 0_level_1,min,median,max,min,median,max
key,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
A,0,1.5,3,1,3.0,5
B,1,2.5,4,1,5.0,9
C,2,3.5,5,3,6.0,9


In [None]:
#Filtering
def filter_func(x):
    return x['data2'].std() > 4

print(df); print(df.groupby('key').std());

  key  data1  data2
0   A      0      5
1   B      1      1
2   C      2      9
3   A      3      1
4   B      4      9
5   C      5      3
       data1     data2
key                   
A    2.12132  2.828427
B    2.12132  5.656854
C    2.12132  4.242641


In [None]:
print(df.groupby('key').filter(filter_func))

  key  data1  data2
1   B      1      1
2   C      2      9
4   B      4      9
5   C      5      3


In [None]:
#transformation
df.groupby('key').transform(lambda x: x - x.mean())

Unnamed: 0,data1,data2
0,-1.5,2.0
1,-1.5,-4.0
2,-1.5,3.0
3,1.5,-2.0
4,1.5,4.0
5,1.5,-3.0


In [None]:
#Pivot tables
import seaborn as sns

titanic = sns.load_dataset('titanic') #available on seaborn library
titanic

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [None]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [None]:
#Pivot tables by hand
titanic.groupby('sex')[['survived']].mean()

Unnamed: 0_level_0,survived
sex,Unnamed: 1_level_1
female,0.742038
male,0.188908


In [None]:
titanic.groupby(['sex','class'])['survived'].aggregate('mean').unstack()

class,First,Second,Third
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
female,0.968085,0.921053,0.5
male,0.368852,0.157407,0.135447


In [None]:
#Pivot table syntax
titanic.pivot_table('survived',index='sex',columns='class')

class,First,Second,Third
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
female,0.968085,0.921053,0.5
male,0.368852,0.157407,0.135447


In [None]:
#Multilevel pivot tables
age = pd.cut(titanic['age'],[0,18,80])
titanic.pivot_table('survived',['sex',age],[fare,'class'])

Unnamed: 0_level_0,fare,"(-0.001, 14.454]","(-0.001, 14.454]","(-0.001, 14.454]","(14.454, 512.329]","(14.454, 512.329]","(14.454, 512.329]"
Unnamed: 0_level_1,class,First,Second,Third,First,Second,Third
sex,age,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
female,"(0, 18]",,1.0,0.714286,0.909091,1.0,0.318182
female,"(18, 80]",,0.88,0.444444,0.972973,0.914286,0.391304
male,"(0, 18]",,0.0,0.26087,0.8,0.818182,0.178571
male,"(18, 80]",0.0,0.098039,0.125,0.391304,0.030303,0.192308


In [None]:
fare = pd.qcut(titanic['fare'],2)
titanic.pivot_table('survived',['sex',age],[fare,'class'])

Unnamed: 0_level_0,fare,"(-0.001, 14.454]","(-0.001, 14.454]","(-0.001, 14.454]","(14.454, 512.329]","(14.454, 512.329]","(14.454, 512.329]"
Unnamed: 0_level_1,class,First,Second,Third,First,Second,Third
sex,age,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
female,"(0, 18]",,1.0,0.714286,0.909091,1.0,0.318182
female,"(18, 80]",,0.88,0.444444,0.972973,0.914286,0.391304
male,"(0, 18]",,0.0,0.26087,0.8,0.818182,0.178571
male,"(18, 80]",0.0,0.098039,0.125,0.391304,0.030303,0.192308


In [None]:
#example
titanic.pivot_table(index='sex',columns='class',aggfunc={'survived':sum,'fare':'mean'})

Unnamed: 0_level_0,fare,fare,fare,survived,survived,survived
class,First,Second,Third,First,Second,Third
sex,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
female,106.125798,21.970121,16.11881,91,70,72
male,67.226127,19.741782,12.661633,45,17,47


In [None]:
#Vectorized string operations
data = ['peter','Paul','MARY','gUIDO']
names = pd.Series(data) #converting LIST to Pandas Series
names

0    peter
1     Paul
2     MARY
3    gUIDO
dtype: object

In [None]:
names.str.capitalize() #titleCase

0    Peter
1     Paul
2     Mary
3    Guido
dtype: object

In [None]:
names.str.lower()

0    peter
1     paul
2     mary
3    guido
dtype: object

In [None]:
names.str.upper()

0    PETER
1     PAUL
2     MARY
3    GUIDO
dtype: object

In [None]:
names.str.len()

0    5
1    4
2    4
3    5
dtype: int64

In [None]:
names.str.startswith('P') #CaseSensitive

0    False
1     True
2    False
3    False
dtype: bool

In [None]:
#Vectorized item access and slicing
names.str[0:3]

0    pet
1    Pau
2    MAR
3    gUI
dtype: object

In [None]:
names.str.split().str.get(-1)

0    peter
1     Paul
2     MARY
3    gUIDO
dtype: object

# Working with Time Series

In [10]:
from datetime import datetime
datetime(year=2015, month=7, day=4)

datetime.datetime(2015, 7, 4, 0, 0)

In [11]:
from dateutil import parser
date = parser.parse("4th of July, 2015")
date

datetime.datetime(2015, 7, 4, 0, 0)

In [12]:
date.strftime('%A')

'Saturday'

In [14]:
#Typed array of times: Numpy's datetime64
date = np.array('2015-07-04', dtype=np.datetime64)
date

array('2015-07-04', dtype='datetime64[D]')

In [15]:
date + np.arange(12)

array(['2015-07-04', '2015-07-05', '2015-07-06', '2015-07-07',
       '2015-07-08', '2015-07-09', '2015-07-10', '2015-07-11',
       '2015-07-12', '2015-07-13', '2015-07-14', '2015-07-15'],
      dtype='datetime64[D]')

In [16]:
np.datetime64('2015-07-04')

numpy.datetime64('2015-07-04')

In [17]:
np.datetime64('2015-07-04 12:00')

numpy.datetime64('2015-07-04T12:00')

In [18]:
np.datetime64('2015-07-04 12:59:59.50', 'ns')

numpy.datetime64('2015-07-04T12:59:59.500000000')

In [19]:
date = pd.to_datetime('4th of July, 2015')
date

Timestamp('2015-07-04 00:00:00')

In [20]:
date.strftime('%B')

'July'

In [21]:
date.strftime('%A')

'Saturday'

In [22]:
date.strftime('%C')

'20'

In [23]:
date.strftime('%a')

'Sat'

In [24]:
date.strftime('%b')

'Jul'

In [25]:
date.strftime('%c')

'Sat Jul  4 00:00:00 2015'

In [26]:
dates = pd.to_datetime([datetime(2015,7,3),'4th of July, 2015','2015-Jul-6','07-07-2015','20150708'])
dates

DatetimeIndex(['2015-07-03', '2015-07-04', '2015-07-06', '2015-07-07',
               '2015-07-08'],
              dtype='datetime64[ns]', freq=None)

In [27]:
dates - dates[0]

TimedeltaIndex(['0 days', '1 days', '3 days', '4 days', '5 days'], dtype='timedelta64[ns]', freq=None)

In [29]:
pd.date_range('2015-07-03', '2015-07-10')

DatetimeIndex(['2015-07-03', '2015-07-04', '2015-07-05', '2015-07-06',
               '2015-07-07', '2015-07-08', '2015-07-09', '2015-07-10'],
              dtype='datetime64[ns]', freq='D')

In [30]:
pd.date_range('2015-07-03', periods=8)

DatetimeIndex(['2015-07-03', '2015-07-04', '2015-07-05', '2015-07-06',
               '2015-07-07', '2015-07-08', '2015-07-09', '2015-07-10'],
              dtype='datetime64[ns]', freq='D')

In [31]:
pd.date_range('2015-07-03', periods=8, freq='H')

DatetimeIndex(['2015-07-03 00:00:00', '2015-07-03 01:00:00',
               '2015-07-03 02:00:00', '2015-07-03 03:00:00',
               '2015-07-03 04:00:00', '2015-07-03 05:00:00',
               '2015-07-03 06:00:00', '2015-07-03 07:00:00'],
              dtype='datetime64[ns]', freq='H')

In [32]:
pd.date_range('2015-07-03', periods=8, freq='M')

DatetimeIndex(['2015-07-31', '2015-08-31', '2015-09-30', '2015-10-31',
               '2015-11-30', '2015-12-31', '2016-01-31', '2016-02-29'],
              dtype='datetime64[ns]', freq='M')

In [33]:
pd.timedelta_range(0, periods=10, freq='H')

TimedeltaIndex(['0 days 00:00:00', '0 days 01:00:00', '0 days 02:00:00',
                '0 days 03:00:00', '0 days 04:00:00', '0 days 05:00:00',
                '0 days 06:00:00', '0 days 07:00:00', '0 days 08:00:00',
                '0 days 09:00:00'],
               dtype='timedelta64[ns]', freq='H')

Thank you