In [5]:
import numpy as np
import pandas as pd

In [12]:
# The Pandas Series Object
data = pd.Series([0.25, 0.50, 0.75, 1.0])
data

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64

In [13]:
# Extracting the values from the Series
data.values

array([0.25, 0.5 , 0.75, 1.  ])

In [15]:
# Extracting the index from the Series
data.index

RangeIndex(start=0, stop=4, step=1)

In [18]:
# Indexing in pandas
data[1]

0.5

In [21]:
# Slicing in pandas
data[1 : 3]

1    0.50
2    0.75
dtype: float64

In [23]:
# Customizing the Series index
data = pd.Series([0.25, 0.50, 0.75, 1.00], index = ['a', 'b', 'c', 'd'])
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [24]:
# Accesing the Series with customised index
data['b']

0.5

In [30]:
# Allocating nonsequential indices for the Series
data = pd.Series([0.25, 0.50, 0.75, 1.00], index = [1, 3, 5, 7])
data

1    0.25
3    0.50
5    0.75
7    1.00
dtype: float64

In [31]:
# Accessing the Series with nonsequencial index
data[5]

0.75

In [33]:
# Creating a Series as a dictionary analogy
population_dict = {'Tamil Nadu' : 38332521, 'Kerala' : 26448193, 'Karnataka' : 19651127, 'Andhra' : 19552860, "Rajasthan" : 12882135}
population = pd.Series(population_dict)
population

Tamil Nadu    38332521
Kerala        26448193
Karnataka     19651127
Andhra        19552860
Rajasthan     12882135
dtype: int64

In [34]:
# Accesing a population Series with index 'Andhra'
population['Andhra']

19552860

In [35]:
# Accesing a population Series with slicing
population['Kerala' : 'Rajasthan']

Kerala       26448193
Karnataka    19651127
Andhra       19552860
Rajasthan    12882135
dtype: int64

In [39]:
area_dict = {'Tamil Nadu' : 423967, 'Kerala' : 695662, 'Karnataka' : 141297, 'Andhra' : 170312, 'Rajasthan' : 149995}
area = pd.Series(area_dict)
area

Tamil Nadu    423967
Kerala        695662
Karnataka     141297
Andhra        170312
Rajasthan     149995
dtype: int64

In [40]:
# Creating a DataFrame from a dictionary
states = pd.DataFrame({'population' : population, 'area' : area})
states

Unnamed: 0,population,area
Tamil Nadu,38332521,423967
Kerala,26448193,695662
Karnataka,19651127,141297
Andhra,19552860,170312
Rajasthan,12882135,149995


In [41]:
# Extracting the index from states DataFrame
states.index

Index(['Tamil Nadu', 'Kerala', 'Karnataka', 'Andhra', 'Rajasthan'], dtype='object')

In [42]:
# Extracting the column from states DataFrame
states.columns

Index(['population', 'area'], dtype='object')

In [46]:
# Extracting the data from states DataFrame by 'area' column
states['area']

Tamil Nadu    423967
Kerala        695662
Karnataka     141297
Andhra        170312
Rajasthan     149995
Name: area, dtype: int64

In [48]:
# Constructing a DataFrame object from a list of dicts
data = [{'a' : i, 'b' : i ** i} for i in range(3)]
df = pd.DataFrame(data)
df

Unnamed: 0,a,b
0,0,1
1,1,1
2,2,4


In [56]:
import numpy as np
import pandas as pd
# Constructing a DataFrame object from a two-dimensional numpy array
data = pd.DataFrame(np.random.rand(6, 2), columns = ['C1', 'C2'], index = ['a', 'b', 'c', 'd', 'e', 'f'])
data

Unnamed: 0,C1,C2
a,0.251371,0.954273
b,0.609311,0.222099
c,0.792701,0.938215
d,0.989696,0.993509
e,0.345441,0.889931
f,0.805516,0.809823


In [58]:
print("Size of the DataFrame is:", data.size)
print("Shape of the DataFrame is:", data.shape)
print("Dimension of the DataFrame is:", data.ndim)

Size of the DataFrame is: 12
Shape of the DataFrame is: (6, 2)
Dimension of the DataFrame is: 2


In [60]:
# Indexes as ordered set
ind1 = pd.Index([1, 2, 3, 4, 5])
ind2 = pd.Index([3, 4, 5, 6, 7])

In [61]:
# Intersection on two indexes
ind1 & ind2

Int64Index([3, 4, 5], dtype='int64')

In [62]:
# Union on two indexes
ind1 | ind2

Int64Index([1, 2, 3, 4, 5, 6, 7], dtype='int64')

In [63]:
# Symmetric difference between two indexes
ind1 ^ ind2

Int64Index([1, 2, 6, 7], dtype='int64')

In [7]:
import numpy as np
import pandas as pd
# Creating a Series
data = pd.Series(['a', 'b', 'c'], index = [1, 3, 5])
data

1    a
3    b
5    c
dtype: object

In [13]:
# Indexers: loc, iloc, and ix
# The loc attribute allows indexing and slicing that always references the explicit index
print(data.loc[1])
print(data.loc[1:3])

a
1    a
3    b
dtype: object


In [14]:
# The iloc attribute allows indexing and slicing that always references the implicit Python-style index
print(data.iloc[1])
print(data.iloc[1:3])

b
3    b
5    c
dtype: object


In [18]:
# DataFrame as a dictionary
area = pd.Series({'California' : 423967, 'Texas' : 695662, 'New York' : 141297, 'Florida' : 170312, 'Illinois' : 149995})
population = pd.Series({'California' : 38332521, 'Texas' : 26448193, 'New York' : 19651127, 'Florida' : 19552860, 'Illinois' : 12882135})
data = pd.DataFrame({'area' : area, 'population' : population})
data

Unnamed: 0,area,population
California,423967,38332521
Texas,695662,26448193
New York,141297,19651127
Florida,170312,19552860
Illinois,149995,12882135


In [29]:
# Method 1: Extracting the column 'area'
data['area']

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: area, dtype: int64

In [30]:
# Method 2: Extracting the column 'area'
data.area

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: area, dtype: int64

In [31]:
# Adding a new column in a DataFrame
data['density'] = data['population'] / data['area']
data

Unnamed: 0,area,population,density
California,423967,38332521,90.413926
Texas,695662,26448193,38.01874
New York,141297,19651127,139.076746
Florida,170312,19552860,114.806121
Illinois,149995,12882135,85.883763


In [33]:
# swapping/transforming rows & columns
data.T

Unnamed: 0,California,Texas,New York,Florida,Illinois
area,423967.0,695662.0,141297.0,170312.0,149995.0
population,38332520.0,26448190.0,19651130.0,19552860.0,12882140.0
density,90.41393,38.01874,139.0767,114.8061,85.88376


In [42]:
# Extracting the DataFrame using iloc
data.iloc[:3, :3]

Unnamed: 0,area,population,density
California,423967,38332521,90.413926
Texas,695662,26448193,38.01874
New York,141297,19651127,139.076746


In [43]:
# Extracting the DataFrame using loc
data.loc[:'Florida', :'density']

Unnamed: 0,area,population,density
California,423967,38332521,90.413926
Texas,695662,26448193,38.01874
New York,141297,19651127,139.076746
Florida,170312,19552860,114.806121


In [45]:
# Extracting the DataFrame using ix
data.ix[:3, :'population']

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  


Unnamed: 0,area,population
California,423967,38332521
Texas,695662,26448193
New York,141297,19651127


In [48]:
# Extracting the DataFrame using loc with combine masking & fancy indexing
data.loc[data['density'] > 100, ['population', 'density']]

Unnamed: 0,population,density
New York,19651127,139.076746
Florida,19552860,114.806121


In [54]:
data[data.density > 100]

Unnamed: 0,area,population,density
New York,141297,19651127,139.076746
Florida,170312,19552860,114.806121


In [53]:
data['Texas' : 'Illinois']

Unnamed: 0,area,population,density
Texas,695662,26448193,38.01874
New York,141297,19651127,139.076746
Florida,170312,19552860,114.806121
Illinois,149995,12882135,85.883763


In [55]:
data [1 : 3]

Unnamed: 0,area,population,density
Texas,695662,26448193,38.01874
New York,141297,19651127,139.076746
