In [2]:
import numpy as np 
import pandas as pd
from matplotlib import pyplot as plt

In [4]:
"""
Pandas Objects: Series
"""

# Create a series from an array

ser = pd.Series([0.25,0.5,0.75,1.0])
print(ser)

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64


In [6]:
# Main Attributes : 'volues' 'index'

arr = ser.values
print(arr)
ind = ser.index
print(ind)

[0.25 0.5  0.75 1.  ]
RangeIndex(start=0, stop=4, step=1)


In [10]:
# Label-based Indexing

ser = pd.Series([0.25, 0.5, 0.75, 1.0], index=['a', 'b', 'c', 'd'])
print(ser)

ser['a']

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64


0.25

In [16]:
"""
Creating a Series Object from a Dictionary
"""

population_dict = {'California': 38332521,
                    'Texas': 26448193,
                    "New York": 19651127,
                    'Florida': 19552860,
                    'Illinois': 12882135}

population = pd.Series(population_dict)
print(population)

print(population['California':'New York']) 

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64
California    38332521
Texas         26448193
New York      19651127
dtype: int64


In [18]:
"""
Pandas Objects: DataFrame
"""

area_dict = {'California': 423967,'Texas': 695662, 'New York': 141297,
'Florida': 170312, 'Illinois': 149995}
area = pd.Series(area_dict)

print(area)


California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
dtype: int64


In [22]:
# Construct a DataFrame containing 'population ' and ' area' series

states = pd.DataFrame({'population': population, 'area': area})
print(states)

print(states.index)

print(states.columns)

            population    area
California    38332521  423967
Texas         26448193  695662
New York      19651127  141297
Florida       19552860  170312
Illinois      12882135  149995
Index(['California', 'Texas', 'New York', 'Florida', 'Illinois'], dtype='object')
Index(['population', 'area'], dtype='object')


In [29]:
# Construct a DataFrame from a 2D NumPy array

print(states['area'])

np.random.seed(1)
arr = np.random.rand(3,2)
print(arr,'\n')

df = pd.DataFrame(arr, columns=['foo','bar'], index = ['a','b','c'])

print(df)

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: area, dtype: int64
[[4.17022005e-01 7.20324493e-01]
 [1.14374817e-04 3.02332573e-01]
 [1.46755891e-01 9.23385948e-02]] 

        foo       bar
a  0.417022  0.720324
b  0.000114  0.302333
c  0.146756  0.092339


In [35]:
"""
Series object manipulation : dictionary-style
"""
ser = pd.Series([0.25, 0.5, 0.75, 1.0], index=['a', 'b', 'c', 'd'])
print(ser)
print('a' in ser)

print(ser.keys())

ser['e'] = 1.25 
ser['a'] = 0.125
print(ser)

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64
True
Index(['a', 'b', 'c', 'd'], dtype='object')
a    0.125
b    0.500
c    0.750
d    1.000
e    1.250
dtype: float64


In [41]:
"""
DataFrame object manipulation
"""

states['density'] = states['population'] / states['area']

print(states)

            population    area     density
California    38332521  423967   90.413926
Texas         26448193  695662   38.018740
New York      19651127  141297  139.076746
Florida       19552860  170312  114.806121
Illinois      12882135  149995   85.883763


In [47]:
# Indexer : Loc , iloc

states.loc['California' : 'New York']


states,loc['population':0]

NameError: name 'loc' is not defined

In [48]:
# Masking and fancy indexing using the Loc indexer

states.loc[states.density > 100, ['population', 'density']]


Unnamed: 0,population,density
New York,19651127,139.076746
Florida,19552860,114.806121
