In [18]:
import pandas as pd
import numpy as np

In [3]:
# Series
# A pandas Series is a one-dimensional array of values paired with an index.
# No index is passed here, so pandas supplies a default integer RangeIndex.
data = pd.Series(data=[0.25, 0.5, 0.75, 1])
(data.values, data.index)

(array([0.25, 0.5 , 0.75, 1.  ]), RangeIndex(start=0, stop=4, step=1))

In [4]:
# A NumPy array is addressed through an implicitly defined integer position,
# whereas a pandas Series carries an explicitly defined index that is
# associated with its values. Here the index is the labels 'a'..'d'.
data = pd.Series(
    data=[0.25, 0.5, 0.75, 1],
    index=["a", "b", "c", "d"],
)
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [9]:
# Series as a specialized dictionary
# Building a Series from a dict turns the keys into the index, so a value can
# be looked up by its key. Unlike a plain dict, the Series also accepts a
# label slice, and that slice includes the end label.
population_dict = dict([
    ('California', 3833259),
    ('Texas', 27328974),
    ('New York', 19232131),
])
population = pd.Series(data=population_dict)
print(population)
print(
    "population of California = \n",
    population['California'],
)
print(
    "population of California to New York = \n",
    population['California':'New York'],
)


California     3833259
Texas         27328974
New York      19232131
dtype: int64
population of California = 
 3833259
population of California to New York = 
 California     3833259
Texas         27328974
New York      19232131
dtype: int64


In [17]:
# Different ways of constructing a Series object: from a list of values, from
# a single scalar repeated for every index label, and from a dictionary whose
# keys become the index.
from_list = pd.Series([1, 2, 2, 4, 5, 6])
from_scalar = pd.Series(5, index=[100, 200, 300, 400])
from_dict = pd.Series(dict(a=1, b=2, c=3))

# The first two prints end with a blank line to separate the sections.
print("From Array = \n", from_list, end="\n\n")
print("From scaller value = \n", from_scalar, end="\n\n")
print("From dictionary = \n", from_dict)

From Array = 
 0    1
1    2
2    2
3    4
4    5
5    6
dtype: int64

From scaller value = 
 100    5
200    5
300    5
400    5
dtype: int64

From dictionary = 
 a    1
b    2
c    3
dtype: int64


In [26]:
# DataFrames
# DataFrame as a generalized NumPy array
# A DataFrame can be thought of as a sequence of Series objects that share
# the same index; each Series becomes one named column.

area_dict = dict([
    ('California', 423432423),
    ('Texas', 32432433),
    ('New York', 14142423),
])
area = pd.Series(data=area_dict)

# `population` is the Series built in an earlier cell; both Series are keyed
# by state name.
states = pd.DataFrame(dict(population=population, area=area))
states

Unnamed: 0,population,area
California,3833259,423432423
Texas,27328974,32432433
New York,19232131,14142423


In [27]:
# DataFrame as a specialized dictionary: indexing with a column name returns
# the Series stored under that name.
states["area"]

California    423432423
Texas          32432433
New York       14142423
Name: area, dtype: int64

In [28]:
# Creating DataFrame objects
# 1) From a single Series object; `columns` names the one resulting column.
pd.DataFrame(data=population, columns=["population"])

Unnamed: 0,population
California,3833259
Texas,27328974
New York,19232131


In [29]:
# 2) From a list of dictionaries: each dictionary becomes one row and its
# keys become the column names.
data = [dict(a=n, b=2 * n) for n in range(3)]
pd.DataFrame(data=data)

Unnamed: 0,a,b
0,0,0
1,1,2
2,2,4


In [30]:
# Rows do not need identical keys: wherever a dictionary lacks a key, pandas
# fills that cell with NaN (i.e. "not a number").
pd.DataFrame(data=[
    dict(a=1, b=2),
    dict(b=3, c=4),
])

Unnamed: 0,a,b,c
0,1.0,2,
1,,3,4.0


In [31]:
# 3) From a dictionary of Series objects: each key names a column and the
# Series under it supplies that column's values.
pd.DataFrame(data=dict(population=population, area=area))

Unnamed: 0,population,area
California,3833259,423432423
Texas,27328974,32432433
New York,19232131,14142423


In [33]:
# 4) From a two-dimensional NumPy array
# The array supplies the cell values; row labels and column names are passed
# explicitly. The integers come from a locally seeded generator, so the cell
# produces the same frame on every run and leaves NumPy's global random state
# untouched.
rng = np.random.default_rng(seed=0)
random_frame = pd.DataFrame(
    rng.integers(0, 10, size=(3, 2)),  # values in [0, 10), 3 rows x 2 columns
    index=['a', 'b', 'c'],
    columns=['Foo', 'Bar'],
)
random_frame

Unnamed: 0,Foo,Bar
a,1,0
b,5,5
c,0,0


In [37]:
# 5) From a NumPy structured array: every named field of the array's dtype
# becomes a column ('A' holds 8-byte integers, 'B' holds 8-byte floats).
a = np.zeros(shape=3, dtype=np.dtype([("A", "i8"), ("B", "f8")]))
pd.DataFrame(data=a)

Unnamed: 0,A,B
0,0,0.0
1,0,0.0
2,0,0.0
