In [1]:
import pandas as pd
import numpy as np


In [2]:
# Build a Series from a plain list; with no index given, pandas labels the
# values with a default integer index starting at 0.
pd.Series([1,2,3,4])

0    1
1    2
2    3
3    4
dtype: int64

In [3]:
# A Series with explicit string labels ('a'..'f') instead of the default
# integer index.
data = pd.Series(
    data=[2, 343243, 23, 233, 234, 23],
    index=list('abcdef'),
)

In [4]:
# Display the labelled Series built in the previous cell.
data

a         2
b    343243
c        23
d       233
e       234
f        23
dtype: int64

In [5]:
# City -> population mapping; when handed to pd.Series the dict keys become
# the index labels and the dict values become the data.
population_dict = dict(
    Dhaka=23112312312,
    Cumilla=23423423,
    Chittagong=234232323,
    Rajshahi=34423423,
    Sylhet=112312312,
)

p_data = pd.Series(data=population_dict)

In [6]:
# Display the Series built from the dictionary; the index follows the
# dictionary's key order.
p_data

Dhaka         23112312312
Cumilla          23423423
Chittagong      234232323
Rajshahi         34423423
Sylhet          112312312
dtype: int64

In [7]:
# Slicing by index label: unlike positional slicing, a label-based slice
# includes the stop label ('Rajshahi') in the result.

p_data['Dhaka' : 'Rajshahi']

Dhaka         23112312312
Cumilla          23423423
Chittagong      234232323
Rajshahi         34423423
dtype: int64

In [8]:
# Build a DataFrame from a list of dictionaries: each dictionary becomes one
# row and its keys become the column names.
data1 = [dict(a=i, b=2 * i) for i in range(3)]
pd.DataFrame(data1)

Unnamed: 0,a,b
0,0,0
1,1,2
2,2,4


In [9]:
# Build a DataFrame from a two-dimensional NumPy array, naming both the
# columns and the row index explicitly.
# NOTE(review): np.random.rand is not seeded here, so the displayed values
# change on every run.
# NOTE(review): 'food' may be a typo for 'foo' — confirm before renaming.

pd.DataFrame(np.random.rand(3,2),
             columns=['food', 'bar'],
            index=['a','b','c'])

Unnamed: 0,food,bar
a,0.136548,0.598198
b,0.754849,0.866297
c,0.684977,0.170314


In [10]:
# A Series whose integer labels (1, 3, 5) differ from its positions (0, 1, 2),
# used below to contrast label-based and position-based indexing.
data = pd.Series(list('abc'), index=[1, 3, 5])

data

1    a
3    b
5    c
dtype: object

In [11]:
# .loc looks up by index label: label 1 belongs to the first element.
data.loc[1]

'a'

In [12]:
# Label-based slice: the stop label (3) is included in the result.
data.loc[1:3]

1    a
3    b
dtype: object

In [13]:
# .iloc looks up by integer position: position 1 is the second element.
data.iloc[1]

'b'

In [14]:
# Position-based slice: positions 1 and 2 are returned; the stop position (3)
# is excluded.
data.iloc[1:3]

3    b
5    c
dtype: object

In [15]:
# Two Series keyed by the same set of labels, deliberately listed in
# different orders.
area = pd.Series(
    data=[34241234, 1232133, 6456234, 29284343, 2023030],
    index=['California', 'Texas', 'New York', 'Florida', 'Tokyo'],
)

pop = pd.Series(
    data=[32424232423, 123123123, 5234234, 1232132, 605023043],
    index=['California', 'Florida', 'Tokyo', 'New York', 'Texas'],
)

In [16]:
# Combine the two Series into one DataFrame with columns 'area' and 'pop';
# rows are matched on index label, so the differing key order does not matter.
data = pd.DataFrame(dict(area=area, pop=pop))

In [17]:
# Display the combined DataFrame (one row per label, one column per Series).
data

Unnamed: 0,area,pop
California,34241234,32424232423
Florida,29284343,123123123
New York,6456234,1232132
Texas,1232133,605023043
Tokyo,2023030,5234234


In [18]:
# Dictionary-style access: select a column by its name; the result is a Series.
data['area']

California    34241234
Florida       29284343
New York       6456234
Texas          1232133
Tokyo          2023030
Name: area, dtype: int64

In [19]:
# Attribute-style access to the same 'area' column.
data.area

California    34241234
Florida       29284343
New York       6456234
Texas          1232133
Tokyo          2023030
Name: area, dtype: int64

In [20]:
# Check whether both access styles hand back the very same object.
# NOTE(review): this identity presumably relies on pandas caching the column
# object and may not hold in every pandas version — confirm.
data.area is data['area']

True

In [21]:
data.pop is data['pop']

# -> This is False because DataFrame already has a method named `pop`:
# -> attribute access `data.pop` resolves to that method, not to the 'pop'
# -> column, so use data['pop'] for columns whose names clash with methods.

False

In [22]:
# -> add a derived column to the DataFrame: population divided by area
# NOTE(review): 'destiny' looks like a typo for 'density'; the name is kept
# because a later cell filters on `data.destiny`.

data['destiny'] = data['pop'].div(data['area'])
data

Unnamed: 0,area,pop,destiny
California,34241234,32424232423,946.935278
Florida,29284343,123123123,4.204401
New York,6456234,1232132,0.190844
Texas,1232133,605023043,491.037123
Tokyo,2023030,5234234,2.587324


In [23]:
# Raw underlying data as a single two-dimensional NumPy array (one array row
# per DataFrame row). `DataFrame.to_numpy()` is the accessor the pandas
# documentation recommends over `.values`; the result is the same array.
# Because the columns mix integers and floats, every value is shown as float.

data.to_numpy()

array([[3.42412340e+07, 3.24242324e+10, 9.46935278e+02],
       [2.92843430e+07, 1.23123123e+08, 4.20440107e+00],
       [6.45623400e+06, 1.23213200e+06, 1.90843764e-01],
       [1.23213300e+06, 6.05023043e+08, 4.91037123e+02],
       [2.02303000e+06, 5.23423400e+06, 2.58732396e+00]])

In [24]:
# Transpose the DataFrame: the row labels become columns and the column
# names become the index.
data.T

Unnamed: 0,California,Florida,New York,Texas,Tokyo
area,34241230.0,29284340.0,6456234.0,1232133.0,2023030.0
pop,32424230000.0,123123100.0,1232132.0,605023000.0,5234234.0
destiny,946.9353,4.204401,0.1908438,491.0371,2.587324


In [25]:
# Indexing the underlying array with a single integer returns one row.
# `to_numpy()` is used in place of `.values`, as the pandas documentation
# recommends; the resulting array is the same.
data.to_numpy()[0]

array([3.42412340e+07, 3.24242324e+10, 9.46935278e+02])

In [26]:
# Accessing a single column: index the DataFrame with the column name.
# (A single integer index on the raw array gives a row; a single key on the
# DataFrame gives a column.)

data['area']

California    34241234
Florida       29284343
New York       6456234
Texas          1232133
Tokyo          2023030
Name: area, dtype: int64

In [27]:
# Position-based selection: the first three rows and the first two columns.
data.iloc[:3,:2]

Unnamed: 0,area,pop
California,34241234,32424232423
Florida,29284343,123123123
New York,6456234,1232132


In [28]:
# Label-based selection: rows up to and including 'New York', columns up to
# and including 'pop' (label slices include the stop label).
data.loc[:'New York',:'pop']

Unnamed: 0,area,pop
California,34241234,32424232423
Florida,29284343,123123123
New York,6456234,1232132


In [29]:
# Overwrite a single cell in place by position: row 0, column 2
# (the 'destiny' value of the first row), then display the modified frame.
data.iloc[0,2]=90
data

Unnamed: 0,area,pop,destiny
California,34241234,32424232423,90.0
Florida,29284343,123123123,4.204401
New York,6456234,1232132,0.190844
Texas,1232133,605023043,491.037123
Tokyo,2023030,5234234,2.587324


In [30]:
# Slicing a DataFrame with [start:stop] selects rows, not columns.

data[1:3]

# Returns the rows at positions 1 and 2 (the stop position 3 is excluded).

Unnamed: 0,area,pop,destiny
Florida,29284343,123123123,4.204401
New York,6456234,1232132,0.190844


In [31]:
# Boolean masking also works row-wise: keep only the rows whose 'destiny'
# value is greater than 100.
data[data.destiny >100]

Unnamed: 0,area,pop,destiny
Texas,1232133,605023043,491.037123


**Ufuncs: Index Preservation**

In [32]:
# Seeded generator so the random examples below are reproducible.
rng = np.random.RandomState(seed=42)
# Four random integers drawn from 0..9, wrapped in a Series.
ser = pd.Series(rng.randint(low=0, high=10, size=4))
ser

0    6
1    3
2    7
3    4
dtype: int32

In [33]:
# A 3x4 frame of random integers from 0..9, drawn from the seeded generator.
# NOTE(review): the lowercase 'c' among 'A', 'B', 'D' may be unintentional — confirm.
df = pd.DataFrame(
    rng.randint(low=0, high=10, size=(3, 4)),
    columns=list('ABcD'),
)
df

Unnamed: 0,A,B,c,D
0,6,9,2,6
1,7,4,3,7
2,7,2,5,4


In [34]:
# Applying a NumPy ufunc to a Series returns a Series with the same index.
np.exp(ser)

0     403.428793
1      20.085537
2    1096.633158
3      54.598150
dtype: float64

In [35]:
# Element-wise arithmetic and ufuncs on a DataFrame return a DataFrame that
# keeps the same index and column labels.
np.sin(df * np.pi /4)

Unnamed: 0,A,B,c,D
0,-1.0,0.7071068,1.0,-1.0
1,-0.707107,1.224647e-16,0.707107,-0.7071068
2,-0.707107,1.0,-0.707107,1.224647e-16


In [36]:
# A 3x4 NumPy array of random integers from 0..9, drawn from the seeded generator.
A = rng.randint(low=0, high=10, size=(3, 4))
A


array([[1, 7, 5, 1],
       [4, 0, 9, 5],
       [8, 0, 9, 2]])

In [37]:
# NumPy broadcasting: the first row A[0] is subtracted from every row of A,
# so the operation is applied row-wise.
A - A[0]


array([[ 0,  0,  0,  0],
       [ 3, -7,  4,  4],
       [ 7, -7,  4,  1]])

In [38]:
# Wrap the array in a DataFrame. Subtracting the first row from the frame is
# likewise applied to every row, just as with the plain NumPy array.
df = pd.DataFrame(A, columns=['W', 'R', 's', 'T'])
df - df.iloc[0]

Unnamed: 0,W,R,s,T
0,0,0,0,0
1,3,-7,4,4
2,7,-7,4,1


In [39]:
# To operate column-wise instead, use the method form and pass axis=0:
# the 'R' column is matched on the row index and subtracted from every column.

df.subtract(df['R'],axis=0)

Unnamed: 0,W,R,s,T
0,-6,0,-2,-6
1,4,0,9,5
2,8,0,9,2
